From 7ad3117cdf41552e72a87d09f2e536d9048b8def Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Fri, 8 Sep 2023 09:59:53 -0700 Subject: [PATCH] Move cpu_only to MCV class (#762) * Initial changes * Fix typo * Updated single model ckpt * Updated mulit-model ckpt * Updating plot manager * Updating BLS checkpoint * Ensemble unit tests passing * Fixing BLS report manager unit testing * Updating request rate checkpoint * Updating checkpoints for L0 tests * Changing tests to use add_sub * Fixing num of output rows * Removing backwards compat --- .../generate/base_model_config_generator.py | 6 +- model_analyzer/config/run/model_run_config.py | 24 - model_analyzer/config/run/run_config.py | 2 +- model_analyzer/plots/detailed_plot.py | 1 - model_analyzer/triton/model/model_config.py | 33 +- .../triton/model/model_config_variant.py | 3 +- qa/L0_output_fields/config_generator.py | 2 +- qa/L0_output_fields/test.sh | 8 +- qa/L0_quick_search/test.sh | 4 +- qa/L0_quick_search_multi_model/test.sh | 4 +- qa/L0_results/test.sh | 8 +- tests/common/bls-ckpt/0.ckpt | 18078 ++++++----- tests/common/bls-ckpt/README | 2 +- .../golden-metrics-model-inference.csv | 79 +- .../bls-ckpt/golden-metrics-server-only.csv | 2 +- tests/common/ensemble-ckpt/0.ckpt | 24839 ++-------------- tests/common/ensemble-ckpt/README | 3 +- .../golden-metrics-model-gpu.csv | 89 +- .../golden-metrics-model-inference.csv | 89 +- .../golden-metrics-server-only.csv | 2 +- tests/common/multi-model-ckpt/0.ckpt | 7768 ++++- .../golden-metrics-model-gpu.csv | 48 +- .../golden-metrics-model-inference.csv | 48 +- .../golden-metrics-server-only.csv | 2 +- .../common/multi-model-ckpt/plot_manager.json | 2 +- tests/common/request-rate-ckpt/0.ckpt | 7949 +++-- tests/common/request-rate-ckpt/README | 21 + .../golden-metrics-model-gpu.csv | 75 +- .../golden-metrics-model-inference.csv | 75 +- .../golden-metrics-server-only.csv | 2 +- tests/common/single-model-ckpt/0.ckpt | 5351 +++- .../golden-metrics-model-gpu.csv | 32 +- .../golden-metrics-model-inference.csv | 32 +- .../golden-metrics-server-only.csv | 2 +- .../single-model-ckpt/plot_manager.json | 2 +- tests/common/test_utils.py | 6 +- tests/test_bls_report_manager.py | 93 +- tests/test_ensemble_report_manager.py | 88 +- tests/test_quick_run_config_generator.py | 13 - tests/test_report_manager.py | 5 +- tests/test_result_manager.py | 2 +- tests/test_run_config.py | 6 +- 42 files changed, 30366 insertions(+), 34534 deletions(-) create mode 100644 tests/common/request-rate-ckpt/README diff --git a/model_analyzer/config/generate/base_model_config_generator.py b/model_analyzer/config/generate/base_model_config_generator.py index fcdb7739c..7cd82ee82 100755 --- a/model_analyzer/config/generate/base_model_config_generator.py +++ b/model_analyzer/config/generate/base_model_config_generator.py @@ -159,11 +159,10 @@ def _make_remote_model_config_variant(self) -> ModelConfigVariant: model_config = ModelConfig.create_from_triton_api( self._client, self._base_model_name, self._config.client_max_retries ) - model_config.set_cpu_only(self._cpu_only) if not self._config.reload_model_disable: self._client.unload_model(self._base_model_name) - return ModelConfigVariant(model_config, self._base_model_name) + return ModelConfigVariant(model_config, self._base_model_name, self._cpu_only) def _make_direct_mode_model_config_variant( self, param_combo: Dict @@ -217,9 +216,8 @@ def make_model_config_variant( model_config_dict["name"] = variant_name if c_api_mode else model_name model_config = ModelConfig.create_from_dictionary(model_config_dict) - model_config.set_cpu_only(model.cpu_only()) - return ModelConfigVariant(model_config, variant_name) + return ModelConfigVariant(model_config, variant_name, model.cpu_only()) @staticmethod def make_ensemble_model_config_variant( diff --git a/model_analyzer/config/run/model_run_config.py b/model_analyzer/config/run/model_run_config.py index 738c709b1..9618dde9f 100755 --- a/model_analyzer/config/run/model_run_config.py +++ b/model_analyzer/config/run/model_run_config.py @@ -297,13 +297,6 @@ def from_dict(cls, model_run_config_dict): model_run_config_dict["_perf_config"] ) - # TODO: TMA-1332: This is for backward compatibility with older checkpoints used in unit tests - if "_model_config" in model_run_config_dict: - model_config = ModelConfig.from_dict(model_run_config_dict["_model_config"]) - model_run_config._model_config_variant = ModelConfigVariant( - model_config, model_config.get_field("name") - ) - if "_composing_config_variants" in model_run_config_dict: model_run_config._composing_config_variants = [ ModelConfigVariant( @@ -317,21 +310,4 @@ def from_dict(cls, model_run_config_dict): ] ] - # TODO: TMA-1332: This is for backward compatibility with older checkpoints used in unit tests - if "_composing_configs" in model_run_config_dict: - composing_configs = [ - ModelConfig.from_dict(composing_config_dict) - for composing_config_dict in model_run_config_dict["_composing_configs"] - ] - - composing_variant_names = [ - composing_config.get_field("name") - for composing_config in composing_configs - ] - - model_run_config._composing_config_variants = [ - ModelConfigVariant(composing_config, composing_variant_names[i]) - for i, composing_config in enumerate(composing_configs) - ] - return model_run_config diff --git a/model_analyzer/config/run/run_config.py b/model_analyzer/config/run/run_config.py index 18daaf32c..d2b3126e1 100755 --- a/model_analyzer/config/run/run_config.py +++ b/model_analyzer/config/run/run_config.py @@ -85,7 +85,7 @@ def cpu_only(self): """ return all( [ - model_run_config.model_config().cpu_only() + model_run_config.model_config_variant().cpu_only for model_run_config in self._model_run_configs ] ) diff --git a/model_analyzer/plots/detailed_plot.py b/model_analyzer/plots/detailed_plot.py index 93a47d1fd..0d0c3966d 100755 --- a/model_analyzer/plots/detailed_plot.py +++ b/model_analyzer/plots/detailed_plot.py @@ -168,7 +168,6 @@ def plot_data(self): # Sort the data by request rate or concurrency if "request_rate" in self._data and self._data["request_rate"][0]: - print(f"\n\nFound request rate: {self._data['request_rate']}\n\n") sort_indices = list( zip(*sorted(enumerate(self._data["request_rate"]), key=lambda x: x[1])) )[0] diff --git a/model_analyzer/triton/model/model_config.py b/model_analyzer/triton/model/model_config.py index 01ccefce4..8ba9c9172 100755 --- a/model_analyzer/triton/model/model_config.py +++ b/model_analyzer/triton/model/model_config.py @@ -50,24 +50,14 @@ def __init__(self, model_config): """ self._model_config = model_config - self._cpu_only = False def to_dict(self): model_config_dict = json_format.MessageToDict(self._model_config) - model_config_dict["cpu_only"] = self._cpu_only return model_config_dict @classmethod def from_dict(cls, model_config_dict): - if "cpu_only" in model_config_dict: - cpu_only = model_config_dict["cpu_only"] - del model_config_dict["cpu_only"] - model_config = ModelConfig.create_from_dictionary(model_config_dict) - model_config._cpu_only = cpu_only - else: - model_config = ModelConfig.create_from_dictionary(model_config_dict) - - return model_config + return ModelConfig.create_from_dictionary(model_config_dict) @staticmethod def create_model_config_dict(config, client, gpus, model_repository, model_name): @@ -290,27 +280,6 @@ def create_from_profile_spec( return model_config - def set_cpu_only(self, cpu_only): - """ - Parameters - ---------- - bool - Whether this model config has only - CPU instance groups - """ - - self._cpu_only = cpu_only - - def cpu_only(self): - """ - Returns - ------- - bool - Whether the model should be run on CPU only - """ - - return self._cpu_only - def is_ensemble(self) -> bool: """ Returns diff --git a/model_analyzer/triton/model/model_config_variant.py b/model_analyzer/triton/model/model_config_variant.py index 13716d6b6..de0345796 100644 --- a/model_analyzer/triton/model/model_config_variant.py +++ b/model_analyzer/triton/model/model_config_variant.py @@ -21,8 +21,9 @@ class ModelConfigVariant: """ A dataclass that holds the ModelConfig as well as the variant name - for the model + and cpu_only flag for the model """ model_config: ModelConfig variant_name: str + cpu_only: bool = False diff --git a/qa/L0_output_fields/config_generator.py b/qa/L0_output_fields/config_generator.py index 7488ea037..534a19ace 100755 --- a/qa/L0_output_fields/config_generator.py +++ b/qa/L0_output_fields/config_generator.py @@ -20,7 +20,7 @@ def _get_sweep_configs(): sweep_configs = [] model_config = { - "profile_models": ["vgg19_libtorch"], + "profile_models": ["add_sub"], "server_output_fields": [ "model_name", "gpu_uuid", diff --git a/qa/L0_output_fields/test.sh b/qa/L0_output_fields/test.sh index 0bf5262b2..a19d983f1 100755 --- a/qa/L0_output_fields/test.sh +++ b/qa/L0_output_fields/test.sh @@ -22,8 +22,8 @@ python3 config_generator.py # Set test parameters MODEL_ANALYZER="`which model-analyzer`" REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} -MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/libtorch_model_store"} -CHECKPOINT_REPOSITORY=${CHECKPOINT_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/model_analyzer_checkpoints/2022_08_02"} +MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"} +CHECKPOINT_REPOSITORY=${CHECKPOINT_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/model_analyzer_checkpoints/2023_09_07"} FILENAME_SERVER_ONLY="server-metrics.csv" FILENAME_INFERENCE_MODEL="model-metrics-inference.csv" FILENAME_GPU_MODEL="model-metrics-gpu.csv" @@ -31,7 +31,7 @@ GPUS=(`get_all_gpus_uuids`) OUTPUT_MODEL_REPOSITORY=${OUTPUT_MODEL_REPOSITORY:=`get_output_directory`} CHECKPOINT_DIRECTORY="." -cp $CHECKPOINT_REPOSITORY/resnet50_vgg19.ckpt $CHECKPOINT_DIRECTORY/0.ckpt +cp $CHECKPOINT_REPOSITORY/add_sub.ckpt $CHECKPOINT_DIRECTORY/0.ckpt MODEL_ANALYZER_ANALYZE_BASE_ARGS="--checkpoint-directory $CHECKPOINT_DIRECTORY --filename-server-only=$FILENAME_SERVER_ONLY" MODEL_ANALYZER_ANALYZE_BASE_ARGS="$MODEL_ANALYZER_ANALYZE_BASE_ARGS --filename-model-inference=$FILENAME_INFERENCE_MODEL --filename-model-gpu=$FILENAME_GPU_MODEL" @@ -59,7 +59,7 @@ for CONFIG_FILE in ${LIST_OF_CONFIG_FILES[@]}; do MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ANALYZE_BASE_ARGS -e $EXPORT_PATH -f $CONFIG_FILE" - TEST_OUTPUT_NUM_ROWS=16 + TEST_OUTPUT_NUM_ROWS=47 run_analyzer if [ $? -ne 0 ]; then echo -e "\n***\n*** Test Failed. model-analyzer exited with non-zero exit code. \n***" diff --git a/qa/L0_quick_search/test.sh b/qa/L0_quick_search/test.sh index 96b1c8056..48b63ac37 100755 --- a/qa/L0_quick_search/test.sh +++ b/qa/L0_quick_search/test.sh @@ -20,8 +20,8 @@ create_logs_dir "L0_quick_search" # Set test parameters MODEL_ANALYZER="`which model-analyzer`" REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} -MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/libtorch_model_store"} -QA_MODELS="resnet50_libtorch" +MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"} +QA_MODELS="add_sub" MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')" TRITON_LAUNCH_MODE=${TRITON_LAUNCH_MODE:="local"} CLIENT_PROTOCOL="grpc" diff --git a/qa/L0_quick_search_multi_model/test.sh b/qa/L0_quick_search_multi_model/test.sh index fba6952d5..158d052ad 100755 --- a/qa/L0_quick_search_multi_model/test.sh +++ b/qa/L0_quick_search_multi_model/test.sh @@ -20,8 +20,8 @@ create_logs_dir "L0_quick_search_multi_model" # Set test parameters MODEL_ANALYZER="`which model-analyzer`" REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} -MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/libtorch_model_store"} -QA_MODELS="resnet50_libtorch,vgg19_libtorch" +MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"} +QA_MODELS="add,sub" MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')" TRITON_LAUNCH_MODE=${TRITON_LAUNCH_MODE:="local"} CLIENT_PROTOCOL="grpc" diff --git a/qa/L0_results/test.sh b/qa/L0_results/test.sh index 837121a58..b2743ba5c 100755 --- a/qa/L0_results/test.sh +++ b/qa/L0_results/test.sh @@ -19,9 +19,9 @@ create_logs_dir "L0_results" # Set test parameters MODEL_ANALYZER="`which model-analyzer`" REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} -MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/libtorch_model_store"} -CHECKPOINT_REPOSITORY=${CHECKPOINT_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/model_analyzer_checkpoints/2022_08_02"} -QA_MODELS="resnet50_libtorch" +MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"} +CHECKPOINT_REPOSITORY=${CHECKPOINT_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/model_analyzer_checkpoints/2023_09_07"} +QA_MODELS="add_sub" MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')" FILENAME_SERVER_ONLY="server-metrics.csv" FILENAME_INFERENCE_MODEL="model-metrics-inference.csv" @@ -33,7 +33,7 @@ GPUS=(`get_all_gpus_uuids`) OUTPUT_MODEL_REPOSITORY=${OUTPUT_MODEL_REPOSITORY:=`get_output_directory`} create_result_paths -cp $CHECKPOINT_REPOSITORY/resnet50_vgg19.ckpt $CHECKPOINT_DIRECTORY/0.ckpt +cp $CHECKPOINT_REPOSITORY/add_sub.ckpt $CHECKPOINT_DIRECTORY/0.ckpt rm -rf $OUTPUT_MODEL_REPOSITORY diff --git a/tests/common/bls-ckpt/0.ckpt b/tests/common/bls-ckpt/0.ckpt index a6848acf1..e43701de8 100644 --- a/tests/common/bls-ckpt/0.ckpt +++ b/tests/common/bls-ckpt/0.ckpt @@ -1,1781 +1,8737 @@ { - "ModelManager.model_variant_name_manager": { - "_model_config_dicts": { - "FaceDetectionBLS_config_0": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "backend": "bls" - }, - "FaceDetectionPreprocessing_config_0": { - "name": "FaceDetectionPreprocessing", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "backend": "sdk_backend", - "instance_group": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionModel_config_0": { - "name": "FaceDetectionModel", - "platform": "tensorrt_plan", - "max_batch_size": 8, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "data_type": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instance_group": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "FaceDetectionPostprocessing_config_0": { - "name": "FaceDetectionPostprocessing", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, + "ResultManager.results": { + "_results": { + "bls": { + "bls_config_default": [ { - "name": "input_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "kind": "KIND_CPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_default", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 1, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": "add,sub" + }, + "_options": { + "-m": "bls", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "bls-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "kind": "KIND_CPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_default", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "kind": "KIND_CPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_default", + "cpu_only": false + } + ] + } ] }, { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "sequence_batching": { - "max_sequence_idle_microseconds": "5000000", - "control_input": [ - { - "name": "START", - "control": [ - { - "fp32_false_true": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32_false_true": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32_false_true": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "data_type": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "max_candidate_sequences": 10, - "max_queue_delay_microseconds": "50000" - } - }, - "backend": "sdk_backend", - "instance_group": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionPostprocessing_config_1": { - "name": "FaceDetectionPostprocessing", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "sequence_batching": { - "max_sequence_idle_microseconds": "5000000", - "control_input": [ - { - "name": "START", - "control": [ - { - "fp32_false_true": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32_false_true": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32_false_true": [ - 0.0, - 1.0 + "bls_config_default -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_default,sub_config_default": { + "_model_variants_name": "bls_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.15, + "_timestamp": 0, + "_device_uuid": null + } ] - } - ] - }, - { - "name": "CORRID", - "control": [ + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.15, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.15, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ { - "kind": "CONTROL_SEQUENCE_CORRID", - "data_type": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "max_candidate_sequences": 10, - "max_queue_delay_microseconds": "50000" - } - }, - "backend": "sdk_backend", - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionBLS_config_1": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "backend": "bls" - }, - "FaceDetectionPreprocessing_config_1": { - "name": "FaceDetectionPreprocessing", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "backend": "sdk_backend", - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionModel_config_1": { - "name": "FaceDetectionModel", - "platform": "tensorrt_plan", - "max_batch_size": 8, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "data_type": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "FaceDetectionModel_config_2": { - "name": "FaceDetectionModel", - "platform": "tensorrt_plan", - "max_batch_size": 8, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "data_type": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instance_group": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "FaceDetectionPostprocessing_config_2": { - "name": "FaceDetectionPostprocessing", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "sequence_batching": { - "max_sequence_idle_microseconds": "5000000", - "control_input": [ - { - "name": "START", - "control": [ - { - "fp32_false_true": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32_false_true": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32_false_true": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "data_type": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "max_candidate_sequences": 10, - "max_queue_delay_microseconds": "50000" - } - }, - "backend": "sdk_backend", - "instance_group": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionBLS_config_2": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "backend": "bls" - }, - "FaceDetectionPreprocessing_config_2": { - "name": "FaceDetectionPreprocessing", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "backend": "sdk_backend", - "instance_group": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionBLS_config_3": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "backend": "bls" - }, - "FaceDetectionPreprocessing_config_3": { - "name": "FaceDetectionPreprocessing", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "backend": "sdk_backend", - "instance_group": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionModel_config_3": { - "name": "FaceDetectionModel", - "platform": "tensorrt_plan", - "max_batch_size": 8, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "data_type": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instance_group": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "FaceDetectionPreprocessing_config_4": { - "name": "FaceDetectionPreprocessing", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "backend": "sdk_backend", - "instance_group": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionBLS_config_4": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "backend": "bls" - }, - "FaceDetectionPreprocessing_config_5": { - "name": "FaceDetectionPreprocessing", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "data_type": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "backend": "sdk_backend", - "instance_group": [ - { - "count": 6, - "kind": "KIND_GPU" - } - ] - }, - "FaceDetectionModel_config_4": { - "name": "FaceDetectionModel", - "platform": "tensorrt_plan", - "max_batch_size": 8, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "data_type": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instance_group": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "FaceDetectionBLS_config_5": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 6, - "kind": "KIND_GPU" + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 1.283, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 1.511, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 1.592, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 1.744, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 777.97, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.006, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 1.259, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.02, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.911, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.024, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.1, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.283, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.511, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 1.592, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 1.744, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 777.97, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.006, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.259, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.02, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.911, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.024, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.1, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_default -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_default,sub_config_default": { + "_model_variants_name": "bls_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.025, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.025, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.025, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 2.398, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 2.643, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 2.769, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 3.101, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 832.413, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 2.372, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.99, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.014, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.024, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.102, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 2.398, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 2.643, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 2.769, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 3.101, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 832.413, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 2.372, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.99, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.014, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.024, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.102, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_default -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_default,sub_config_default": { + "_model_variants_name": "bls_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.185, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.185, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.185, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 4.894, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 5.383, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 5.522, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 6.004, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 815.941, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 4.861, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 3.427, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.019, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.026, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.113, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 4.894, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 5.383, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 5.522, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 6.004, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 815.941, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 4.861, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 3.427, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.019, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.026, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.113, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_default -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_default,sub_config_default": { + "_model_variants_name": "bls_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 34.2927, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 34.2927, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 34.2927, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 8, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 10.016, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 10.781, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 11.006, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 11.751, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 796.93, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 9.98, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 8.495, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.047, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.027, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.115, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 10.016, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 10.781, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 11.006, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 11.751, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 796.93, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 9.98, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 8.495, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.047, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.027, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.115, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_default -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_default,sub_config_default": { + "_model_variants_name": "bls_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.2787, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.2787, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.2787, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 16, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 19.539, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 21.184, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 21.993, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 24.014, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 815.875, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 19.503, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 18.051, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.021, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.026, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.113, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 19.539, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 21.184, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 21.993, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 24.014, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 815.875, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 19.503, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 18.051, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.021, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.026, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.113, + "_timestamp": 0 + } + ] + } + } + ] + } } ], - "backend": "bls" - }, - "FaceDetectionBLS_config_6": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, + "bls_config_0": [ { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_0", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 2, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": "add,sub" + }, + "_options": { + "-m": "bls", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "bls-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_0", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_0", + "cpu_only": false + } + ] + } ] }, { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] + "bls_config_0 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_0,sub_config_0": { + "_model_variants_name": "bls_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.1707, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.1707, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.1707, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 2.352, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 2.676, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 2.77, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 3.06, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 849.002, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.006, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 2.327, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.984, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.983, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.025, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.107, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 2.352, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 2.676, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 2.77, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 3.06, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 849.002, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.006, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 2.327, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.984, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.983, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.025, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.107, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_0 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_0,sub_config_1": { + "_model_variants_name": "bls_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.2007, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.2007, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.2007, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 2.364, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 2.662, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 2.759, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 2.995, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 844.557, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 2.337, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.981, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.981, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.026, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.111, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 2.364, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 2.662, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 2.759, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 2.995, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 844.557, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 2.337, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.981, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.981, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.026, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.111, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_0 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_2,sub_config_2": { + "_model_variants_name": "bls_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6027, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6027, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.6027, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 2.642, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 2.885, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 2.966, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 3.112, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 756.235, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 2.613, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 1.097, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.116, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.026, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.113, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 2.642, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 2.885, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 2.966, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 3.112, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 756.235, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 2.613, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 1.097, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.116, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.026, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.113, + "_timestamp": 0 + } + ] + } + } + ] + } } ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, + "bls_config_1": [ { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_1", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 4, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": "add,sub" + }, + "_options": { + "-m": "bls", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "bls-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_1", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_0", + "cpu_only": false + } + ] + } ] }, { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 7, - "kind": "KIND_GPU" + "bls_config_1 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_1,sub_config_0": { + "_model_variants_name": "bls_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.4793, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.4793, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.4793, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 3.034, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 3.15, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 3.229, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 3.413, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 1316.47, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 3.0, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 1.241, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.275, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.134, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 3.034, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 3.15, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 3.229, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 3.413, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 1316.47, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 3.0, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 1.241, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.275, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.134, + "_timestamp": 0 + } + ] + } + } + ] + } } ], - "backend": "bls" - }, - "FaceDetectionBLS_config_7": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, + "bls_config_2": [ { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_2", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 6, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": "add,sub" + }, + "_options": { + "-m": "bls", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "bls-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_1", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_0", + "cpu_only": false + } + ] + } ] }, { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] + "bls_config_2 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=6 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_1,sub_config_0": { + "_model_variants_name": "bls_config_2", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6313, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6313, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.6313, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 6, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 3.199, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 3.492, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 3.591, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 3.884, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 1874.23, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 3.164, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 1.32, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.346, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.032, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.138, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 3.199, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 3.492, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 3.591, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 3.884, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 1874.23, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 3.164, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 1.32, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.346, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.032, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.138, + "_timestamp": 0 + } + ] + } + } + ] + } } ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, + "bls_config_3": [ { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 5, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_3", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 10, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": 100, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": "add,sub" + }, + "_options": { + "-m": "bls", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "bls-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_2", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_0", + "cpu_only": false + } + ] + } ] }, { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 8, - "kind": "KIND_GPU" + "bls_config_3 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=10 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_2,sub_config_0": { + "_model_variants_name": "bls_config_3", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.6627, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.6627, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.6627, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 10, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 3.87, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 4.25, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 4.362, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 4.61, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2581.16, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 3.833, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 1.626, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.672, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.034, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.143, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 3.87, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 4.25, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 4.362, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 4.61, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2581.16, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 3.833, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 1.626, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.672, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.034, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.143, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_3 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=10 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_2,sub_config_1": { + "_model_variants_name": "bls_config_3", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.6107, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.6107, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.6107, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 10, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 3.853, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 4.221, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 4.334, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 4.591, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2591.54, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 3.816, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 1.622, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.664, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.034, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.143, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 3.853, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 4.221, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 4.334, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 4.591, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2591.54, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 3.816, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 1.622, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.664, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.034, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.143, + "_timestamp": 0 + } + ] + } + } + ] + } } ], - "backend": "bls" - }, - "FaceDetectionBLS_config_8": { - "name": "FaceDetectionBLS", - "input": [ - { - "name": "input", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, + "bls_config_4": [ { - "name": "temporal", - "data_type": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_4", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 8, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": "add,sub" + }, + "_options": { + "-m": "bls", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "bls-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_3", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_2", + "cpu_only": false + } + ] + } ] }, { - "name": "max_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] + "bls_config_4 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_2": { + "_model_variants_name": "bls_config_4", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.547, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.547, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.547, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 8, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 3.536, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 3.729, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 3.823, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 4.032, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2259.22, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 3.499, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 1.439, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.502, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.036, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.141, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 3.536, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 3.729, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 3.823, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 4.032, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2259.22, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 3.499, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 1.439, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.502, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.036, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.141, + "_timestamp": 0 + } + ] + } + } + ] + } } ], - "output": [ - { - "name": "output_bbox", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, + "bls_config_5": [ { - "name": "output_bbox_conf", - "data_type": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 6, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_5", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 12, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": "add,sub" + }, + "_options": { + "-m": "bls", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "bls-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_3", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_0", + "cpu_only": false + } + ] + } ] }, { - "name": "output_num_bbox", - "data_type": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instance_group": [ - { - "count": 9, - "kind": "KIND_GPU" + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=12 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.653, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.653, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.653, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 12, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 1.647, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 1.777, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 1.879, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 2.519, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 7284.42, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 1.633, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.708, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.726, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.012, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.053, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.647, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.777, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 1.879, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 2.519, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 7284.42, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.633, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.708, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.726, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.012, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.053, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=12 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_2": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.782, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.782, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.782, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 12, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 1.414, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 1.497, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 1.583, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 2.108, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 8475.91, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 1.401, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.573, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.607, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.013, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.052, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.414, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.497, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 1.583, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 2.108, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 8475.91, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.401, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.573, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.607, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.013, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.052, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=12 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.8133, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.8133, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.8133, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 12, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 1.362, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 1.479, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 1.531, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 1.826, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 8801.15, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 1.349, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.554, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.583, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.013, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.052, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.362, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.479, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 1.531, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 1.826, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 8801.15, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.349, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.554, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.583, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.013, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.052, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.4807, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.4807, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.4807, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 1.624, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 1.792, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 1.859, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 2.002, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 614.772, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 1.598, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.028, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.17, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.032, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.14, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.624, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.792, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 1.859, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 2.002, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 614.772, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.598, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.028, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.17, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.032, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.14, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.5833, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.5833, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.5833, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 1.793, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 1.934, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 2.018, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 2.27, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 1113.32, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 1.762, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.027, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.276, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.033, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.138, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.793, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.934, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 2.018, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 2.27, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 1113.32, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.762, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.027, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.276, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.033, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.138, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 34.6027, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 34.6027, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 34.6027, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 2.039, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 2.238, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 2.325, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 2.579, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 1956.54, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 2.004, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.029, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.45, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.034, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.137, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 2.039, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 2.238, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 2.325, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 2.579, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 1956.54, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.009, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 2.004, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.029, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.45, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.034, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.137, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.604, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.604, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.604, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 8, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 2.581, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 2.994, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 3.106, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 3.356, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 3095.98, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 2.549, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.375, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.703, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.03, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.126, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 2.581, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 2.994, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 3.106, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 3.356, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 3095.98, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 2.549, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.375, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.703, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.03, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.126, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.4258, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.4258, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.4258, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 16, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 5.306, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 5.994, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 6.149, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 6.429, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 3014.95, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 5.272, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 3.037, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.747, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.13, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 5.306, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 5.994, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 6.149, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 6.429, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 3014.95, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.272, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 3.037, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.747, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.13, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.2363, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.2363, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.2363, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 32, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 10.738, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 11.666, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 11.926, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 12.355, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2979.47, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 10.703, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 8.443, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.769, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.133, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 10.738, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 11.666, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 11.926, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 12.355, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2979.47, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 10.703, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 8.443, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.769, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.133, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.2308, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.2308, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.2308, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 64, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 19.18, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 22.205, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 22.844, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 23.781, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 3328.92, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 19.147, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 17.098, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.581, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.028, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.118, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 19.18, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 22.205, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 22.844, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 23.781, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 3328.92, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 19.147, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 17.098, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.581, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.028, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.118, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=128 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.1137, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.1137, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.1137, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 128, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 41.237, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 43.461, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 43.991, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 45.318, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 3107.46, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 41.201, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 39.03, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.694, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.128, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 41.237, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 43.461, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 43.991, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 45.318, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 3107.46, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 41.201, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 39.03, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.694, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.128, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=256 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.2747, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.2747, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.2747, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 256, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 60.083, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 82.097, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 83.071, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 86.796, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 4261.38, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.006, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 60.056, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 58.448, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.237, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.023, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.091, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 60.083, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 82.097, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 83.071, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 86.796, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 4261.38, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.006, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 60.056, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 58.448, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.237, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.023, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.091, + "_timestamp": 0 + } + ] + } + } + ] + }, + "bls_config_5 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=512 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_0": { + "_model_variants_name": "bls_config_5", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.225, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 23.225, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 23.225, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 512, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 108.534, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 141.676, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 152.157, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 165.976, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 4719.63, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.006, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 108.514, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 107.075, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 1.115, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.021, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.082, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 108.534, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 141.676, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 152.157, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 165.976, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 4719.63, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.006, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 108.514, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 107.075, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 1.115, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.021, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.082, + "_timestamp": 0 + } + ] + } + } + ] + } } ], - "backend": "bls" - } - }, - "_model_name_index": { - "FaceDetectionBLS": 8, - "FaceDetectionPreprocessing": 5, - "FaceDetectionModel": 4, - "FaceDetectionPostprocessing": 2 - } - }, - "ResultManager.results": { - "_results": { - "FaceDetectionBLS": { - "FaceDetectionBLS_config_default": [ + "bls_config_6": [ { "_triton_env": {}, "_model_run_configs": [ { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_default", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 16, - "kind": "KIND_CPU" - } - ], - "backend": "bls", + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 8, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_6", "cpu_only": false }, "_perf_config": { @@ -1785,7 +8741,7 @@ "async": null, "sync": null, "measurement-interval": null, - "concurrency-range": 1, + "concurrency-range": 16, "request-rate-range": null, "request-distribution": null, "request-intervals": null, @@ -1796,9 +8752,9 @@ "stability-percentage": null, "max-trials": null, "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, @@ -1823,15 +8779,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 + "metrics-interval": 1000, + "bls-composing-models": "add,sub" }, "_options": { - "-m": "FaceDetectionBLS_config_default", + "-m": "bls", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "FaceDetectionBLS_config_default-results.csv", + "-f": "bls-results.csv", "-H": null }, "_verbose": { @@ -1857,277 +8814,390 @@ "input-data": null, "shape": null } - }, - "_composing_configs": [ + }, + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 6, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_4", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_0", + "cpu_only": false + } + ] + } + ] + }, + { + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.7087, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", { - "name": "FaceDetectionPreprocessing_config_default", - "platform": "FaceDetectionPreprocessing", - "input": [ + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.7087, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.7087, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "bls", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 16, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] + "_value": 5.216, + "_timestamp": 0 } ], - "output": [ + [ + "perf_latency_p90", { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, + "_value": 5.836, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, + "_value": 6.009, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] + "_value": 6.372, + "_timestamp": 0 } ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_default", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ + [ + "perf_throughput", { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] + "_value": 3066.01, + "_timestamp": 0 } ], - "output": [ + [ + "perf_client_send_recv", { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, + "_value": 0.008, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] + "_value": 5.183, + "_timestamp": 0 } ], - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_default", - "platform": "FaceDetectionPostprocessing", - "input": [ + [ + "perf_server_queue", { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, + "_value": 2.325, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, + "_value": 2.377, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, + "_value": 0.03, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, + "_value": 0.125, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, + "_value": 5.216, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] + "_value": 5.836, + "_timestamp": 0 } ], - "output": [ + "perf_latency_p95": [ + "perf_latency_p95", { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, + "_value": 6.009, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, + "_value": 6.372, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] + "_value": 3066.01, + "_timestamp": 0 } ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.008, + "_timestamp": 0 } - }, - "backend": "sdk_backend", - "cpu_only": false + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.183, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 2.325, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 2.377, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.03, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.125, + "_timestamp": 0 + } + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_default -b 1 -i grpc -f FaceDetectionBLS_config_default-results.csv --verbose-csv --concurrency-range=1 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_default", + } + ] + }, + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_1": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 15.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2135,7 +9205,7 @@ [ "gpu_power_usage", { - "_value": 58.7977, + "_value": 53.4603, "_timestamp": 0, "_device_uuid": null } @@ -2146,7 +9216,7 @@ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -2154,7 +9224,7 @@ [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -2162,7 +9232,7 @@ [ "gpu_utilization", { - "_value": 15.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2170,7 +9240,7 @@ [ "gpu_power_usage", { - "_value": 58.7977, + "_value": 53.4603, "_timestamp": 0, "_device_uuid": null } @@ -2180,7 +9250,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -2188,7 +9258,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -2196,7 +9266,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 15.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2204,7 +9274,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 58.7977, + "_value": 53.4603, "_timestamp": 0, "_device_uuid": null } @@ -2212,86 +9282,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_default", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 1 + "concurrency-range": 16, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 1.664, + "_value": 5.118, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 1.844, + "_value": 5.751, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 1.952, + "_value": 5.986, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 2.105, + "_value": 6.376, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 589.21, + "_value": 3120.97, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.015, + "_value": 0.008, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 1.643, + "_value": 5.086, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.023, + "_value": 2.279, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.29, + "_value": 2.332, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.03, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.123, "_timestamp": 0 } ] @@ -2300,77 +9371,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 1.664, + "_value": 5.118, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 1.844, + "_value": 5.751, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 1.952, + "_value": 5.986, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 2.105, + "_value": 6.376, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 589.21, + "_value": 3120.97, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.015, + "_value": 0.008, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 1.643, + "_value": 5.086, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.023, + "_value": 2.279, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.29, + "_value": 2.332, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.03, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.123, "_timestamp": 0 } ] @@ -2378,30 +9449,30 @@ } ] }, - "-m FaceDetectionBLS_config_default -b 1 -i grpc -f FaceDetectionBLS_config_default-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_default", + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_2": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 38.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2409,7 +9480,7 @@ [ "gpu_power_usage", { - "_value": 106.646, + "_value": 56.725, "_timestamp": 0, "_device_uuid": null } @@ -2420,7 +9491,7 @@ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -2428,7 +9499,7 @@ [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -2436,7 +9507,7 @@ [ "gpu_utilization", { - "_value": 38.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2444,7 +9515,7 @@ [ "gpu_power_usage", { - "_value": 106.646, + "_value": 56.725, "_timestamp": 0, "_device_uuid": null } @@ -2454,7 +9525,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -2462,7 +9533,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -2470,7 +9541,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 38.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2478,7 +9549,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 106.646, + "_value": 56.725, "_timestamp": 0, "_device_uuid": null } @@ -2486,86 +9557,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_default", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 2 + "concurrency-range": 16, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 1.965, + "_value": 1.586, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 2.201, + "_value": 1.76, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 2.287, + "_value": 1.84, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 2.577, + "_value": 2.215, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 996.483, + "_value": 10077.3, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 1.937, + "_value": 1.573, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.624, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.462, + "_value": 0.702, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.049, "_timestamp": 0 } ] @@ -2574,77 +9646,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 1.965, + "_value": 1.586, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 2.201, + "_value": 1.76, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 2.287, + "_value": 1.84, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 2.577, + "_value": 2.215, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 996.483, + "_value": 10077.3, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 1.937, + "_value": 1.573, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.624, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.462, + "_value": 0.702, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.049, "_timestamp": 0 } ] @@ -2652,30 +9724,30 @@ } ] }, - "-m FaceDetectionBLS_config_default -b 1 -i grpc -f FaceDetectionBLS_config_default-results.csv --verbose-csv --concurrency-range=4 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_default", + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_2,sub_config_4": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 30.666700000000002, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2683,7 +9755,7 @@ [ "gpu_power_usage", { - "_value": 99.995, + "_value": 56.7035, "_timestamp": 0, "_device_uuid": null } @@ -2694,7 +9766,7 @@ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -2702,7 +9774,7 @@ [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -2710,7 +9782,7 @@ [ "gpu_utilization", { - "_value": 30.666700000000002, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2718,7 +9790,7 @@ [ "gpu_power_usage", { - "_value": 99.995, + "_value": 56.7035, "_timestamp": 0, "_device_uuid": null } @@ -2728,7 +9800,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -2736,7 +9808,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -2744,7 +9816,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 30.666700000000002, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2752,7 +9824,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 99.995, + "_value": 56.7035, "_timestamp": 0, "_device_uuid": null } @@ -2760,86 +9832,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_default", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 4 + "concurrency-range": 16, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 2.42, + "_value": 1.627, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 2.908, + "_value": 1.761, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 3.098, + "_value": 1.848, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 3.52, + "_value": 2.367, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1623.02, + "_value": 9824.14, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.021, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 2.39, + "_value": 1.613, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.028, + "_value": 0.642, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.888, + "_value": 0.721, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.048, "_timestamp": 0 } ] @@ -2848,77 +9921,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 2.42, + "_value": 1.627, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 2.908, + "_value": 1.761, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 3.098, + "_value": 1.848, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 3.52, + "_value": 2.367, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1623.02, + "_value": 9824.14, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.021, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 2.39, + "_value": 1.613, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.028, + "_value": 0.642, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.888, + "_value": 0.721, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.048, "_timestamp": 0 } ] @@ -2926,30 +9999,30 @@ } ] }, - "-m FaceDetectionBLS_config_default -b 1 -i grpc -f FaceDetectionBLS_config_default-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_default", + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 42.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2957,7 +10030,7 @@ [ "gpu_power_usage", { - "_value": 142.404, + "_value": 56.7623, "_timestamp": 0, "_device_uuid": null } @@ -2968,7 +10041,7 @@ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -2976,7 +10049,7 @@ [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -2984,7 +10057,7 @@ [ "gpu_utilization", { - "_value": 42.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2992,7 +10065,7 @@ [ "gpu_power_usage", { - "_value": 142.404, + "_value": 56.7623, "_timestamp": 0, "_device_uuid": null } @@ -3002,7 +10075,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -3010,7 +10083,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -3018,7 +10091,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 42.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3026,7 +10099,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 142.404, + "_value": 56.7623, "_timestamp": 0, "_device_uuid": null } @@ -3034,86 +10107,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_default", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 8 + "concurrency-range": 1, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 2.773, + "_value": 1.719, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 3.588, + "_value": 1.916, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 3.876, + "_value": 2.011, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 4.588, + "_value": 2.177, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 2847.85, + "_value": 580.858, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.018, + "_value": 0.007, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 2.748, + "_value": 1.691, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.022, + "_value": 0.028, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 2.329, + "_value": 1.253, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.033, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.142, "_timestamp": 0 } ] @@ -3122,77 +10196,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 2.773, + "_value": 1.719, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 3.588, + "_value": 1.916, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 3.876, + "_value": 2.011, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 4.588, + "_value": 2.177, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 2847.85, + "_value": 580.858, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.018, + "_value": 0.007, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 2.748, + "_value": 1.691, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.022, + "_value": 0.028, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 2.329, + "_value": 1.253, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.033, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.142, "_timestamp": 0 } ] @@ -3200,30 +10274,30 @@ } ] }, - "-m FaceDetectionBLS_config_default -b 1 -i grpc -f FaceDetectionBLS_config_default-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_default", + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 49.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3231,7 +10305,7 @@ [ "gpu_power_usage", { - "_value": 120.683, + "_value": 34.7513, "_timestamp": 0, "_device_uuid": null } @@ -3242,7 +10316,7 @@ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -3250,7 +10324,7 @@ [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -3258,7 +10332,7 @@ [ "gpu_utilization", { - "_value": 49.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3266,7 +10340,7 @@ [ "gpu_power_usage", { - "_value": 120.683, + "_value": 34.7513, "_timestamp": 0, "_device_uuid": null } @@ -3276,7 +10350,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -3284,7 +10358,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -3292,7 +10366,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 49.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3300,7 +10374,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 120.683, + "_value": 34.7513, "_timestamp": 0, "_device_uuid": null } @@ -3308,86 +10382,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_default", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 2, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 10.969, + "_value": 1.788, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 13.216, + "_value": 1.953, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 13.76, + "_value": 2.006, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 14.795, + "_value": 2.138, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1452.71, + "_value": 1116.3, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.023, + "_value": 0.008, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 10.936, + "_value": 1.758, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.028, + "_value": 0.026, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 10.405, + "_value": 1.263, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.033, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.131, "_timestamp": 0 } ] @@ -3396,536 +10471,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 10.969, + "_value": 1.788, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 13.216, + "_value": 1.953, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 13.76, + "_value": 2.006, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 14.795, + "_value": 2.138, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1452.71, + "_value": 1116.3, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.023, + "_value": 0.008, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 10.936, + "_value": 1.758, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.028, + "_value": 0.026, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 10.405, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.002, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.0, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "FaceDetectionBLS_config_0": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_0", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "backend": "bls", - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "FaceDetectionBLS_config_0", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "FaceDetectionBLS_config_0-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_0", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_0", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_0", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] + "_value": 1.263, + "_timestamp": 0 } ], - "instanceGroup": [ + "perf_server_compute_input": [ + "perf_server_compute_input", { - "count": 1, - "kind": "KIND_GPU" + "_value": 0.033, + "_timestamp": 0 } ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.131, + "_timestamp": 0 } - }, - "backend": "sdk_backend", - "cpu_only": false + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_0 -b 1 -i grpc -f FaceDetectionBLS_config_0-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_0", + } + ] + }, + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 8.33333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3933,7 +10580,7 @@ [ "gpu_power_usage", { - "_value": 59.607, + "_value": 23.7323, "_timestamp": 0, "_device_uuid": null } @@ -3944,7 +10591,7 @@ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -3952,7 +10599,7 @@ [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -3960,7 +10607,7 @@ [ "gpu_utilization", { - "_value": 8.33333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3968,7 +10615,7 @@ [ "gpu_power_usage", { - "_value": 59.607, + "_value": 23.7323, "_timestamp": 0, "_device_uuid": null } @@ -3978,7 +10625,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -3986,7 +10633,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -3994,7 +10641,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 8.33333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4002,7 +10649,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 59.607, + "_value": 23.7323, "_timestamp": 0, "_device_uuid": null } @@ -4010,86 +10657,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_0", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 4, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 24.383, + "_value": 2.069, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 25.677, + "_value": 2.26, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 26.168, + "_value": 2.332, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 27.096, + "_value": 2.508, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 652.275, + "_value": 1929.86, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.009, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 24.351, + "_value": 2.034, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 22.454, + "_value": 0.029, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.418, + "_value": 1.426, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.035, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.128, "_timestamp": 0 } ] @@ -4098,536 +10746,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 24.383, + "_value": 2.069, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 25.677, + "_value": 2.26, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 26.168, + "_value": 2.332, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 27.096, + "_value": 2.508, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 652.275, + "_value": 1929.86, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.009, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 24.351, + "_value": 2.034, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 22.454, + "_value": 0.029, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.418, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.0, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "FaceDetectionBLS_config_1": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_1", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "backend": "bls", - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "FaceDetectionBLS_config_1", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "FaceDetectionBLS_config_1-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_1", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_1", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_0", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] + "_value": 1.426, + "_timestamp": 0 } ], - "instanceGroup": [ + "perf_server_compute_input": [ + "perf_server_compute_input", { - "count": 1, - "kind": "KIND_GPU" + "_value": 0.035, + "_timestamp": 0 } ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.128, + "_timestamp": 0 } - }, - "backend": "sdk_backend", - "cpu_only": false + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_1 -b 1 -i grpc -f FaceDetectionBLS_config_1-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_1", + } + ] + }, + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 44.25, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4635,7 +10855,7 @@ [ "gpu_power_usage", { - "_value": 111.683, + "_value": 23.6932, "_timestamp": 0, "_device_uuid": null } @@ -4646,7 +10866,7 @@ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -4654,7 +10874,7 @@ [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -4662,7 +10882,7 @@ [ "gpu_utilization", { - "_value": 44.25, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4670,7 +10890,7 @@ [ "gpu_power_usage", { - "_value": 111.683, + "_value": 23.6932, "_timestamp": 0, "_device_uuid": null } @@ -4680,7 +10900,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -4688,7 +10908,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -4696,7 +10916,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 44.25, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4704,7 +10924,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 111.683, + "_value": 23.6932, "_timestamp": 0, "_device_uuid": null } @@ -4712,86 +10932,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_1", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 8, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 13.573, + "_value": 0.876, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 14.655, + "_value": 0.987, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 14.977, + "_value": 1.047, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.489, + "_value": 1.392, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1174.67, + "_value": 9115.99, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.023, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 13.54, + "_value": 0.863, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 11.461, + "_value": 0.012, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.568, + "_value": 0.63, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.049, "_timestamp": 0 } ] @@ -4800,536 +11021,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 13.573, + "_value": 0.876, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 14.655, + "_value": 0.987, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 14.977, + "_value": 1.047, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.489, + "_value": 1.392, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1174.67, + "_value": 9115.99, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.023, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 13.54, + "_value": 0.863, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 11.461, + "_value": 0.012, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.568, + "_value": 0.63, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.0, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "FaceDetectionBLS_config_2": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_2", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "backend": "bls", - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "FaceDetectionBLS_config_2", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "FaceDetectionBLS_config_2-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_2", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_1", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_0", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" + "_value": 0.013, + "_timestamp": 0 } ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.049, + "_timestamp": 0 } - }, - "backend": "sdk_backend", - "cpu_only": false + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_2 -b 1 -i grpc -f FaceDetectionBLS_config_2-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_2", + } + ] + }, + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 32.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5337,7 +11130,7 @@ [ "gpu_power_usage", { - "_value": 95.7177, + "_value": 23.6878, "_timestamp": 0, "_device_uuid": null } @@ -5348,7 +11141,7 @@ [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -5356,7 +11149,7 @@ [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -5364,7 +11157,7 @@ [ "gpu_utilization", { - "_value": 32.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5372,7 +11165,7 @@ [ "gpu_power_usage", { - "_value": 95.7177, + "_value": 23.6878, "_timestamp": 0, "_device_uuid": null } @@ -5382,7 +11175,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1312.817152, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -5390,7 +11183,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24456.986624, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -5398,7 +11191,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 32.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5406,7 +11199,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 95.7177, + "_value": 23.6878, "_timestamp": 0, "_device_uuid": null } @@ -5414,86 +11207,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_2", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 32, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 10.117, + "_value": 3.223, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 11.007, + "_value": 3.423, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 11.288, + "_value": 3.595, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 11.904, + "_value": 4.679, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1571.56, + "_value": 9915.61, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 10.086, + "_value": 3.209, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 7.825, + "_value": 2.249, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.78, + "_value": 0.711, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.051, "_timestamp": 0 } ] @@ -5502,536 +11296,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 10.117, + "_value": 3.223, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 11.007, + "_value": 3.423, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 11.288, + "_value": 3.595, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 11.904, + "_value": 4.679, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1571.56, + "_value": 9915.61, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 10.086, + "_value": 3.209, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 7.825, + "_value": 2.249, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.78, + "_value": 0.711, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.051, "_timestamp": 0 } - ] - } - } - ] - } - } - ], - "FaceDetectionBLS_config_3": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_3", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "backend": "bls", - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "FaceDetectionBLS_config_3", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "FaceDetectionBLS_config_3-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_3", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_3", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_0", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" - } - }, - "backend": "sdk_backend", - "cpu_only": false + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_3 -b 1 -i grpc -f FaceDetectionBLS_config_3-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_3", + } + ] + }, + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 17.333299999999998, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6039,7 +11405,7 @@ [ "gpu_power_usage", { - "_value": 103.01, + "_value": 23.6623, "_timestamp": 0, "_device_uuid": null } @@ -6050,7 +11416,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6058,7 +11424,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6066,7 +11432,7 @@ [ "gpu_utilization", { - "_value": 17.333299999999998, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6074,7 +11440,7 @@ [ "gpu_power_usage", { - "_value": 103.01, + "_value": 23.6623, "_timestamp": 0, "_device_uuid": null } @@ -6084,7 +11450,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6092,7 +11458,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6100,7 +11466,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 17.333299999999998, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6108,7 +11474,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 103.01, + "_value": 23.6623, "_timestamp": 0, "_device_uuid": null } @@ -6116,86 +11482,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_3", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 64, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.167, + "_value": 6.5, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 8.186, + "_value": 6.76, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 8.686, + "_value": 7.109, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 9.893, + "_value": 10.155, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 2216.53, + "_value": 9822.01, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.139, + "_value": 6.486, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 4.561, + "_value": 5.511, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 2.139, + "_value": 0.717, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.052, "_timestamp": 0 } ] @@ -6204,536 +11571,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.167, + "_value": 6.5, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 8.186, + "_value": 6.76, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 8.686, + "_value": 7.109, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 9.893, + "_value": 10.155, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 2216.53, + "_value": 9822.01, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.139, + "_value": 6.486, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 4.561, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 2.139, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.0, + "_value": 5.511, "_timestamp": 0 } - ] - } - } - ] - } - } - ], - "FaceDetectionBLS_config_4": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_4", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "backend": "bls", - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "FaceDetectionBLS_config_4", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "FaceDetectionBLS_config_4-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_5", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 6, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_2", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.717, + "_timestamp": 0 } - }, - "backend": "sdk_backend", - "cpu_only": false + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.014, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.052, + "_timestamp": 0 + } + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_4 -b 1 -i grpc -f FaceDetectionBLS_config_4-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_4", + } + ] + }, + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=128 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 20.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6741,7 +11680,7 @@ [ "gpu_power_usage", { - "_value": 100.959, + "_value": 23.639, "_timestamp": 0, "_device_uuid": null } @@ -6752,7 +11691,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6760,7 +11699,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6768,7 +11707,7 @@ [ "gpu_utilization", { - "_value": 20.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6776,7 +11715,7 @@ [ "gpu_power_usage", { - "_value": 100.959, + "_value": 23.639, "_timestamp": 0, "_device_uuid": null } @@ -6786,7 +11725,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6794,7 +11733,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6802,7 +11741,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 20.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6810,7 +11749,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 100.959, + "_value": 23.639, "_timestamp": 0, "_device_uuid": null } @@ -6818,86 +11757,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_4", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 128, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 8.572, + "_value": 13.034, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 9.441, + "_value": 13.185, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 9.801, + "_value": 14.044, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 10.908, + "_value": 18.842, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1853.84, + "_value": 9772.16, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.004, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 8.54, + "_value": 13.019, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 6.028, + "_value": 11.922, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 2.032, + "_value": 0.719, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.015, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.053, "_timestamp": 0 } ] @@ -6906,536 +11846,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 8.572, + "_value": 13.034, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 9.441, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 9.801, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 10.908, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 1853.84, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.022, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 8.54, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 6.028, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 2.032, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.0, + "_value": 13.185, "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "FaceDetectionBLS_config_5": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_5", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 6, - "kind": "KIND_GPU" - } - ], - "backend": "bls", - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "FaceDetectionBLS_config_5", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "FaceDetectionBLS_config_5-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_3", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_3", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_0", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, + } + ], + "perf_latency_p95": [ + "perf_latency_p95", { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, + "_value": 14.044, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, + "_value": 18.842, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, + "_value": 9772.16, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] + "_value": 0.004, + "_timestamp": 0 } ], - "output": [ + "perf_client_response_wait": [ + "perf_client_response_wait", { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, + "_value": 13.019, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, + "_value": 11.922, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] + "_value": 0.719, + "_timestamp": 0 } ], - "instanceGroup": [ + "perf_server_compute_input": [ + "perf_server_compute_input", { - "count": 1, - "kind": "KIND_GPU" + "_value": 0.015, + "_timestamp": 0 } ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.053, + "_timestamp": 0 } - }, - "backend": "sdk_backend", - "cpu_only": false + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_5 -b 1 -i grpc -f FaceDetectionBLS_config_5-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_5", + } + ] + }, + "bls_config_6 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=256 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_0": { + "_model_variants_name": "bls_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 23.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7443,7 +11955,7 @@ [ "gpu_power_usage", { - "_value": 107.85, + "_value": 23.6405, "_timestamp": 0, "_device_uuid": null } @@ -7454,7 +11966,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -7462,7 +11974,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -7470,7 +11982,7 @@ [ "gpu_utilization", { - "_value": 23.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7478,7 +11990,7 @@ [ "gpu_power_usage", { - "_value": 107.85, + "_value": 23.6405, "_timestamp": 0, "_device_uuid": null } @@ -7488,7 +12000,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -7496,7 +12008,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -7504,7 +12016,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 23.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7512,7 +12024,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 107.85, + "_value": 23.6405, "_timestamp": 0, "_device_uuid": null } @@ -7520,86 +12032,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_5", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 256, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 6.09, + "_value": 26.088, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 7.133, + "_value": 26.526, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 7.565, + "_value": 28.456, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 8.957, + "_value": 33.974, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 2607.07, + "_value": 9731.24, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.019, + "_value": 0.004, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 6.063, + "_value": 26.072, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 3.458, + "_value": 25.035, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 2.194, + "_value": 0.723, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.015, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.053, "_timestamp": 0 } ] @@ -7608,77 +12121,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 6.09, + "_value": 26.088, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 7.133, + "_value": 26.526, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 7.565, + "_value": 28.456, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 8.957, + "_value": 33.974, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 2607.07, + "_value": 9731.24, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.019, + "_value": 0.004, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 6.063, + "_value": 26.072, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 3.458, + "_value": 25.035, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 2.194, + "_value": 0.723, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.015, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.053, "_timestamp": 0 } ] @@ -7688,85 +12201,56 @@ } } ], - "FaceDetectionBLS_config_6": [ + "bls_config_7": [ { "_triton_env": {}, "_model_run_configs": [ { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_6", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 7, - "kind": "KIND_GPU" - } - ], - "backend": "bls", + "_model_name": "bls", + "_model_config_variant": { + "model_config": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 7, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "bls_config_7", "cpu_only": false }, "_perf_config": { @@ -7776,7 +12260,7 @@ "async": null, "sync": null, "measurement-interval": null, - "concurrency-range": 16, + "concurrency-range": 14, "request-rate-range": null, "request-distribution": null, "request-intervals": null, @@ -7787,9 +12271,9 @@ "stability-percentage": null, "max-trials": null, "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, @@ -7814,15 +12298,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 + "metrics-interval": 1000, + "bls-composing-models": "add,sub" }, "_options": { - "-m": "FaceDetectionBLS_config_6", + "-m": "bls", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "FaceDetectionBLS_config_6-results.csv", + "-f": "bls-results.csv", "-H": null }, "_verbose": { @@ -7849,264 +12334,83 @@ "shape": null } }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_5", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 6, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, + "_composing_config_variants": [ { - "name": "FaceDetectionPostprocessing_config_0", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" ] - }, + } + ], + "instanceGroup": [ + { + "count": 5, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_6", + "cpu_only": false + }, + { + "model_config": { + "name": "sub", + "input": [ { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" ] }, { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" ] - }, + } + ], + "output": [ { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" ] } ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" - } + "instanceGroup": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" }, - "backend": "sdk_backend", + "variant_name": "sub_config_2", "cpu_only": false } ] @@ -8114,30 +12418,30 @@ ] }, { - "-m FaceDetectionBLS_config_6 -b 1 -i grpc -f FaceDetectionBLS_config_6-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_6", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=14 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 40.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8145,7 +12449,7 @@ [ "gpu_power_usage", { - "_value": 111.54, + "_value": 56.632, "_timestamp": 0, "_device_uuid": null } @@ -8156,7 +12460,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -8164,7 +12468,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -8172,7 +12476,7 @@ [ "gpu_utilization", { - "_value": 40.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8180,7 +12484,7 @@ [ "gpu_power_usage", { - "_value": 111.54, + "_value": 56.632, "_timestamp": 0, "_device_uuid": null } @@ -8190,7 +12494,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -8198,7 +12502,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -8206,7 +12510,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 40.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8214,7 +12518,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 111.54, + "_value": 56.632, "_timestamp": 0, "_device_uuid": null } @@ -8222,86 +12526,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_6", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 14, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 5.745, + "_value": 1.526, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 6.685, + "_value": 1.632, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 7.024, + "_value": 1.717, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 7.872, + "_value": 2.236, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 2763.81, + "_value": 9161.43, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.019, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 5.718, + "_value": 1.513, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 2.889, + "_value": 0.623, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 2.428, + "_value": 0.661, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.055, "_timestamp": 0 } ] @@ -8310,77 +12615,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 5.745, + "_value": 1.526, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 6.685, + "_value": 1.632, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 7.024, + "_value": 1.717, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 7.872, + "_value": 2.236, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 2763.81, + "_value": 9161.43, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.019, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 5.718, + "_value": 1.513, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 2.889, + "_value": 0.623, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 2.428, + "_value": 0.661, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.055, "_timestamp": 0 } ] @@ -8388,30 +12693,30 @@ } ] }, - "-m FaceDetectionBLS_config_6 -b 1 -i grpc -f FaceDetectionBLS_config_6-results.csv --verbose-csv --concurrency-range=1 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_6", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=14 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 10.666699999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8419,7 +12724,7 @@ [ "gpu_power_usage", { - "_value": 60.5053, + "_value": 56.7988, "_timestamp": 0, "_device_uuid": null } @@ -8430,7 +12735,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -8438,7 +12743,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -8446,7 +12751,7 @@ [ "gpu_utilization", { - "_value": 10.666699999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8454,7 +12759,7 @@ [ "gpu_power_usage", { - "_value": 60.5053, + "_value": 56.7988, "_timestamp": 0, "_device_uuid": null } @@ -8464,7 +12769,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -8472,7 +12777,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -8480,7 +12785,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 10.666699999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8488,7 +12793,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 60.5053, + "_value": 56.7988, "_timestamp": 0, "_device_uuid": null } @@ -8496,86 +12801,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_6", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 1 + "concurrency-range": 14, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 1.801, + "_value": 1.455, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 1.974, + "_value": 1.593, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 2.043, + "_value": 1.645, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 2.292, + "_value": 1.776, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 544.227, + "_value": 9610.87, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.016, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 1.778, + "_value": 1.442, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.591, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.385, + "_value": 0.628, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.054, "_timestamp": 0 } ] @@ -8584,77 +12890,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 1.801, + "_value": 1.455, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 1.974, + "_value": 1.593, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 2.043, + "_value": 1.645, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 2.292, + "_value": 1.776, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 544.227, + "_value": 9610.87, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.016, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 1.778, + "_value": 1.442, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.591, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.385, + "_value": 0.628, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.054, "_timestamp": 0 } ] @@ -8662,30 +12968,30 @@ } ] }, - "-m FaceDetectionBLS_config_6 -b 1 -i grpc -f FaceDetectionBLS_config_6-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_6", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=14 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_4,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 23.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8693,7 +12999,7 @@ [ "gpu_power_usage", { - "_value": 81.5127, + "_value": 56.8143, "_timestamp": 0, "_device_uuid": null } @@ -8704,7 +13010,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -8712,7 +13018,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -8720,7 +13026,7 @@ [ "gpu_utilization", { - "_value": 23.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8728,7 +13034,7 @@ [ "gpu_power_usage", { - "_value": 81.5127, + "_value": 56.8143, "_timestamp": 0, "_device_uuid": null } @@ -8738,7 +13044,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -8746,7 +13052,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -8754,7 +13060,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 23.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8762,7 +13068,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 81.5127, + "_value": 56.8143, "_timestamp": 0, "_device_uuid": null } @@ -8770,86 +13076,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_6", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 2 + "concurrency-range": 14, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 1.923, + "_value": 1.485, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 2.125, + "_value": 1.622, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 2.207, + "_value": 1.718, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 2.423, + "_value": 2.389, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1018.38, + "_value": 9412.5, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 1.895, + "_value": 1.472, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.026, + "_value": 0.594, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.437, + "_value": 0.643, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.053, "_timestamp": 0 } ] @@ -8858,77 +13165,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 1.923, + "_value": 1.485, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 2.125, + "_value": 1.622, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 2.207, + "_value": 1.718, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 2.423, + "_value": 2.389, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1018.38, + "_value": 9412.5, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 1.895, + "_value": 1.472, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.026, + "_value": 0.594, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.437, + "_value": 0.643, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.053, "_timestamp": 0 } ] @@ -8936,30 +13243,30 @@ } ] }, - "-m FaceDetectionBLS_config_6 -b 1 -i grpc -f FaceDetectionBLS_config_6-results.csv --verbose-csv --concurrency-range=4 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_6", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=14 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_1": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 31.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8967,7 +13274,7 @@ [ "gpu_power_usage", { - "_value": 98.8347, + "_value": 23.916, "_timestamp": 0, "_device_uuid": null } @@ -8978,7 +13285,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -8986,7 +13293,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -8994,7 +13301,7 @@ [ "gpu_utilization", { - "_value": 31.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9002,7 +13309,7 @@ [ "gpu_power_usage", { - "_value": 98.8347, + "_value": 23.916, "_timestamp": 0, "_device_uuid": null } @@ -9012,7 +13319,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -9020,7 +13327,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -9028,7 +13335,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 31.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9036,7 +13343,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 98.8347, + "_value": 23.916, "_timestamp": 0, "_device_uuid": null } @@ -9044,86 +13351,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_6", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 4 + "concurrency-range": 14, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 2.321, + "_value": 1.475, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 2.726, + "_value": 1.611, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 2.88, + "_value": 1.687, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 3.22, + "_value": 2.182, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1690.98, + "_value": 9477.85, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.021, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 2.291, + "_value": 1.462, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.596, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.801, + "_value": 0.638, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.054, "_timestamp": 0 } ] @@ -9132,77 +13440,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 2.321, + "_value": 1.475, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 2.726, + "_value": 1.611, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 2.88, + "_value": 1.687, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 3.22, + "_value": 2.182, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1690.98, + "_value": 9477.85, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.021, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 2.291, + "_value": 1.462, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.596, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.801, + "_value": 0.638, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.054, "_timestamp": 0 } ] @@ -9210,30 +13518,30 @@ } ] }, - "-m FaceDetectionBLS_config_6 -b 1 -i grpc -f FaceDetectionBLS_config_6-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_6", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=14 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_4": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 50.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9241,7 +13549,7 @@ [ "gpu_power_usage", { - "_value": 120.15, + "_value": 23.8205, "_timestamp": 0, "_device_uuid": null } @@ -9252,7 +13560,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -9260,7 +13568,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -9268,7 +13576,7 @@ [ "gpu_utilization", { - "_value": 50.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9276,7 +13584,7 @@ [ "gpu_power_usage", { - "_value": 120.15, + "_value": 23.8205, "_timestamp": 0, "_device_uuid": null } @@ -9286,7 +13594,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -9294,7 +13602,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -9302,7 +13610,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 50.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9310,7 +13618,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 120.15, + "_value": 23.8205, "_timestamp": 0, "_device_uuid": null } @@ -9318,86 +13626,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_6", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 8 + "concurrency-range": 14, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 4.928, + "_value": 1.467, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 6.306, + "_value": 1.597, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 6.702, + "_value": 1.657, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 7.621, + "_value": 1.893, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1608.44, + "_value": 9532.59, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 4.897, + "_value": 1.453, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.232, + "_value": 0.593, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.162, + "_value": 0.632, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.054, "_timestamp": 0 } ] @@ -9406,536 +13715,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 4.928, + "_value": 1.467, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 6.306, + "_value": 1.597, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 6.702, + "_value": 1.657, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 7.621, + "_value": 1.893, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1608.44, + "_value": 9532.59, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 4.897, + "_value": 1.453, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.232, + "_value": 0.593, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 4.162, + "_value": 0.632, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.0, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "FaceDetectionBLS_config_7": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_7", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 8, - "kind": "KIND_GPU" - } - ], - "backend": "bls", - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "FaceDetectionBLS_config_7", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "FaceDetectionBLS_config_7-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_5", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 6, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_0", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.054, + "_timestamp": 0 } - }, - "backend": "sdk_backend", - "cpu_only": false + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_7 -b 1 -i grpc -f FaceDetectionBLS_config_7-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_7", + } + ] + }, + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=14 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_2,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 57.666700000000006, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9943,7 +13824,7 @@ [ "gpu_power_usage", { - "_value": 140.584, + "_value": 56.7725, "_timestamp": 0, "_device_uuid": null } @@ -9954,7 +13835,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -9962,7 +13843,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -9970,7 +13851,7 @@ [ "gpu_utilization", { - "_value": 57.666700000000006, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9978,7 +13859,7 @@ [ "gpu_power_usage", { - "_value": 140.584, + "_value": 56.7725, "_timestamp": 0, "_device_uuid": null } @@ -9988,7 +13869,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -9996,7 +13877,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -10004,7 +13885,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 57.666700000000006, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10012,7 +13893,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 140.584, + "_value": 56.7725, "_timestamp": 0, "_device_uuid": null } @@ -10020,86 +13901,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_7", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 14, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 4.989, + "_value": 1.452, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 6.28, + "_value": 1.587, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 6.65, + "_value": 1.636, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 7.439, + "_value": 1.752, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 3183.7, + "_value": 9629.36, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.017, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 4.966, + "_value": 1.439, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 2.174, + "_value": 0.578, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 2.418, + "_value": 0.635, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.048, "_timestamp": 0 } ] @@ -10108,77 +13990,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 4.989, + "_value": 1.452, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 6.28, + "_value": 1.587, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 6.65, + "_value": 1.636, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 7.439, + "_value": 1.752, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 3183.7, + "_value": 9629.36, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.017, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 4.966, + "_value": 1.439, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 2.174, + "_value": 0.578, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 2.418, + "_value": 0.635, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.013, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.048, "_timestamp": 0 } ] @@ -10186,30 +14068,30 @@ } ] }, - "-m FaceDetectionBLS_config_7 -b 1 -i grpc -f FaceDetectionBLS_config_7-results.csv --verbose-csv --concurrency-range=1 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_7", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=14 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_1": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 12.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10217,7 +14099,7 @@ [ "gpu_power_usage", { - "_value": 60.0487, + "_value": 56.7557, "_timestamp": 0, "_device_uuid": null } @@ -10228,7 +14110,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -10236,7 +14118,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -10244,7 +14126,7 @@ [ "gpu_utilization", { - "_value": 12.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10252,7 +14134,7 @@ [ "gpu_power_usage", { - "_value": 60.0487, + "_value": 56.7557, "_timestamp": 0, "_device_uuid": null } @@ -10262,7 +14144,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -10270,7 +14152,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -10278,7 +14160,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 12.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10286,7 +14168,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 60.0487, + "_value": 56.7557, "_timestamp": 0, "_device_uuid": null } @@ -10294,86 +14176,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_7", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 1 + "concurrency-range": 14, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 1.794, + "_value": 1.473, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 1.924, + "_value": 1.612, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 1.994, + "_value": 1.678, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 2.137, + "_value": 2.021, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 545.843, + "_value": 9489.7, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.015, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 1.772, + "_value": 1.46, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.026, + "_value": 0.585, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.381, + "_value": 0.643, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.049, "_timestamp": 0 } ] @@ -10382,77 +14265,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 1.794, + "_value": 1.473, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 1.924, + "_value": 1.612, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 1.994, + "_value": 1.678, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 2.137, + "_value": 2.021, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 545.843, + "_value": 9489.7, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.015, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 1.772, + "_value": 1.46, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.026, + "_value": 0.585, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.381, + "_value": 0.643, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.013, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.049, "_timestamp": 0 } ] @@ -10460,30 +14343,30 @@ } ] }, - "-m FaceDetectionBLS_config_7 -b 1 -i grpc -f FaceDetectionBLS_config_7-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_7", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=14 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_3,sub_config_4": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 24.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10491,7 +14374,7 @@ [ "gpu_power_usage", { - "_value": 73.5297, + "_value": 32.0528, "_timestamp": 0, "_device_uuid": null } @@ -10502,7 +14385,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -10510,7 +14393,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -10518,7 +14401,7 @@ [ "gpu_utilization", { - "_value": 24.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10526,7 +14409,7 @@ [ "gpu_power_usage", { - "_value": 73.5297, + "_value": 32.0528, "_timestamp": 0, "_device_uuid": null } @@ -10536,7 +14419,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -10544,7 +14427,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -10552,7 +14435,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 24.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10560,7 +14443,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 73.5297, + "_value": 32.0528, "_timestamp": 0, "_device_uuid": null } @@ -10568,86 +14451,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_7", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 2 + "concurrency-range": 14, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 1.963, + "_value": 1.464, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 2.136, + "_value": 1.597, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 2.213, + "_value": 1.656, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 2.432, + "_value": 1.914, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 997.349, + "_value": 9551.42, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 1.934, + "_value": 1.45, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.583, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.464, + "_value": 0.639, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.049, "_timestamp": 0 } ] @@ -10656,77 +14540,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 1.963, + "_value": 1.464, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 2.136, + "_value": 1.597, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 2.213, + "_value": 1.656, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 2.432, + "_value": 1.914, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 997.349, + "_value": 9551.42, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 1.934, + "_value": 1.45, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.583, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.464, + "_value": 0.639, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.013, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.049, "_timestamp": 0 } ] @@ -10734,30 +14618,30 @@ } ] }, - "-m FaceDetectionBLS_config_7 -b 1 -i grpc -f FaceDetectionBLS_config_7-results.csv --verbose-csv --concurrency-range=4 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_7", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 32.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10765,7 +14649,7 @@ [ "gpu_power_usage", { - "_value": 96.087, + "_value": 56.735, "_timestamp": 0, "_device_uuid": null } @@ -10776,7 +14660,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -10784,7 +14668,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -10792,7 +14676,7 @@ [ "gpu_utilization", { - "_value": 32.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10800,7 +14684,7 @@ [ "gpu_power_usage", { - "_value": 96.087, + "_value": 56.735, "_timestamp": 0, "_device_uuid": null } @@ -10810,7 +14694,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -10818,7 +14702,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -10826,7 +14710,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 32.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10834,7 +14718,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 96.087, + "_value": 56.735, "_timestamp": 0, "_device_uuid": null } @@ -10842,86 +14726,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_7", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 4 + "concurrency-range": 1, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 2.357, + "_value": 1.699, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 2.775, + "_value": 1.913, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 2.939, + "_value": 1.981, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 3.256, + "_value": 2.135, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1665.9, + "_value": 587.452, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.021, + "_value": 0.007, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 2.327, + "_value": 1.672, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.029, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.828, + "_value": 1.237, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.033, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.142, "_timestamp": 0 } ] @@ -10930,77 +14815,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 2.357, + "_value": 1.699, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 2.775, + "_value": 1.913, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 2.939, + "_value": 1.981, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 3.256, + "_value": 2.135, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1665.9, + "_value": 587.452, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.021, + "_value": 0.007, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 2.327, + "_value": 1.672, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.029, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.828, + "_value": 1.237, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.033, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.142, "_timestamp": 0 } ] @@ -11008,30 +14893,30 @@ } ] }, - "-m FaceDetectionBLS_config_7 -b 1 -i grpc -f FaceDetectionBLS_config_7-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_7", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 43.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11039,7 +14924,7 @@ [ "gpu_power_usage", { - "_value": 136.339, + "_value": 56.6767, "_timestamp": 0, "_device_uuid": null } @@ -11050,7 +14935,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -11058,7 +14943,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -11066,7 +14951,7 @@ [ "gpu_utilization", { - "_value": 43.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11074,7 +14959,7 @@ [ "gpu_power_usage", { - "_value": 136.339, + "_value": 56.6767, "_timestamp": 0, "_device_uuid": null } @@ -11084,7 +14969,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -11092,7 +14977,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -11100,7 +14985,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 43.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11108,7 +14993,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 136.339, + "_value": 56.6767, "_timestamp": 0, "_device_uuid": null } @@ -11116,86 +15001,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_7", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 8 + "concurrency-range": 2, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 2.826, + "_value": 1.794, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 3.601, + "_value": 1.968, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 3.887, + "_value": 2.018, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 4.506, + "_value": 2.153, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 2795.7, + "_value": 1113.08, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.017, + "_value": 0.008, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 2.801, + "_value": 1.763, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.022, + "_value": 0.027, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 2.382, + "_value": 1.262, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.033, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.127, "_timestamp": 0 } ] @@ -11204,536 +15090,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 2.826, + "_value": 1.794, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 3.601, + "_value": 1.968, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 3.887, + "_value": 2.018, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 4.506, + "_value": 2.153, "_timestamp": 0 } ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 2795.7, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.017, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 2.801, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.022, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 2.382, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.0, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "FaceDetectionBLS_config_8": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "FaceDetectionBLS", - "_model_config": { - "name": "FaceDetectionBLS_config_8", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "-1", - "1" - ] - }, - { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 9, - "kind": "KIND_GPU" - } - ], - "backend": "bls", - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/face_detect.json", - "shared-memory": "cuda", - "output-shared-memory-size": "24883200", - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "FaceDetectionBLS_config_8", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "FaceDetectionBLS_config_8-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "FaceDetectionPreprocessing_config_5", - "platform": "FaceDetectionPreprocessing", - "input": [ - { - "name": "input", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "3", - "208", - "368" - ] - }, - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] - } - ], - "instanceGroup": [ - { - "count": 6, - "kind": "KIND_GPU" - } - ], - "backend": "sdk_backend", - "cpu_only": false - }, - { - "name": "FaceDetectionModel_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "208", - "368" - ] - } - ], - "output": [ - { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "4", - "13", - "23" - ] - }, - { - "name": "output_cov/Sigmoid", - "dataType": "TYPE_FP32", - "dims": [ - "1", - "13", - "23" - ] - } - ], - "instanceGroup": [ + "perf_throughput": [ + "perf_throughput", { - "count": 5, - "kind": "KIND_GPU" + "_value": 1113.08, + "_timestamp": 0 } ], - "dynamicBatching": {}, - "cpu_only": false - }, - { - "name": "FaceDetectionPostprocessing_config_0", - "platform": "FaceDetectionPostprocessing", - "input": [ - { - "name": "scale_offset", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "1" - ] - }, - { - "name": "input_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "13", - "23" - ] - }, - { - "name": "input_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "1", - "13", - "23" - ] - }, - { - "name": "temporal", - "dataType": "TYPE_UINT32", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, - { - "name": "max_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] - }, + "perf_client_send_recv": [ + "perf_client_send_recv", { - "name": "original_shape", - "dataType": "TYPE_UINT32", - "dims": [ - "1", - "2", - "1", - "1" - ] + "_value": 0.008, + "_timestamp": 0 } ], - "output": [ + "perf_client_response_wait": [ + "perf_client_response_wait", { - "name": "output_bbox", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, + "_value": 1.763, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", { - "name": "output_bbox_conf", - "dataType": "TYPE_FP32", - "dims": [ - "-1", - "4", - "1", - "-1" - ] - }, + "_value": 0.027, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", { - "name": "output_num_bbox", - "dataType": "TYPE_UINT8", - "dims": [ - "-1", - "1", - "1", - "1" - ] + "_value": 1.262, + "_timestamp": 0 } ], - "instanceGroup": [ + "perf_server_compute_input": [ + "perf_server_compute_input", { - "count": 1, - "kind": "KIND_GPU" + "_value": 0.033, + "_timestamp": 0 } ], - "sequenceBatching": { - "maxSequenceIdleMicroseconds": "5000000", - "controlInput": [ - { - "name": "START", - "control": [ - { - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "END", - "control": [ - { - "kind": "CONTROL_SEQUENCE_END", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "READY", - "control": [ - { - "kind": "CONTROL_SEQUENCE_READY", - "fp32FalseTrue": [ - 0.0, - 1.0 - ] - } - ] - }, - { - "name": "CORRID", - "control": [ - { - "kind": "CONTROL_SEQUENCE_CORRID", - "dataType": "TYPE_UINT64" - } - ] - } - ], - "oldest": { - "maxCandidateSequences": 10, - "maxQueueDelayMicroseconds": "50000" + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.127, + "_timestamp": 0 } - }, - "backend": "sdk_backend", - "cpu_only": false + ] } - ] - } - ] - }, - { - "-m FaceDetectionBLS_config_8 -b 1 -i grpc -f FaceDetectionBLS_config_8-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_8", + } + ] + }, + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 28.000000000000004, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11741,7 +15199,7 @@ [ "gpu_power_usage", { - "_value": 114.018, + "_value": 23.6833, "_timestamp": 0, "_device_uuid": null } @@ -11752,7 +15210,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -11760,7 +15218,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -11768,7 +15226,7 @@ [ "gpu_utilization", { - "_value": 28.000000000000004, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11776,7 +15234,7 @@ [ "gpu_power_usage", { - "_value": 114.018, + "_value": 23.6833, "_timestamp": 0, "_device_uuid": null } @@ -11786,7 +15244,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -11794,7 +15252,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -11802,7 +15260,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 28.000000000000004, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11810,7 +15268,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 114.018, + "_value": 23.6833, "_timestamp": 0, "_device_uuid": null } @@ -11818,86 +15276,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_8", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 4, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 5.051, + "_value": 2.066, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 6.302, + "_value": 2.253, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 6.68, + "_value": 2.325, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 7.774, + "_value": 2.464, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 3142.0, + "_value": 1932.8, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.018, + "_value": 0.009, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 5.026, + "_value": 2.031, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 1.878, + "_value": 0.03, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 2.766, + "_value": 1.426, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.036, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.129, "_timestamp": 0 } ] @@ -11906,77 +15365,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 5.051, + "_value": 2.066, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 6.302, + "_value": 2.253, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 6.68, + "_value": 2.325, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 7.774, + "_value": 2.464, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 3142.0, + "_value": 1932.8, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.018, + "_value": 0.009, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 5.026, + "_value": 2.031, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 1.878, + "_value": 0.03, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 2.766, + "_value": 1.426, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.001, + "_value": 0.036, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.129, "_timestamp": 0 } ] @@ -11984,30 +15443,30 @@ } ] }, - "-m FaceDetectionBLS_config_8 -b 1 -i grpc -f FaceDetectionBLS_config_8-results.csv --verbose-csv --concurrency-range=1 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_8", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 9.66667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12015,7 +15474,7 @@ [ "gpu_power_usage", { - "_value": 60.124, + "_value": 23.6703, "_timestamp": 0, "_device_uuid": null } @@ -12026,7 +15485,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -12034,7 +15493,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -12042,7 +15501,7 @@ [ "gpu_utilization", { - "_value": 9.66667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12050,7 +15509,7 @@ [ "gpu_power_usage", { - "_value": 60.124, + "_value": 23.6703, "_timestamp": 0, "_device_uuid": null } @@ -12060,7 +15519,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -12068,7 +15527,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -12076,7 +15535,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 9.66667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12084,7 +15543,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 60.124, + "_value": 23.6703, "_timestamp": 0, "_device_uuid": null } @@ -12092,86 +15551,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_8", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 1 + "concurrency-range": 8, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 1.833, + "_value": 0.877, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 1.957, + "_value": 0.997, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 2.016, + "_value": 1.051, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 2.152, + "_value": 1.254, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 534.564, + "_value": 9099.48, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.016, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 1.81, + "_value": 0.864, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.028, + "_value": 0.039, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.404, + "_value": 0.609, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.054, "_timestamp": 0 } ] @@ -12180,77 +15640,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 1.833, + "_value": 0.877, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 1.957, + "_value": 0.997, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 2.016, + "_value": 1.051, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 2.152, + "_value": 1.254, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 534.564, + "_value": 9099.48, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.016, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 1.81, + "_value": 0.864, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.028, + "_value": 0.039, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.404, + "_value": 0.609, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.013, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.054, "_timestamp": 0 } ] @@ -12258,30 +15718,30 @@ } ] }, - "-m FaceDetectionBLS_config_8 -b 1 -i grpc -f FaceDetectionBLS_config_8-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_8", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 23.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12289,7 +15749,7 @@ [ "gpu_power_usage", { - "_value": 77.091, + "_value": 23.6685, "_timestamp": 0, "_device_uuid": null } @@ -12300,7 +15760,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -12308,7 +15768,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -12316,7 +15776,7 @@ [ "gpu_utilization", { - "_value": 23.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12324,7 +15784,7 @@ [ "gpu_power_usage", { - "_value": 77.091, + "_value": 23.6685, "_timestamp": 0, "_device_uuid": null } @@ -12334,7 +15794,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -12342,7 +15802,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -12350,7 +15810,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 23.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12358,7 +15818,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 77.091, + "_value": 23.6685, "_timestamp": 0, "_device_uuid": null } @@ -12366,86 +15826,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_8", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 2 + "concurrency-range": 16, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 1.977, + "_value": 1.689, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 2.178, + "_value": 1.874, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 2.281, + "_value": 1.938, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 2.693, + "_value": 2.167, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 991.034, + "_value": 9460.29, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 1.948, + "_value": 1.676, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.811, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.477, + "_value": 0.638, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.055, "_timestamp": 0 } ] @@ -12454,77 +15915,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 1.977, + "_value": 1.689, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 2.178, + "_value": 1.874, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 2.281, + "_value": 1.938, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 2.693, + "_value": 2.167, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 991.034, + "_value": 9460.29, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.02, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 1.948, + "_value": 1.676, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.027, + "_value": 0.811, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.477, + "_value": 0.638, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.055, "_timestamp": 0 } ] @@ -12532,30 +15993,30 @@ } ] }, - "-m FaceDetectionBLS_config_8 -b 1 -i grpc -f FaceDetectionBLS_config_8-results.csv --verbose-csv --concurrency-range=4 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_8", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 52.74999999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12563,7 +16024,7 @@ [ "gpu_power_usage", { - "_value": 118.227, + "_value": 23.7035, "_timestamp": 0, "_device_uuid": null } @@ -12574,7 +16035,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -12582,7 +16043,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -12590,7 +16051,7 @@ [ "gpu_utilization", { - "_value": 52.74999999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12598,7 +16059,7 @@ [ "gpu_power_usage", { - "_value": 118.227, + "_value": 23.7035, "_timestamp": 0, "_device_uuid": null } @@ -12608,7 +16069,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -12616,7 +16077,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -12624,7 +16085,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 52.74999999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12632,7 +16093,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 118.227, + "_value": 23.7035, "_timestamp": 0, "_device_uuid": null } @@ -12640,86 +16101,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_8", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 4 + "concurrency-range": 32, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 2.508, + "_value": 3.407, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 3.005, + "_value": 3.589, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 3.216, + "_value": 3.677, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 3.647, + "_value": 4.185, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1568.01, + "_value": 9378.86, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.021, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 2.478, + "_value": 3.393, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.027, + "_value": 2.523, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 1.978, + "_value": 0.641, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.057, "_timestamp": 0 } ] @@ -12728,77 +16190,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 2.508, + "_value": 3.407, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 3.005, + "_value": 3.589, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 3.216, + "_value": 3.677, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 3.647, + "_value": 4.185, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1568.01, + "_value": 9378.86, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.021, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 2.478, + "_value": 3.393, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.027, + "_value": 2.523, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1.978, + "_value": 0.641, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.014, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.057, "_timestamp": 0 } ] @@ -12806,30 +16268,30 @@ } ] }, - "-m FaceDetectionBLS_config_8 -b 1 -i grpc -f FaceDetectionBLS_config_8-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/face_detect.json --shared-memory=cuda --output-shared-memory-size=24883200 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "FaceDetectionBLS_config_8", + "bls_config_7 -m bls -b 1 -i grpc -f bls-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 --bls-composing-models=add,sub add_config_6,sub_config_2": { + "_model_variants_name": "bls_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, - "_device_uuid": null + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 54.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12837,7 +16299,7 @@ [ "gpu_power_usage", { - "_value": 118.506, + "_value": 23.6913, "_timestamp": 0, "_device_uuid": null } @@ -12848,7 +16310,7 @@ [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -12856,7 +16318,7 @@ [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -12864,7 +16326,7 @@ [ "gpu_utilization", { - "_value": 54.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12872,7 +16334,7 @@ [ "gpu_power_usage", { - "_value": 118.506, + "_value": 23.6913, "_timestamp": 0, "_device_uuid": null } @@ -12882,7 +16344,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1314.914304, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -12890,7 +16352,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 24454.889472000003, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -12898,7 +16360,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 54.333299999999994, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12906,7 +16368,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 118.506, + "_value": 23.6913, "_timestamp": 0, "_device_uuid": null } @@ -12914,86 +16376,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "FaceDetectionBLS_config_8", + "_model_config_name": "bls", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 8 + "concurrency-range": 64, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 4.824, + "_value": 6.953, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 6.41, + "_value": 7.174, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 6.794, + "_value": 7.65, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 7.928, + "_value": 10.275, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 1644.23, + "_value": 9183.63, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.003, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 4.792, + "_value": 6.938, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.027, + "_value": 6.037, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.26, + "_value": 0.655, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.015, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.057, "_timestamp": 0 } ] @@ -13002,77 +16465,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 4.824, + "_value": 6.953, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 6.41, + "_value": 7.174, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 6.794, + "_value": 7.65, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 7.928, + "_value": 10.275, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 1644.23, + "_value": 9183.63, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.022, + "_value": 0.003, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 4.792, + "_value": 6.938, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.027, + "_value": 6.037, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 4.26, + "_value": 0.655, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.002, + "_value": 0.015, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.0, + "_value": 0.057, "_timestamp": 0 } ] @@ -13081,50 +16544,815 @@ ] } } - ] + ] + } + } + }, + "ResultManager.server_only_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.0, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.0, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.0405, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "MetricsManager.gpus": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": { + "name": "NVIDIA TITAN RTX", + "total_memory": 25387401216 + } + }, + "ModelManager.model_variant_name_manager": { + "_model_config_dicts": { + "bls_config_0": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "add_config_0": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "sub_config_0": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "sub_config_1": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "bls_config_1": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "add_config_1": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "add_config_2": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "sub_config_2": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "bls_config_2": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "bls_config_3": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 5, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "bls_config_4": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "add_config_3": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "bls_config_5": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 6, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "bls_config_6": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 8, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "add_config_4": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 6, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "bls_config_7": { + "name": "bls", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 7, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "add_config_5": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 7, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "add_config_6": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 5, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "sub_config_3": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 5, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "sub_config_4": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "data_type": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instance_group": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "backend": "python" } - } - }, - "ResultManager.server_only_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 845.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 24924.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 0.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 55.655, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "MetricsManager.gpus": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": { - "name": "NVIDIA TITAN RTX", - "total_memory": 25387401216 + }, + "_model_name_index": { + "bls": 7, + "add": 6, + "sub": 4 } } } \ No newline at end of file diff --git a/tests/common/bls-ckpt/README b/tests/common/bls-ckpt/README index 37f7dfbbc..b34f9cbd6 100644 --- a/tests/common/bls-ckpt/README +++ b/tests/common/bls-ckpt/README @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -This checkpoint was created by running the `FaceDetectionBLS` model. +This checkpoint was created by running the model from the BLS Quick Start. diff --git a/tests/common/bls-ckpt/golden-metrics-model-inference.csv b/tests/common/bls-ckpt/golden-metrics-model-inference.csv index 05fe4e301..fb69726d4 100644 --- a/tests/common/bls-ckpt/golden-metrics-model-inference.csv +++ b/tests/common/bls-ckpt/golden-metrics-model-inference.csv @@ -1,28 +1,55 @@ Model,Batch,Concurrency,Model Config Path,Instance Group,Max Batch Size,Satisfies Constraints,Throughput (infer/sec),p99 Latency (ms) -FaceDetectionBLS,1,16,FaceDetectionBLS_config_7,8:GPU,0,Yes,3183.7,7.4 -FaceDetectionBLS,1,8,FaceDetectionBLS_config_7,8:GPU,0,Yes,2795.7,4.5 -FaceDetectionBLS,1,4,FaceDetectionBLS_config_7,8:GPU,0,Yes,1665.9,3.3 -FaceDetectionBLS,1,2,FaceDetectionBLS_config_7,8:GPU,0,Yes,997.3,2.4 -FaceDetectionBLS,1,1,FaceDetectionBLS_config_7,8:GPU,0,Yes,545.8,2.1 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_8,9:GPU,0,Yes,3142.0,7.8 -FaceDetectionBLS,1,8,FaceDetectionBLS_config_8,9:GPU,0,Yes,1644.2,7.9 -FaceDetectionBLS,1,4,FaceDetectionBLS_config_8,9:GPU,0,Yes,1568.0,3.6 -FaceDetectionBLS,1,2,FaceDetectionBLS_config_8,9:GPU,0,Yes,991.0,2.7 -FaceDetectionBLS,1,1,FaceDetectionBLS_config_8,9:GPU,0,Yes,534.6,2.2 -FaceDetectionBLS,1,8,FaceDetectionBLS_config_default,16:CPU,0,Yes,2847.8,4.6 -FaceDetectionBLS,1,4,FaceDetectionBLS_config_default,16:CPU,0,Yes,1623.0,3.5 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_default,16:CPU,0,Yes,1452.7,14.8 -FaceDetectionBLS,1,2,FaceDetectionBLS_config_default,16:CPU,0,Yes,996.5,2.6 -FaceDetectionBLS,1,1,FaceDetectionBLS_config_default,16:CPU,0,Yes,589.2,2.1 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_6,7:GPU,0,Yes,2763.8,7.9 -FaceDetectionBLS,1,4,FaceDetectionBLS_config_6,7:GPU,0,Yes,1691.0,3.2 -FaceDetectionBLS,1,8,FaceDetectionBLS_config_6,7:GPU,0,Yes,1608.4,7.6 -FaceDetectionBLS,1,2,FaceDetectionBLS_config_6,7:GPU,0,Yes,1018.4,2.4 -FaceDetectionBLS,1,1,FaceDetectionBLS_config_6,7:GPU,0,Yes,544.2,2.3 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_5,6:GPU,0,Yes,2607.1,9.0 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_3,5:GPU,0,Yes,2216.5,9.9 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_4,4:GPU,0,Yes,1853.8,10.9 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_2,3:GPU,0,Yes,1571.6,11.9 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_1,2:GPU,0,Yes,1174.7,15.5 -FaceDetectionBLS,1,16,FaceDetectionBLS_config_0,1:GPU,0,Yes,652.3,27.1 +bls,1,16,bls_config_6,8:GPU,0,Yes,10077.3,2.2 +bls,1,32,bls_config_6,8:GPU,0,Yes,9915.6,4.7 +bls,1,16,bls_config_6,8:GPU,0,Yes,9824.1,2.4 +bls,1,64,bls_config_6,8:GPU,0,Yes,9822.0,10.2 +bls,1,128,bls_config_6,8:GPU,0,Yes,9772.2,18.8 +bls,1,256,bls_config_6,8:GPU,0,Yes,9731.2,34.0 +bls,1,8,bls_config_6,8:GPU,0,Yes,9116.0,1.4 +bls,1,16,bls_config_6,8:GPU,0,Yes,3121.0,6.4 +bls,1,16,bls_config_6,8:GPU,0,Yes,3066.0,6.4 +bls,1,4,bls_config_6,8:GPU,0,Yes,1929.9,2.5 +bls,1,2,bls_config_6,8:GPU,0,Yes,1116.3,2.1 +bls,1,1,bls_config_6,8:GPU,0,Yes,580.9,2.2 +bls,1,14,bls_config_7,7:GPU,0,Yes,9629.4,1.8 +bls,1,14,bls_config_7,7:GPU,0,Yes,9610.9,1.8 +bls,1,14,bls_config_7,7:GPU,0,Yes,9551.4,1.9 +bls,1,14,bls_config_7,7:GPU,0,Yes,9532.6,1.9 +bls,1,14,bls_config_7,7:GPU,0,Yes,9489.7,2.0 +bls,1,14,bls_config_7,7:GPU,0,Yes,9477.9,2.2 +bls,1,16,bls_config_7,7:GPU,0,Yes,9460.3,2.2 +bls,1,14,bls_config_7,7:GPU,0,Yes,9412.5,2.4 +bls,1,32,bls_config_7,7:GPU,0,Yes,9378.9,4.2 +bls,1,64,bls_config_7,7:GPU,0,Yes,9183.6,10.3 +bls,1,14,bls_config_7,7:GPU,0,Yes,9161.4,2.2 +bls,1,8,bls_config_7,7:GPU,0,Yes,9099.5,1.3 +bls,1,4,bls_config_7,7:GPU,0,Yes,1932.8,2.5 +bls,1,2,bls_config_7,7:GPU,0,Yes,1113.1,2.2 +bls,1,1,bls_config_7,7:GPU,0,Yes,587.5,2.1 +bls,1,12,bls_config_5,6:GPU,0,Yes,8801.1,1.8 +bls,1,12,bls_config_5,6:GPU,0,Yes,8475.9,2.1 +bls,1,12,bls_config_5,6:GPU,0,Yes,7284.4,2.5 +bls,1,512,bls_config_5,6:GPU,0,Yes,4719.6,166.0 +bls,1,256,bls_config_5,6:GPU,0,Yes,4261.4,86.8 +bls,1,64,bls_config_5,6:GPU,0,Yes,3328.9,23.8 +bls,1,128,bls_config_5,6:GPU,0,Yes,3107.5,45.3 +bls,1,8,bls_config_5,6:GPU,0,Yes,3096.0,3.4 +bls,1,16,bls_config_5,6:GPU,0,Yes,3014.9,6.4 +bls,1,32,bls_config_5,6:GPU,0,Yes,2979.5,12.4 +bls,1,4,bls_config_5,6:GPU,0,Yes,1956.5,2.6 +bls,1,2,bls_config_5,6:GPU,0,Yes,1113.3,2.3 +bls,1,1,bls_config_5,6:GPU,0,Yes,614.8,2.0 +bls,1,10,bls_config_3,5:GPU,0,Yes,2591.5,4.6 +bls,1,10,bls_config_3,5:GPU,0,Yes,2581.2,4.6 +bls,1,8,bls_config_4,4:GPU,0,Yes,2259.2,4.0 +bls,1,6,bls_config_2,3:GPU,0,Yes,1874.2,3.9 +bls,1,4,bls_config_1,2:GPU,0,Yes,1316.5,3.4 +bls,1,2,bls_config_0,1:GPU,0,Yes,849.0,3.1 +bls,1,2,bls_config_0,1:GPU,0,Yes,844.6,3.0 +bls,1,2,bls_config_0,1:GPU,0,Yes,756.2,3.1 +bls,1,2,bls_config_default,1:CPU,0,Yes,832.4,3.1 +bls,1,4,bls_config_default,1:CPU,0,Yes,815.9,6.0 +bls,1,16,bls_config_default,1:CPU,0,Yes,815.9,24.0 +bls,1,8,bls_config_default,1:CPU,0,Yes,796.9,11.8 +bls,1,1,bls_config_default,1:CPU,0,Yes,778.0,1.7 diff --git a/tests/common/bls-ckpt/golden-metrics-server-only.csv b/tests/common/bls-ckpt/golden-metrics-server-only.csv index 3e96263fd..77240adbf 100644 --- a/tests/common/bls-ckpt/golden-metrics-server-only.csv +++ b/tests/common/bls-ckpt/golden-metrics-server-only.csv @@ -1,3 +1,3 @@ Model,GPU UUID,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,845.0,0.0,55.7 +triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,872.0,0.0,56.0 diff --git a/tests/common/ensemble-ckpt/0.ckpt b/tests/common/ensemble-ckpt/0.ckpt index c4e8d0cef..2acf38be5 100644 --- a/tests/common/ensemble-ckpt/0.ckpt +++ b/tests/common/ensemble-ckpt/0.ckpt @@ -1,59 +1,77 @@ { "ResultManager.results": { "_results": { - "ensemble_python_resnet50": { - "ensemble_python_resnet50_config_default": [ + "ensemble_add_sub": { + "ensemble_add_sub_config_default": [ { "_triton_env": {}, "_model_run_configs": [ { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_default", - "platform": "ensemble", - "maxBatchSize": 256, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ + "_model_name": "ensemble_add_sub", + "_model_config_variant": { + "model_config": { + "name": "ensemble_add_sub", + "platform": "ensemble", + "input": [ { - "modelName": "preprocess_config_default", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] }, { - "modelName": "resnet50_trt_config_default", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "ensembleScheduling": { + "step": [ + { + "modelName": "add", + "modelVersion": "1", + "inputMap": { + "INPUT1": "INPUT1", + "INPUT0": "INPUT0" + }, + "outputMap": { + "OUTPUT0": "OUTPUT0" + } }, - "outputMap": { - "output": "OUTPUT" + { + "modelName": "sub", + "modelVersion": "1", + "inputMap": { + "INPUT1": "INPUT1", + "INPUT0": "INPUT0" + }, + "outputMap": { + "OUTPUT1": "OUTPUT1" + } } - } - ] + ] + } }, + "variant_name": "ensemble_add_sub_config_default", "cpu_only": false }, "_perf_config": { @@ -74,10 +92,10 @@ "stability-percentage": null, "max-trials": null, "percentile": null, - "input-data": "/swdev/profile_models/test_image", + "input-data": null, "shared-memory": null, "output-shared-memory-size": null, - "shape": "INPUT:1005970", + "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, @@ -101,15 +119,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 + "metrics-interval": 1000, + "bls-composing-models": null }, "_options": { - "-m": "ensemble_python_resnet50_config_default", + "-m": "ensemble_add_sub", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "ensemble_python_resnet50_config_default-results.csv", + "-f": "ensemble_add_sub-results.csv", "-H": null }, "_verbose": { @@ -136,63 +155,81 @@ "shape": null } }, - "_composing_configs": [ - { - "name": "preprocess_config_default", - "maxBatchSize": 256, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "kind": "KIND_CPU" - } - ], - "backend": "python", + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "kind": "KIND_CPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_default", "cpu_only": false }, { - "name": "resnet50_trt_config_default", - "platform": "tensorrt_plan", - "maxBatchSize": 256, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "kind": "KIND_CPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_default", "cpu_only": false } ] @@ -200,14 +237,14 @@ ] }, { - "-m ensemble_python_resnet50_config_default -b 1 -i grpc -f ensemble_python_resnet50_config_default-results.csv --verbose-csv --concurrency-range=1 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_default", + "ensemble_add_sub_config_default -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_default,sub_config_default": { + "_model_variants_name": "ensemble_add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -215,7 +252,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -223,7 +260,7 @@ [ "gpu_utilization", { - "_value": 1.25, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -231,7 +268,7 @@ [ "gpu_power_usage", { - "_value": 56.4694, + "_value": 56.321, "_timestamp": 0, "_device_uuid": null } @@ -242,7 +279,7 @@ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -250,7 +287,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -258,7 +295,7 @@ [ "gpu_utilization", { - "_value": 1.25, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -266,7 +303,7 @@ [ "gpu_power_usage", { - "_value": 56.4694, + "_value": 56.321, "_timestamp": 0, "_device_uuid": null } @@ -276,7 +313,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -284,7 +321,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -292,7 +329,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 1.25, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -300,7 +337,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 56.4694, + "_value": 56.321, "_timestamp": 0, "_device_uuid": null } @@ -308,58 +345,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_default", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 1 + "concurrency-range": 1, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 64.902, + "_value": 0.565, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 66.267, + "_value": 0.639, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 78.854, + "_value": 0.697, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 82.38, + "_value": 0.866, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 15.3298, + "_value": 1764.33, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.165, + "_value": 0.005, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 64.708, + "_value": 0.544, "_timestamp": 0 } ], @@ -373,21 +411,21 @@ [ "perf_server_compute_infer", { - "_value": 62.812, + "_value": 0.324, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.277, + "_value": 0.043, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.226, + "_value": 0.126, "_timestamp": 0 } ] @@ -396,49 +434,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 64.902, + "_value": 0.565, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 66.267, + "_value": 0.639, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 78.854, + "_value": 0.697, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 82.38, + "_value": 0.866, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 15.3298, + "_value": 1764.33, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.165, + "_value": 0.005, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 64.708, + "_value": 0.544, "_timestamp": 0 } ], @@ -452,21 +490,21 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 62.812, + "_value": 0.324, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.277, + "_value": 0.043, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.226, + "_value": 0.126, "_timestamp": 0 } ] @@ -474,14 +512,14 @@ } ] }, - "-m ensemble_python_resnet50_config_default -b 1 -i grpc -f ensemble_python_resnet50_config_default-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_default", + "ensemble_add_sub_config_default -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_default,sub_config_default": { + "_model_variants_name": "ensemble_add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -489,7 +527,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -497,7 +535,7 @@ [ "gpu_utilization", { - "_value": 1.6, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -505,7 +543,7 @@ [ "gpu_power_usage", { - "_value": 57.4051, + "_value": 56.7245, "_timestamp": 0, "_device_uuid": null } @@ -516,7 +554,7 @@ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -524,7 +562,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -532,7 +570,7 @@ [ "gpu_utilization", { - "_value": 1.6, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -540,7 +578,7 @@ [ "gpu_power_usage", { - "_value": 57.4051, + "_value": 56.7245, "_timestamp": 0, "_device_uuid": null } @@ -550,7 +588,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -558,7 +596,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -566,7 +604,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 1.6, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -574,7 +612,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.4051, + "_value": 56.7245, "_timestamp": 0, "_device_uuid": null } @@ -582,86 +620,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_default", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 2 + "concurrency-range": 2, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 119.717, + "_value": 0.706, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 121.897, + "_value": 0.817, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 127.903, + "_value": 0.864, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 138.86, + "_value": 0.949, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 16.5958, + "_value": 2825.37, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.176, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 119.511, + "_value": 0.683, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 60.507, + "_value": 0.333, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.342, + "_value": 0.044, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.25, + "_value": 0.136, "_timestamp": 0 } ] @@ -670,77 +709,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 119.717, + "_value": 0.706, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 121.897, + "_value": 0.817, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 127.903, + "_value": 0.864, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 138.86, + "_value": 0.949, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 16.5958, + "_value": 2825.37, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.176, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 119.511, + "_value": 0.683, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 60.507, + "_value": 0.333, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.342, + "_value": 0.044, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.25, + "_value": 0.136, "_timestamp": 0 } ] @@ -748,14 +787,14 @@ } ] }, - "-m ensemble_python_resnet50_config_default -b 1 -i grpc -f ensemble_python_resnet50_config_default-results.csv --verbose-csv --concurrency-range=4 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_default", + "ensemble_add_sub_config_default -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_default,sub_config_default": { + "_model_variants_name": "ensemble_add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -763,7 +802,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -771,7 +810,7 @@ [ "gpu_utilization", { - "_value": 1.83333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -779,7 +818,7 @@ [ "gpu_power_usage", { - "_value": 57.3872, + "_value": 56.6613, "_timestamp": 0, "_device_uuid": null } @@ -790,7 +829,7 @@ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -798,7 +837,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -806,7 +845,7 @@ [ "gpu_utilization", { - "_value": 1.83333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -814,7 +853,7 @@ [ "gpu_power_usage", { - "_value": 57.3872, + "_value": 56.6613, "_timestamp": 0, "_device_uuid": null } @@ -824,7 +863,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -832,7 +871,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -840,7 +879,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 1.83333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -848,7 +887,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.3872, + "_value": 56.6613, "_timestamp": 0, "_device_uuid": null } @@ -856,86 +895,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_default", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 4 + "concurrency-range": 4, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 240.06, + "_value": 1.4, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 243.316, + "_value": 1.616, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 257.803, + "_value": 1.695, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 261.813, + "_value": 1.879, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 16.4965, + "_value": 2851.58, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.182, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 239.846, + "_value": 1.374, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 61.069, + "_value": 0.346, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.3, + "_value": 0.045, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.222, + "_value": 0.134, "_timestamp": 0 } ] @@ -944,77 +984,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 240.06, + "_value": 1.4, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 243.316, + "_value": 1.616, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 257.803, + "_value": 1.695, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 261.813, + "_value": 1.879, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 16.4965, + "_value": 2851.58, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.182, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 239.846, + "_value": 1.374, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 61.069, + "_value": 0.346, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.3, + "_value": 0.045, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.222, + "_value": 0.134, "_timestamp": 0 } ] @@ -1022,14 +1062,14 @@ } ] }, - "-m ensemble_python_resnet50_config_default -b 1 -i grpc -f ensemble_python_resnet50_config_default-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_default", + "ensemble_add_sub_config_default -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_default,sub_config_default": { + "_model_variants_name": "ensemble_add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1037,7 +1077,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1045,7 +1085,7 @@ [ "gpu_utilization", { - "_value": 1.8181800000000001, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1053,7 +1093,7 @@ [ "gpu_power_usage", { - "_value": 57.3005, + "_value": 56.648, "_timestamp": 0, "_device_uuid": null } @@ -1064,7 +1104,7 @@ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -1072,7 +1112,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -1080,7 +1120,7 @@ [ "gpu_utilization", { - "_value": 1.8181800000000001, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1088,7 +1128,7 @@ [ "gpu_power_usage", { - "_value": 57.3005, + "_value": 56.648, "_timestamp": 0, "_device_uuid": null } @@ -1098,7 +1138,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -1106,7 +1146,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -1114,7 +1154,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 1.8181800000000001, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1122,7 +1162,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.3005, + "_value": 56.648, "_timestamp": 0, "_device_uuid": null } @@ -1130,86 +1170,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_default", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 8 + "concurrency-range": 8, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 482.401, + "_value": 2.89, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 503.979, + "_value": 3.362, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 507.311, + "_value": 3.499, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 526.907, + "_value": 3.77, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 16.1775, + "_value": 2763.39, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.245, + "_value": 0.007, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 482.122, + "_value": 2.86, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 62.129, + "_value": 0.365, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.299, + "_value": 0.046, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.241, + "_value": 0.137, "_timestamp": 0 } ] @@ -1218,77 +1259,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 482.401, + "_value": 2.89, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 503.979, + "_value": 3.362, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 507.311, + "_value": 3.499, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 526.907, + "_value": 3.77, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 16.1775, + "_value": 2763.39, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.245, + "_value": 0.007, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 482.122, + "_value": 2.86, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 62.129, + "_value": 0.365, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.299, + "_value": 0.046, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.241, + "_value": 0.137, "_timestamp": 0 } ] @@ -1296,14 +1337,14 @@ } ] }, - "-m ensemble_python_resnet50_config_default -b 1 -i grpc -f ensemble_python_resnet50_config_default-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_default", + "ensemble_add_sub_config_default -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_default,sub_config_default": { + "_model_variants_name": "ensemble_add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1311,7 +1352,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1319,7 +1360,7 @@ [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1327,7 +1368,7 @@ [ "gpu_power_usage", { - "_value": 57.1957, + "_value": 56.7327, "_timestamp": 0, "_device_uuid": null } @@ -1338,7 +1379,7 @@ [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -1346,7 +1387,7 @@ [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -1354,7 +1395,7 @@ [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1362,7 +1403,7 @@ [ "gpu_power_usage", { - "_value": 57.1957, + "_value": 56.7327, "_timestamp": 0, "_device_uuid": null } @@ -1372,7 +1413,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1862.270976, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -1380,7 +1421,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23533.191168, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -1388,7 +1429,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1396,7 +1437,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.1957, + "_value": 56.7327, "_timestamp": 0, "_device_uuid": null } @@ -1404,86 +1445,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_default", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 16, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 975.636, + "_value": 5.731, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 992.047, + "_value": 6.499, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 1005.614, + "_value": 6.683, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 1011.585, + "_value": 7.159, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 16.3599, + "_value": 2786.78, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.157, + "_value": 0.007, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 975.445, + "_value": 5.7, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 61.556, + "_value": 0.361, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.3, + "_value": 0.046, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.244, + "_value": 0.138, "_timestamp": 0 } ] @@ -1492,77 +1534,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 975.636, + "_value": 5.731, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 992.047, + "_value": 6.499, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 1005.614, + "_value": 6.683, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 1011.585, + "_value": 7.159, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 16.3599, + "_value": 2786.78, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.157, + "_value": 0.007, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 975.445, + "_value": 5.7, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 61.556, + "_value": 0.361, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.3, + "_value": 0.046, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.244, + "_value": 0.138, "_timestamp": 0 } ] @@ -1572,58 +1614,77 @@ } } ], - "ensemble_python_resnet50_config_0": [ + "ensemble_add_sub_config_0": [ { "_triton_env": {}, "_model_run_configs": [ { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_0", - "platform": "ensemble", - "maxBatchSize": 1, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ + "_model_name": "ensemble_add_sub", + "_model_config_variant": { + "model_config": { + "name": "ensemble_add_sub", + "platform": "ensemble", + "maxBatchSize": 1, + "input": [ { - "modelName": "preprocess_config_0", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] }, { - "modelName": "resnet50_trt_config_0", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "ensembleScheduling": { + "step": [ + { + "modelName": "add", + "modelVersion": "1", + "inputMap": { + "INPUT1": "INPUT1", + "INPUT0": "INPUT0" + }, + "outputMap": { + "OUTPUT0": "OUTPUT0" + } }, - "outputMap": { - "output": "OUTPUT" + { + "modelName": "sub", + "modelVersion": "1", + "inputMap": { + "INPUT1": "INPUT1", + "INPUT0": "INPUT0" + }, + "outputMap": { + "OUTPUT1": "OUTPUT1" + } } - } - ] + ] + } }, + "variant_name": "ensemble_add_sub_config_0", "cpu_only": false }, "_perf_config": { @@ -1644,10 +1705,10 @@ "stability-percentage": null, "max-trials": null, "percentile": null, - "input-data": "/swdev/profile_models/test_image", + "input-data": null, "shared-memory": null, "output-shared-memory-size": null, - "shape": "INPUT:1005970", + "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, @@ -1671,15 +1732,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 + "metrics-interval": 1000, + "bls-composing-models": null }, "_options": { - "-m": "ensemble_python_resnet50_config_0", + "-m": "ensemble_add_sub", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "ensemble_python_resnet50_config_0-results.csv", + "-f": "ensemble_add_sub-results.csv", "-H": null }, "_verbose": { @@ -1706,72 +1768,83 @@ "shape": null } }, - "_composing_configs": [ - { - "name": "preprocess_config_0", - "maxBatchSize": 1, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_0", "cpu_only": false }, { - "name": "resnet50_trt_config_0", - "platform": "tensorrt_plan", - "maxBatchSize": 1, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_0", "cpu_only": false } ] @@ -1779,14 +1852,14 @@ ] }, { - "-m ensemble_python_resnet50_config_0 -b 1 -i grpc -f ensemble_python_resnet50_config_0-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_0", + "ensemble_add_sub_config_0 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_0", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1794,7 +1867,7 @@ [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1802,7 +1875,7 @@ [ "gpu_utilization", { - "_value": 1.5, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1810,7 +1883,7 @@ [ "gpu_power_usage", { - "_value": 56.5671, + "_value": 56.5558, "_timestamp": 0, "_device_uuid": null } @@ -1821,7 +1894,7 @@ [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -1829,7 +1902,7 @@ [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -1837,7 +1910,7 @@ [ "gpu_utilization", { - "_value": 1.5, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1845,7 +1918,7 @@ [ "gpu_power_usage", { - "_value": 56.5671, + "_value": 56.5558, "_timestamp": 0, "_device_uuid": null } @@ -1855,7 +1928,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -1863,7 +1936,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -1871,7 +1944,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 1.5, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1879,7 +1952,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 56.5671, + "_value": 56.5558, "_timestamp": 0, "_device_uuid": null } @@ -1887,86 +1960,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_0", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 2 + "concurrency-range": 2, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 126.491, + "_value": 0.719, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 128.09, + "_value": 0.819, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 128.56, + "_value": 0.856, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 144.584, + "_value": 0.965, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 15.7464, + "_value": 2772.97, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.164, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 126.297, + "_value": 0.695, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 63.934, + "_value": 0.339, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.352, + "_value": 0.044, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.233, + "_value": 0.138, "_timestamp": 0 } ] @@ -1975,317 +2049,108 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 126.491, + "_value": 0.719, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 128.09, + "_value": 0.819, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 128.56, + "_value": 0.856, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 144.584, + "_value": 0.965, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 15.7464, + "_value": 2772.97, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.164, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 126.297, + "_value": 0.695, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 63.934, + "_value": 0.339, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.352, + "_value": 0.044, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.233, + "_value": 0.138, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_1": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_1", - "platform": "ensemble", - "maxBatchSize": 1, - "input": [ + }, + "ensemble_add_sub_config_0 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "output": [ + [ + "gpu_free_memory", { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_0", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_1", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 2, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_1", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_1-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_0", - "maxBatchSize": 1, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_1", - "platform": "tensorrt_plan", - "maxBatchSize": 1, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_1 -b 1 -i grpc -f ensemble_python_resnet50_config_1-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_1", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2789.21216, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22606.249984000002, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 1.5555599999999998, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2293,7 +2158,7 @@ [ "gpu_power_usage", { - "_value": 56.8051, + "_value": 56.7783, "_timestamp": 0, "_device_uuid": null } @@ -2304,7 +2169,7 @@ [ "gpu_used_memory", { - "_value": 2789.21216, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -2312,7 +2177,7 @@ [ "gpu_free_memory", { - "_value": 22606.249984000002, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -2320,7 +2185,7 @@ [ "gpu_utilization", { - "_value": 1.5555599999999998, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2328,7 +2193,7 @@ [ "gpu_power_usage", { - "_value": 56.8051, + "_value": 56.7783, "_timestamp": 0, "_device_uuid": null } @@ -2338,7 +2203,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2789.21216, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -2346,7 +2211,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 22606.249984000002, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -2354,7 +2219,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 1.5555599999999998, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2362,7 +2227,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 56.8051, + "_value": 56.7783, "_timestamp": 0, "_device_uuid": null } @@ -2370,86 +2235,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_1", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 2 + "concurrency-range": 1, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 104.218, + "_value": 0.561, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 105.102, + "_value": 0.633, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 116.818, + "_value": 0.668, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 135.586, + "_value": 0.774, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 19.1059, + "_value": 1775.27, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.263, + "_value": 0.004, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 103.92, + "_value": 0.543, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.0, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 52.715, + "_value": 0.338, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.35, + "_value": 0.043, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.265, + "_value": 0.135, "_timestamp": 0 } ] @@ -2458,301 +2324,92 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 104.218, + "_value": 0.561, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 105.102, + "_value": 0.633, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 116.818, + "_value": 0.668, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 135.586, + "_value": 0.774, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 19.1059, + "_value": 1775.27, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.263, + "_value": 0.004, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 103.92, + "_value": 0.543, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.0, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 52.715, + "_value": 0.338, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.35, + "_value": 0.043, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.265, + "_value": 0.135, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_2": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_2", - "platform": "ensemble", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_1", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_2", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 8, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_2", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_2-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_1", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_2", - "platform": "tensorrt_plan", - "maxBatchSize": 2, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_2 -b 1 -i grpc -f ensemble_python_resnet50_config_2-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_2", + }, + "ensemble_add_sub_config_0 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_0", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -2760,7 +2417,7 @@ [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -2768,7 +2425,7 @@ [ "gpu_utilization", { - "_value": 2.16667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2776,7 +2433,7 @@ [ "gpu_power_usage", { - "_value": 57.5712, + "_value": 56.6303, "_timestamp": 0, "_device_uuid": null } @@ -2787,7 +2444,7 @@ [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -2795,7 +2452,7 @@ [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -2803,7 +2460,7 @@ [ "gpu_utilization", { - "_value": 2.16667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2811,7 +2468,7 @@ [ "gpu_power_usage", { - "_value": 57.5712, + "_value": 56.6303, "_timestamp": 0, "_device_uuid": null } @@ -2821,7 +2478,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -2829,7 +2486,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -2837,7 +2494,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 2.16667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -2845,7 +2502,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.5712, + "_value": 56.6303, "_timestamp": 0, "_device_uuid": null } @@ -2853,58 +2510,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_2", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 8 + "concurrency-range": 4, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 239.039, + "_value": 1.4, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 261.736, + "_value": 1.604, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 268.209, + "_value": 1.685, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 303.134, + "_value": 1.861, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 32.6532, + "_value": 2852.42, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.274, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 238.735, + "_value": 1.374, "_timestamp": 0 } ], @@ -2918,21 +2576,21 @@ [ "perf_server_compute_infer", { - "_value": 119.692, + "_value": 0.342, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.396, + "_value": 0.045, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.501, + "_value": 0.136, "_timestamp": 0 } ] @@ -2941,49 +2599,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 239.039, + "_value": 1.4, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 261.736, + "_value": 1.604, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 268.209, + "_value": 1.685, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 303.134, + "_value": 1.861, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 32.6532, + "_value": 2852.42, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.274, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 238.735, + "_value": 1.374, "_timestamp": 0 } ], @@ -2997,253 +2655,44 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 119.692, + "_value": 0.342, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.396, + "_value": 0.045, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.501, + "_value": 0.136, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_3": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_3", - "platform": "ensemble", - "maxBatchSize": 1, - "input": [ + }, + "ensemble_add_sub_config_0 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "output": [ + [ + "gpu_free_memory", { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_0", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_3", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 2, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_3", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_3-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_0", - "maxBatchSize": 1, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_3", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_3 -b 1 -i grpc -f ensemble_python_resnet50_config_3-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_3", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -3251,7 +2700,7 @@ [ "gpu_utilization", { - "_value": 1.5833300000000001, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3259,7 +2708,7 @@ [ "gpu_power_usage", { - "_value": 57.0387, + "_value": 56.7028, "_timestamp": 0, "_device_uuid": null } @@ -3270,7 +2719,7 @@ [ "gpu_used_memory", { - "_value": 3881.828352, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -3278,7 +2727,7 @@ [ "gpu_free_memory", { - "_value": 21513.633792, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -3286,7 +2735,7 @@ [ "gpu_utilization", { - "_value": 1.5833300000000001, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3294,7 +2743,7 @@ [ "gpu_power_usage", { - "_value": 57.0387, + "_value": 56.7028, "_timestamp": 0, "_device_uuid": null } @@ -3304,7 +2753,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 3881.828352, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -3312,7 +2761,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 21513.633792, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -3320,7 +2769,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 1.5833300000000001, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3328,7 +2777,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.0387, + "_value": 56.7028, "_timestamp": 0, "_device_uuid": null } @@ -3336,86 +2785,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_3", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 2 + "concurrency-range": 8, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 122.399, + "_value": 2.961, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 123.557, + "_value": 3.436, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 135.078, + "_value": 3.566, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 139.719, + "_value": 3.862, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 16.2463, + "_value": 2697.78, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.164, + "_value": 0.007, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 122.206, + "_value": 2.931, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 62.043, + "_value": 0.37, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.289, + "_value": 0.047, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.223, + "_value": 0.141, "_timestamp": 0 } ] @@ -3424,138 +2874,432 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 122.399, + "_value": 2.961, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 123.557, + "_value": 3.436, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 135.078, + "_value": 3.566, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 139.719, + "_value": 3.862, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 16.2463, + "_value": 2697.78, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.164, + "_value": 0.007, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 122.206, + "_value": 2.931, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 62.043, + "_value": 0.37, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.289, + "_value": 0.047, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.223, + "_value": 0.141, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_4": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_4", - "platform": "ensemble", - "maxBatchSize": 2, - "input": [ + }, + "ensemble_add_sub_config_0 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "output": [ + [ + "gpu_free_memory", { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "ensembleScheduling": { - "step": [ + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 48.4545, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 48.4545, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 48.4545, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "ensemble_add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 16, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", { - "modelName": "preprocess_config_2", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, + "_value": 5.758, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", { - "modelName": "resnet50_trt_config_2", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } + "_value": 6.525, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 6.729, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 7.174, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2773.69, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 5.728, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.001, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.362, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.046, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.139, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 5.758, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 6.525, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 6.729, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 7.174, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2773.69, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.728, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.001, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.362, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.046, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.139, + "_timestamp": 0 } ] + } + } + ] + } + } + ], + "ensemble_add_sub_config_1": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "ensemble_add_sub", + "_model_config_variant": { + "model_config": { + "name": "ensemble_add_sub", + "platform": "ensemble", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "ensembleScheduling": { + "step": [ + { + "modelName": "add", + "modelVersion": "1", + "inputMap": { + "INPUT0": "INPUT0", + "INPUT1": "INPUT1" + }, + "outputMap": { + "OUTPUT0": "OUTPUT0" + } + }, + { + "modelName": "sub", + "modelVersion": "1", + "inputMap": { + "INPUT0": "INPUT0", + "INPUT1": "INPUT1" + }, + "outputMap": { + "OUTPUT1": "OUTPUT1" + } + } + ] + } }, + "variant_name": "ensemble_add_sub_config_1", "cpu_only": false }, "_perf_config": { @@ -3565,7 +3309,7 @@ "async": null, "sync": null, "measurement-interval": null, - "concurrency-range": 24, + "concurrency-range": 2, "request-rate-range": null, "request-distribution": null, "request-intervals": null, @@ -3576,10 +3320,10 @@ "stability-percentage": null, "max-trials": null, "percentile": null, - "input-data": "/swdev/profile_models/test_image", + "input-data": null, "shared-memory": null, "output-shared-memory-size": null, - "shape": "INPUT:1005970", + "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, @@ -3603,15 +3347,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 + "metrics-interval": 1000, + "bls-composing-models": null }, "_options": { - "-m": "ensemble_python_resnet50_config_4", + "-m": "ensemble_add_sub", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "ensemble_python_resnet50_config_4-results.csv", + "-f": "ensemble_add_sub-results.csv", "-H": null }, "_verbose": { @@ -3638,72 +3383,83 @@ "shape": null } }, - "_composing_configs": [ - { - "name": "preprocess_config_2", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_0", "cpu_only": false }, { - "name": "resnet50_trt_config_2", - "platform": "tensorrt_plan", - "maxBatchSize": 2, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_1", "cpu_only": false } ] @@ -3711,14 +3467,14 @@ ] }, { - "-m ensemble_python_resnet50_config_4 -b 1 -i grpc -f ensemble_python_resnet50_config_4-results.csv --verbose-csv --concurrency-range=24 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_4", + "ensemble_add_sub_config_1 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_1": { + "_model_variants_name": "ensemble_add_sub_config_1", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -3726,7 +3482,7 @@ [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -3734,7 +3490,7 @@ [ "gpu_utilization", { - "_value": 3.6666700000000003, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3742,7 +3498,7 @@ [ "gpu_power_usage", { - "_value": 58.07, + "_value": 56.485, "_timestamp": 0, "_device_uuid": null } @@ -3753,7 +3509,7 @@ [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -3761,7 +3517,7 @@ [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -3769,7 +3525,7 @@ [ "gpu_utilization", { - "_value": 3.6666700000000003, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3777,7 +3533,7 @@ [ "gpu_power_usage", { - "_value": 58.07, + "_value": 56.485, "_timestamp": 0, "_device_uuid": null } @@ -3787,7 +3543,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1707.081728, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -3795,7 +3551,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23688.380416, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -3803,7 +3559,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 3.6666700000000003, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -3811,7 +3567,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 58.07, + "_value": 56.485, "_timestamp": 0, "_device_uuid": null } @@ -3819,58 +3575,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_4", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 24 + "concurrency-range": 2, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 476.231, + "_value": 0.702, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 527.439, + "_value": 0.809, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 535.716, + "_value": 0.851, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 577.74, + "_value": 0.95, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 50.3814, + "_value": 2837.63, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.222, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 475.979, + "_value": 0.68, "_timestamp": 0 } ], @@ -3884,21 +3641,21 @@ [ "perf_server_compute_infer", { - "_value": 237.166, + "_value": 0.336, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.582, + "_value": 0.043, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.92, + "_value": 0.134, "_timestamp": 0 } ] @@ -3907,49 +3664,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 476.231, + "_value": 0.702, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 527.439, + "_value": 0.809, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 535.716, + "_value": 0.851, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 577.74, + "_value": 0.95, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 50.3814, + "_value": 2837.63, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.222, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 475.979, + "_value": 0.68, "_timestamp": 0 } ], @@ -3963,261 +3720,52 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 237.166, + "_value": 0.336, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.582, + "_value": 0.043, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.92, + "_value": 0.134, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_5": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_5", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ + }, + "ensemble_add_sub_config_1 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_1": { + "_model_variants_name": "ensemble_add_sub_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "output": [ + [ + "gpu_free_memory", { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_2", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_4", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 24, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_5", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_5-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_2", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_5 -b 1 -i grpc -f ensemble_python_resnet50_config_5-results.csv --verbose-csv --concurrency-range=24 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_5", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4225,7 +3773,7 @@ [ "gpu_power_usage", { - "_value": 57.8959, + "_value": 56.7663, "_timestamp": 0, "_device_uuid": null } @@ -4236,7 +3784,7 @@ [ "gpu_used_memory", { - "_value": 2797.600768, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -4244,7 +3792,7 @@ [ "gpu_free_memory", { - "_value": 22597.861376, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -4252,7 +3800,7 @@ [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4260,7 +3808,7 @@ [ "gpu_power_usage", { - "_value": 57.8959, + "_value": 56.7663, "_timestamp": 0, "_device_uuid": null } @@ -4270,7 +3818,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2797.600768, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -4278,7 +3826,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 22597.861376, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -4286,7 +3834,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4294,7 +3842,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.8959, + "_value": 56.7663, "_timestamp": 0, "_device_uuid": null } @@ -4302,86 +3850,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_5", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 24 + "concurrency-range": 1, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 469.359, + "_value": 0.558, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 536.438, + "_value": 0.647, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 554.136, + "_value": 0.693, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 610.216, + "_value": 0.794, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 48.4815, + "_value": 1785.0, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.989, + "_value": 0.004, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 468.339, + "_value": 0.541, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.001, + "_value": 0.0, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 235.251, + "_value": 0.332, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.878, + "_value": 0.043, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.474, + "_value": 0.131, "_timestamp": 0 } ] @@ -4390,301 +3939,367 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 469.359, + "_value": 0.558, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 536.438, + "_value": 0.647, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 554.136, + "_value": 0.693, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 610.216, + "_value": 0.794, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 48.4815, + "_value": 1785.0, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.989, + "_value": 0.004, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 468.339, + "_value": 0.541, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.001, + "_value": 0.0, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 235.251, + "_value": 0.332, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.878, + "_value": 0.043, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.474, + "_value": 0.131, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_6": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_6", - "platform": "ensemble", - "maxBatchSize": 8, - "input": [ + }, + "ensemble_add_sub_config_1 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_1": { + "_model_variants_name": "ensemble_add_sub_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "output": [ + [ + "gpu_utilization", { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null } ], - "ensembleScheduling": { - "step": [ + [ + "gpu_power_usage", + { + "_value": 56.8155, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.8155, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.8155, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "ensemble_add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", { - "modelName": "preprocess_config_3", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, + "_value": 1.559, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", { - "modelName": "resnet50_trt_config_5", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } + "_value": 1.808, + "_timestamp": 0 } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 160, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 200, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_6", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_6-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_3", - "maxBatchSize": 16, - "input": [ + ], + [ + "perf_latency_p95", { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 1.887, + "_timestamp": 0 } ], - "output": [ + [ + "perf_latency_p99", { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] + "_value": 2.064, + "_timestamp": 0 } ], - "instanceGroup": [ + [ + "perf_throughput", { - "count": 5, - "kind": "KIND_GPU" + "_value": 2561.85, + "_timestamp": 0 } ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_5", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ + [ + "perf_client_send_recv", { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] + "_value": 0.007, + "_timestamp": 0 } ], - "output": [ + [ + "perf_client_response_wait", { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" + "_value": 1.528, + "_timestamp": 0 } ], - "instanceGroup": [ + [ + "perf_server_queue", { - "count": 1, - "kind": "KIND_GPU" + "_value": 0.001, + "_timestamp": 0 } ], - "dynamicBatching": {}, - "cpu_only": false + [ + "perf_server_compute_infer", + { + "_value": 0.382, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.049, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.148, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.559, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.808, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 1.887, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 2.064, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2561.85, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.528, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.001, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.382, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.049, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.148, + "_timestamp": 0 + } + ] } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_6 -b 1 -i grpc -f ensemble_python_resnet50_config_6-results.csv --verbose-csv --concurrency-range=160 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_6", + } + ] + }, + "ensemble_add_sub_config_1 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_1": { + "_model_variants_name": "ensemble_add_sub_config_1", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1715.470336, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -4692,7 +4307,7 @@ [ "gpu_free_memory", { - "_value": 23679.991808000002, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -4700,7 +4315,7 @@ [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4708,7 +4323,7 @@ [ "gpu_power_usage", { - "_value": 58.5071, + "_value": 56.6602, "_timestamp": 0, "_device_uuid": null } @@ -4719,7 +4334,7 @@ [ "gpu_used_memory", { - "_value": 1715.470336, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -4727,7 +4342,7 @@ [ "gpu_free_memory", { - "_value": 23679.991808000002, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -4735,7 +4350,7 @@ [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4743,7 +4358,7 @@ [ "gpu_power_usage", { - "_value": 58.5071, + "_value": 56.6602, "_timestamp": 0, "_device_uuid": null } @@ -4753,7 +4368,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1715.470336, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -4761,7 +4376,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23679.991808000002, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -4769,7 +4384,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 2.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4777,7 +4392,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 58.5071, + "_value": 56.6602, "_timestamp": 0, "_device_uuid": null } @@ -4785,58 +4400,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_6", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 160 + "concurrency-range": 8, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 3587.968, + "_value": 2.789, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 3864.033, + "_value": 3.299, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 4018.395, + "_value": 3.427, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 4193.818, + "_value": 3.655, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 42.9915, + "_value": 2865.88, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.227, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 3587.72, + "_value": 2.761, "_timestamp": 0 } ], @@ -4850,21 +4466,21 @@ [ "perf_server_compute_infer", { - "_value": 1808.862, + "_value": 0.351, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 2.712, + "_value": 0.044, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 3.044, + "_value": 0.13, "_timestamp": 0 } ] @@ -4873,49 +4489,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 3587.968, + "_value": 2.789, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 3864.033, + "_value": 3.299, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 4018.395, + "_value": 3.427, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 4193.818, + "_value": 3.655, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 42.9915, + "_value": 2865.88, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.227, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 3587.72, + "_value": 2.761, "_timestamp": 0 } ], @@ -4929,261 +4545,52 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 1808.862, + "_value": 0.351, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 2.712, + "_value": 0.044, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 3.044, + "_value": 0.13, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_7": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_7", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ + }, + "ensemble_add_sub_config_1 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_0,sub_config_1": { + "_model_variants_name": "ensemble_add_sub_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "output": [ + [ + "gpu_free_memory", { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_2", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 24, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_7", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_7-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_2", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_7 -b 1 -i grpc -f ensemble_python_resnet50_config_7-results.csv --verbose-csv --concurrency-range=24 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_7", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 2.8, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5191,7 +4598,7 @@ [ "gpu_power_usage", { - "_value": 57.96, + "_value": 35.5865, "_timestamp": 0, "_device_uuid": null } @@ -5202,7 +4609,7 @@ [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -5210,7 +4617,7 @@ [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -5218,7 +4625,7 @@ [ "gpu_utilization", { - "_value": 2.8, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5226,7 +4633,7 @@ [ "gpu_power_usage", { - "_value": 57.96, + "_value": 35.5865, "_timestamp": 0, "_device_uuid": null } @@ -5236,7 +4643,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -5244,7 +4651,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -5252,7 +4659,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 2.8, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5260,7 +4667,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.96, + "_value": 35.5865, "_timestamp": 0, "_device_uuid": null } @@ -5268,86 +4675,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_7", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 24 + "concurrency-range": 16, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 483.446, + "_value": 5.701, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 527.748, + "_value": 6.596, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 535.631, + "_value": 6.819, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 552.581, + "_value": 7.253, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 49.5796, + "_value": 2801.72, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.23, + "_value": 0.007, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 483.189, + "_value": 5.671, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 242.088, + "_value": 0.359, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.578, + "_value": 0.046, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.768, + "_value": 0.138, "_timestamp": 0 } ] @@ -5356,77 +4764,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 483.446, + "_value": 5.701, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 527.748, + "_value": 6.596, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 535.631, + "_value": 6.819, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 552.581, + "_value": 7.253, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 49.5796, + "_value": 2801.72, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.23, + "_value": 0.007, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 483.189, + "_value": 5.671, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.002, + "_value": 0.001, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 242.088, + "_value": 0.359, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.578, + "_value": 0.046, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.768, + "_value": 0.138, "_timestamp": 0 } ] @@ -5436,58 +4844,77 @@ } } ], - "ensemble_python_resnet50_config_8": [ + "ensemble_add_sub_config_2": [ { "_triton_env": {}, "_model_run_configs": [ { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_8", - "platform": "ensemble", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ + "_model_name": "ensemble_add_sub", + "_model_config_variant": { + "model_config": { + "name": "ensemble_add_sub", + "platform": "ensemble", + "maxBatchSize": 1, + "input": [ { - "modelName": "preprocess_config_4", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] }, { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "ensembleScheduling": { + "step": [ + { + "modelName": "add", + "modelVersion": "1", + "inputMap": { + "INPUT1": "INPUT1", + "INPUT0": "INPUT0" + }, + "outputMap": { + "OUTPUT0": "OUTPUT0" + } }, - "outputMap": { - "output": "OUTPUT" + { + "modelName": "sub", + "modelVersion": "1", + "inputMap": { + "INPUT1": "INPUT1", + "INPUT0": "INPUT0" + }, + "outputMap": { + "OUTPUT1": "OUTPUT1" + } } - } - ] + ] + } }, + "variant_name": "ensemble_add_sub_config_2", "cpu_only": false }, "_perf_config": { @@ -5497,7 +4924,7 @@ "async": null, "sync": null, "measurement-interval": null, - "concurrency-range": 12, + "concurrency-range": 4, "request-rate-range": null, "request-distribution": null, "request-intervals": null, @@ -5508,10 +4935,10 @@ "stability-percentage": null, "max-trials": null, "percentile": null, - "input-data": "/swdev/profile_models/test_image", + "input-data": null, "shared-memory": null, "output-shared-memory-size": null, - "shape": "INPUT:1005970", + "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, @@ -5535,15 +4962,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 + "metrics-interval": 1000, + "bls-composing-models": null }, "_options": { - "-m": "ensemble_python_resnet50_config_8", + "-m": "ensemble_add_sub", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "ensemble_python_resnet50_config_8-results.csv", + "-f": "ensemble_add_sub-results.csv", "-H": null }, "_verbose": { @@ -5570,72 +4998,83 @@ "shape": null } }, - "_composing_configs": [ - { - "name": "preprocess_config_4", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_1", "cpu_only": false }, { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_0", "cpu_only": false } ] @@ -5643,14 +5082,14 @@ ] }, { - "-m ensemble_python_resnet50_config_8 -b 1 -i grpc -f ensemble_python_resnet50_config_8-results.csv --verbose-csv --concurrency-range=12 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_8", + "ensemble_add_sub_config_2 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_1,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_2", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5658,7 +5097,7 @@ [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5666,7 +5105,7 @@ [ "gpu_utilization", { - "_value": 2.57143, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5674,7 +5113,7 @@ [ "gpu_power_usage", { - "_value": 58.2241, + "_value": 56.5798, "_timestamp": 0, "_device_uuid": null } @@ -5685,7 +5124,7 @@ [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -5693,7 +5132,7 @@ [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -5701,7 +5140,7 @@ [ "gpu_utilization", { - "_value": 2.57143, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5709,7 +5148,7 @@ [ "gpu_power_usage", { - "_value": 58.2241, + "_value": 56.5798, "_timestamp": 0, "_device_uuid": null } @@ -5719,7 +5158,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -5727,7 +5166,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -5735,7 +5174,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 2.57143, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5743,7 +5182,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 58.2241, + "_value": 56.5798, "_timestamp": 0, "_device_uuid": null } @@ -5751,58 +5190,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_8", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 12 + "concurrency-range": 4, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 267.313, + "_value": 1.463, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 308.103, + "_value": 1.697, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 316.268, + "_value": 1.783, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 354.765, + "_value": 2.024, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 43.804, + "_value": 2729.63, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.329, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 266.953, + "_value": 1.436, "_timestamp": 0 } ], @@ -5816,21 +5256,21 @@ [ "perf_server_compute_infer", { - "_value": 133.465, + "_value": 0.364, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.405, + "_value": 0.046, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.555, + "_value": 0.139, "_timestamp": 0 } ] @@ -5839,49 +5279,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 267.313, + "_value": 1.463, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 308.103, + "_value": 1.697, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 316.268, + "_value": 1.783, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 354.765, + "_value": 2.024, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 43.804, + "_value": 2729.63, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.329, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 266.953, + "_value": 1.436, "_timestamp": 0 } ], @@ -5895,261 +5335,52 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 133.465, + "_value": 0.364, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.405, + "_value": 0.046, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.555, + "_value": 0.139, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_9": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_9", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ + }, + "ensemble_add_sub_config_2 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_1,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_2", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "output": [ + [ + "gpu_free_memory", { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_5", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 48, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_9", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_9-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_5", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_9 -b 1 -i grpc -f ensemble_python_resnet50_config_9-results.csv --verbose-csv --concurrency-range=48 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_9", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], [ "gpu_utilization", { - "_value": 2.42857, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6157,7 +5388,7 @@ [ "gpu_power_usage", { - "_value": 57.6587, + "_value": 56.684, "_timestamp": 0, "_device_uuid": null } @@ -6168,7 +5399,7 @@ [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -6176,7 +5407,7 @@ [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -6184,7 +5415,7 @@ [ "gpu_utilization", { - "_value": 2.42857, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6192,7 +5423,7 @@ [ "gpu_power_usage", { - "_value": 57.6587, + "_value": 56.684, "_timestamp": 0, "_device_uuid": null } @@ -6202,7 +5433,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -6210,7 +5441,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -6218,7 +5449,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 2.42857, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6226,7 +5457,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.6587, + "_value": 56.684, "_timestamp": 0, "_device_uuid": null } @@ -6234,86 +5465,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_9", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 48 + "concurrency-range": 1, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 933.931, + "_value": 0.563, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 980.641, + "_value": 0.637, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 989.268, + "_value": 0.678, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 996.358, + "_value": 0.777, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 50.1384, + "_value": 1770.93, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.229, + "_value": 0.005, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 933.594, + "_value": 0.543, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.001, + "_value": 0.0, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 466.077, + "_value": 0.332, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 1.303, + "_value": 0.043, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 1.56, + "_value": 0.134, "_timestamp": 0 } ] @@ -6322,301 +5554,92 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 933.931, + "_value": 0.563, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 980.641, + "_value": 0.637, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 989.268, + "_value": 0.678, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 996.358, + "_value": 0.777, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 50.1384, + "_value": 1770.93, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.229, + "_value": 0.005, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 933.594, + "_value": 0.543, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.001, + "_value": 0.0, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 466.077, + "_value": 0.332, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 1.303, + "_value": 0.043, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 1.56, + "_value": 0.134, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_10": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_10", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_6", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_10", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_10-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_6", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_10 -b 1 -i grpc -f ensemble_python_resnet50_config_10-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_10", + }, + "ensemble_add_sub_config_2 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_1,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_2", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6624,7 +5647,7 @@ [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6632,7 +5655,7 @@ [ "gpu_utilization", { - "_value": 2.16667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6640,7 +5663,7 @@ [ "gpu_power_usage", { - "_value": 57.2107, + "_value": 56.7113, "_timestamp": 0, "_device_uuid": null } @@ -6651,7 +5674,7 @@ [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -6659,7 +5682,7 @@ [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -6667,7 +5690,7 @@ [ "gpu_utilization", { - "_value": 2.16667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6675,7 +5698,7 @@ [ "gpu_power_usage", { - "_value": 57.2107, + "_value": 56.7113, "_timestamp": 0, "_device_uuid": null } @@ -6685,7 +5708,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -6693,7 +5716,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -6701,7 +5724,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 2.16667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6709,7 +5732,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 57.2107, + "_value": 56.7113, "_timestamp": 0, "_device_uuid": null } @@ -6717,58 +5740,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_10", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 16 + "concurrency-range": 2, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 469.431, + "_value": 0.7, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 506.53, + "_value": 0.802, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 517.221, + "_value": 0.846, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 544.403, + "_value": 0.938, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 33.9879, + "_value": 2847.24, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.201, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 469.204, + "_value": 0.677, "_timestamp": 0 } ], @@ -6782,21 +5806,21 @@ [ "perf_server_compute_infer", { - "_value": 235.216, + "_value": 0.334, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.607, + "_value": 0.044, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.83, + "_value": 0.136, "_timestamp": 0 } ] @@ -6805,49 +5829,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 469.431, + "_value": 0.7, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 506.53, + "_value": 0.802, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 517.221, + "_value": 0.846, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 544.403, + "_value": 0.938, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 33.9879, + "_value": 2847.24, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.201, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 469.204, + "_value": 0.677, "_timestamp": 0 } ], @@ -6861,245 +5885,36 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 235.216, + "_value": 0.334, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.607, + "_value": 0.044, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.83, + "_value": 0.136, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_11": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_11", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_7", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 32, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_11", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_11-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_7", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_11 -b 1 -i grpc -f ensemble_python_resnet50_config_11-results.csv --verbose-csv --concurrency-range=32 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_11", + }, + "ensemble_add_sub_config_2 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_1,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_2", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7107,7 +5922,7 @@ [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7115,7 +5930,7 @@ [ "gpu_utilization", { - "_value": 3.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7123,7 +5938,7 @@ [ "gpu_power_usage", { - "_value": 58.557, + "_value": 56.641, "_timestamp": 0, "_device_uuid": null } @@ -7134,7 +5949,7 @@ [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -7142,7 +5957,7 @@ [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -7150,7 +5965,7 @@ [ "gpu_utilization", { - "_value": 3.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7158,7 +5973,7 @@ [ "gpu_power_usage", { - "_value": 58.557, + "_value": 56.641, "_timestamp": 0, "_device_uuid": null } @@ -7168,7 +5983,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1711.276032, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -7176,7 +5991,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23684.186112, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -7184,7 +5999,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 3.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7192,7 +6007,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 58.557, + "_value": 56.641, "_timestamp": 0, "_device_uuid": null } @@ -7200,58 +6015,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_11", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 32 + "concurrency-range": 8, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 536.467, + "_value": 2.888, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 600.547, + "_value": 3.357, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 610.342, + "_value": 3.497, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 636.662, + "_value": 3.749, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 59.9639, + "_value": 2766.53, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.23, + "_value": 0.007, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 536.21, + "_value": 2.858, "_timestamp": 0 } ], @@ -7265,21 +6081,21 @@ [ "perf_server_compute_infer", { - "_value": 270.016, + "_value": 0.364, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.622, + "_value": 0.046, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.728, + "_value": 0.138, "_timestamp": 0 } ] @@ -7288,49 +6104,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 536.467, + "_value": 2.888, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 600.547, + "_value": 3.357, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 610.342, + "_value": 3.497, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 636.662, + "_value": 3.749, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 59.9639, + "_value": 2766.53, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.23, + "_value": 0.007, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 536.21, + "_value": 2.858, "_timestamp": 0 } ], @@ -7344,82 +6160,376 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 270.016, + "_value": 0.364, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.622, + "_value": 0.046, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.728, + "_value": 0.138, "_timestamp": 0 } ] } } ] - } - } - ], - "ensemble_python_resnet50_config_12": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_12", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ + }, + "ensemble_add_sub_config_2 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_1,sub_config_0": { + "_model_variants_name": "ensemble_add_sub_config_2", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } ], - "output": [ + [ + "gpu_free_memory", { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null } ], - "ensembleScheduling": { - "step": [ + [ + "gpu_power_usage", + { + "_value": 56.795, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.795, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 870.31808, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24899.485696, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.795, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "ensemble_add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 16, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", { - "modelName": "preprocess_config_2", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, + "_value": 5.91, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", { - "modelName": "resnet50_trt_config_5", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } + "_value": 6.698, + "_timestamp": 0 } - ] + ], + [ + "perf_latency_p95", + { + "_value": 6.906, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 7.706, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2702.59, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 5.88, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.001, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.371, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.047, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.143, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 5.91, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 6.698, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 6.906, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 7.706, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2702.59, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.007, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.88, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.001, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.371, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.047, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.143, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "ensemble_add_sub_config_3": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "ensemble_add_sub", + "_model_config_variant": { + "model_config": { + "name": "ensemble_add_sub", + "platform": "ensemble", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "ensembleScheduling": { + "step": [ + { + "modelName": "add", + "modelVersion": "1", + "inputMap": { + "INPUT1": "INPUT1", + "INPUT0": "INPUT0" + }, + "outputMap": { + "OUTPUT0": "OUTPUT0" + } + }, + { + "modelName": "sub", + "modelVersion": "1", + "inputMap": { + "INPUT0": "INPUT0", + "INPUT1": "INPUT1" + }, + "outputMap": { + "OUTPUT1": "OUTPUT1" + } + } + ] + } }, + "variant_name": "ensemble_add_sub_config_3", "cpu_only": false }, "_perf_config": { @@ -7429,7 +6539,7 @@ "async": null, "sync": null, "measurement-interval": null, - "concurrency-range": 24, + "concurrency-range": 6, "request-rate-range": null, "request-distribution": null, "request-intervals": null, @@ -7440,10 +6550,10 @@ "stability-percentage": null, "max-trials": null, "percentile": null, - "input-data": "/swdev/profile_models/test_image", + "input-data": null, "shared-memory": null, "output-shared-memory-size": null, - "shape": "INPUT:1005970", + "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, @@ -7467,15 +6577,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 + "metrics-interval": 1000, + "bls-composing-models": null }, "_options": { - "-m": "ensemble_python_resnet50_config_12", + "-m": "ensemble_add_sub", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "ensemble_python_resnet50_config_12-results.csv", + "-f": "ensemble_add_sub-results.csv", "-H": null }, "_verbose": { @@ -7502,72 +6613,83 @@ "shape": null } }, - "_composing_configs": [ - { - "name": "preprocess_config_2", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", + "_composing_config_variants": [ + { + "model_config": { + "name": "add", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "add_config_2", "cpu_only": false }, { - "name": "resnet50_trt_config_5", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, + "model_config": { + "name": "sub", + "input": [ + { + "name": "INPUT0", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + }, + { + "name": "INPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "output": [ + { + "name": "OUTPUT1", + "dataType": "TYPE_FP32", + "dims": [ + "4" + ] + } + ], + "instanceGroup": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "backend": "python" + }, + "variant_name": "sub_config_2", "cpu_only": false } ] @@ -7575,14 +6697,14 @@ ] }, { - "-m ensemble_python_resnet50_config_12 -b 1 -i grpc -f ensemble_python_resnet50_config_12-results.csv --verbose-csv --concurrency-range=24 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_12", + "ensemble_add_sub_config_3 -m ensemble_add_sub -b 1 -i grpc -f ensemble_add_sub-results.csv --verbose-csv --concurrency-range=6 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000 add_config_2,sub_config_2": { + "_model_variants_name": "ensemble_add_sub_config_3", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 1713.373184, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7590,7 +6712,7 @@ [ "gpu_free_memory", { - "_value": 23682.08896, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7598,7 +6720,7 @@ [ "gpu_utilization", { - "_value": 3.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7606,7 +6728,7 @@ [ "gpu_power_usage", { - "_value": 58.0587, + "_value": 56.6983, "_timestamp": 0, "_device_uuid": null } @@ -7617,7 +6739,7 @@ [ "gpu_used_memory", { - "_value": 1713.373184, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -7625,7 +6747,7 @@ [ "gpu_free_memory", { - "_value": 23682.08896, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -7633,7 +6755,7 @@ [ "gpu_utilization", { - "_value": 3.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7641,7 +6763,7 @@ [ "gpu_power_usage", { - "_value": 58.0587, + "_value": 56.6983, "_timestamp": 0, "_device_uuid": null } @@ -7651,7 +6773,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 1713.373184, + "_value": 870.31808, "_timestamp": 0, "_device_uuid": null } @@ -7659,7 +6781,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23682.08896, + "_value": 24899.485696, "_timestamp": 0, "_device_uuid": null } @@ -7667,7 +6789,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 3.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7675,7 +6797,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 58.0587, + "_value": 56.6983, "_timestamp": 0, "_device_uuid": null } @@ -7683,58 +6805,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "ensemble_python_resnet50_config_12", + "_model_config_name": "ensemble_add_sub", "_model_specific_pa_params": { "batch-size": 1, - "concurrency-range": 24 + "concurrency-range": 6, + "request-rate-range": null }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 463.4, + "_value": 2.211, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 493.655, + "_value": 2.534, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 515.281, + "_value": 2.635, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 566.765, + "_value": 2.835, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 51.9701, + "_value": 2709.16, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.233, + "_value": 0.006, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 463.137, + "_value": 2.183, "_timestamp": 0 } ], @@ -7748,21 +6871,21 @@ [ "perf_server_compute_infer", { - "_value": 231.906, + "_value": 0.371, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.575, + "_value": 0.047, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.736, + "_value": 0.14, "_timestamp": 0 } ] @@ -7771,49 +6894,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 463.4, + "_value": 2.211, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 493.655, + "_value": 2.534, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 515.281, + "_value": 2.635, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 566.765, + "_value": 2.835, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 51.9701, + "_value": 2709.16, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.233, + "_value": 0.006, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 463.137, + "_value": 2.183, "_timestamp": 0 } ], @@ -7827,21 +6950,21 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 231.906, + "_value": 0.371, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.575, + "_value": 0.047, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.736, + "_value": 0.14, "_timestamp": 0 } ] @@ -7850,19044 +6973,152 @@ ] } } - ], - "ensemble_python_resnet50_config_13": [ + ] + } + } + }, + "ResultManager.server_only_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 870.0, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24899.0, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.084, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "MetricsManager.gpus": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": { + "name": "NVIDIA TITAN RTX", + "total_memory": 25387401216 + } + }, + "ModelManager.model_variant_name_manager": { + "_model_config_dicts": { + "add_config_0": { + "name": "add", + "input": [ { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_13", - "platform": "ensemble", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_8", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 16, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_13", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_13-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_8", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_13 -b 1 -i grpc -f ensemble_python_resnet50_config_13-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_13", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.211, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.211, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.211, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_13", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 16 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 282.465, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 313.659, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 332.478, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 341.651, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 57.1808, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.195, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 282.242, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 141.199, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.366, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.804, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 282.465, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 313.659, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 332.478, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 341.651, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 57.1808, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.195, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 282.242, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 141.199, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.366, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.804, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_14": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_14", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_14", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_14-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_14 -b 1 -i grpc -f ensemble_python_resnet50_config_14-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_14", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.4603, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.4603, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.4603, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_14", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1054.964, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1166.198, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1175.743, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1318.383, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9578, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1054.714, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 521.489, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.256, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.677, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1054.964, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1166.198, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1175.743, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1318.383, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9578, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1054.714, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 521.489, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.256, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.677, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_15": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_15", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_10", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 40, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 100, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_15", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_15-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_10", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_15 -b 1 -i grpc -f ensemble_python_resnet50_config_15-results.csv --verbose-csv --concurrency-range=40 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_15", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.16667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5243, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.16667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5243, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.16667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.5243, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_15", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 40 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 737.447, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 912.533, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 973.061, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1121.404, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 55.9165, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.221, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 737.2, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 365.423, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.644, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.837, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 737.447, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 912.533, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 973.061, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1121.404, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 55.9165, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.221, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 737.2, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 365.423, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.644, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.837, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_16": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_16", - "platform": "ensemble", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_7", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_2", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 32, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_16", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_16-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_7", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_2", - "platform": "tensorrt_plan", - "maxBatchSize": 2, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_16 -b 1 -i grpc -f ensemble_python_resnet50_config_16-results.csv --verbose-csv --concurrency-range=32 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_16", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1707.081728, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23688.380416, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 4.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.6923, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1707.081728, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23688.380416, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 4.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.6923, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1707.081728, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23688.380416, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 4.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.6923, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_16", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 32 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 515.701, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 628.079, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 638.019, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 645.325, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 62.6311, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.236, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 515.422, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 255.909, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.598, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.993, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 515.701, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 628.079, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 638.019, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 645.325, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 62.6311, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.236, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 515.422, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 255.909, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.598, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.993, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_17": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_17", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_7", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_5", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 32, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 100, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_17", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_17-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_7", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_5", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_17 -b 1 -i grpc -f ensemble_python_resnet50_config_17-results.csv --verbose-csv --concurrency-range=32 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_17", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.42857, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.0471, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.42857, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.0471, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.42857, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.0471, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_17", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 32 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 540.602, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 614.052, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 633.266, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 722.03, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 59.3106, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.234, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 540.34, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 272.176, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.64, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.126, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 540.602, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 614.052, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 633.266, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 722.03, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 59.3106, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.234, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 540.34, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 272.176, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.64, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.126, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_18": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_18", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_7", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_4", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 32, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 100, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_18", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_18-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_7", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_18 -b 1 -i grpc -f ensemble_python_resnet50_config_18-results.csv --verbose-csv --concurrency-range=32 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_18", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.71429, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5544, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.71429, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5544, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.71429, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.5544, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_18", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 32 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 571.402, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 656.752, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 661.962, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 720.904, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 55.969, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.228, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 571.147, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 286.014, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.792, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.639, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 571.402, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 656.752, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 661.962, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 720.904, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 55.969, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.228, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 571.147, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 286.014, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.792, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.639, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_19": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_19", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_11", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 128, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_19", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_19-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_11", - "maxBatchSize": 16, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_19 -b 1 -i grpc -f ensemble_python_resnet50_config_19-results.csv --verbose-csv --concurrency-range=128 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_19", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 63.985, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 63.985, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1711.276032, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23684.186112, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 63.985, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_19", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 128 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 2053.253, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 2161.916, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 2195.419, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2198.828, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9608, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.193, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 2053.037, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 1026.435, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 3.379, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 2.754, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 2053.253, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 2161.916, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 2195.419, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2198.828, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9608, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.193, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 2053.037, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 1026.435, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 3.379, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 2.754, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_20": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_20", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_12", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_6", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 80, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 200, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_20", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_20-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_12", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_6", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_20 -b 1 -i grpc -f ensemble_python_resnet50_config_20-results.csv --verbose-csv --concurrency-range=80 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_20", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.46667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.7442, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.46667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.7442, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.46667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.7442, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_20", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 80 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1623.652, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1853.322, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1954.932, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2210.973, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 49.0579, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.201, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1623.43, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 811.941, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.274, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.868, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1623.652, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1853.322, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1954.932, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2210.973, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 49.0579, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.201, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1623.43, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 811.941, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.274, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.868, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_21": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_21", - "platform": "ensemble", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_2", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_21", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_21-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_2", - "platform": "tensorrt_plan", - "maxBatchSize": 2, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_21 -b 1 -i grpc -f ensemble_python_resnet50_config_21-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_21", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1707.081728, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23688.380416, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.7635, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1707.081728, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23688.380416, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.7635, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1707.081728, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23688.380416, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.7635, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_21", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1038.491, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1134.513, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1149.159, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1179.337, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9451, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.23, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1038.237, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 511.47, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.321, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.473, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1038.491, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1134.513, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1149.159, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1179.337, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9451, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.23, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1038.237, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 511.47, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.321, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.473, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_22": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_22", - "platform": "ensemble", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_5", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 100, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_22", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_22-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_5", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_22 -b 1 -i grpc -f ensemble_python_resnet50_config_22-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_22", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.71429, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.8574, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.71429, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.8574, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 1713.373184, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23682.08896, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.71429, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.8574, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_22", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1066.227, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1174.543, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1179.421, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1195.122, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 61.2811, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.232, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1065.969, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 532.164, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.354, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.163, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1066.227, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1174.543, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1179.421, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1195.122, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 61.2811, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.232, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1065.969, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 532.164, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.354, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.163, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_23": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_23", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_4", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_23", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_23-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 60.1845, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 60.1845, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 60.1845, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1034.552, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1094.266, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1098.096, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1132.917, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9634, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.253, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1034.277, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 518.669, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.612, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.136, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1034.552, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1094.266, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1098.096, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1132.917, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9634, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.253, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1034.277, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 518.669, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.612, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.136, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=1 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 0.833333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.267, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 0.833333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.267, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 0.833333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.267, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 1 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 70.432, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 80.615, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 81.76, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 93.431, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 14.1636, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.154, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 70.252, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 68.336, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.295, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.244, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 70.432, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 80.615, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 81.76, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 93.431, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 14.1636, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.154, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 70.252, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 68.336, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.295, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.244, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.7323, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.7323, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.7323, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 2 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 69.137, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 82.691, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 90.244, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 103.562, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 28.823, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.149, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 68.963, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 66.825, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.243, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.233, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 69.137, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 82.691, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 90.244, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 103.562, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 28.823, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.149, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 68.963, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 66.825, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.243, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.233, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=4 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.5999999999999996, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.7626, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.5999999999999996, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.7626, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.5999999999999996, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.7626, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 4 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 79.07, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 104.643, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 119.286, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 176.687, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 50.1783, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.166, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 78.88, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 76.655, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.233, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.32, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 79.07, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 104.643, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 119.286, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 176.687, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 50.1783, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.166, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 78.88, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 76.655, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.233, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.32, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.5000000000000004, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.822, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.5000000000000004, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.822, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.5000000000000004, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.822, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 8 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 145.676, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 291.278, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 345.544, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 397.287, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 54.2943, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.311, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 145.336, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 123.315, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.496, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.393, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 145.676, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 291.278, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 345.544, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 397.287, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 54.2943, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.311, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 145.336, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 123.315, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.496, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.393, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.83333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.1038, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.83333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.1038, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.83333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.1038, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 16 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 290.615, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 681.165, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 714.0, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 752.881, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 53.98, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.207, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 290.381, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 260.804, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.664, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.73, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 290.615, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 681.165, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 714.0, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 752.881, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 53.98, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.207, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 290.381, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 260.804, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.664, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.73, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=32 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.7523, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.7523, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.7523, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 32 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 556.369, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 740.835, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 820.573, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 952.515, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 56.3208, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.214, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 556.132, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 453.198, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.211, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.044, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 556.369, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 740.835, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 820.573, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 952.515, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 56.3208, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.214, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 556.132, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 453.198, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.211, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.044, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=128 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.42857, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.9954, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.42857, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.9954, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.42857, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.9954, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 128 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 2376.07, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 2527.073, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 2568.831, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2795.338, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 53.854, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.216, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 2375.832, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 591.716, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.539, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.346, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 2376.07, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 2527.073, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 2568.831, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2795.338, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 53.854, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.216, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 2375.832, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 591.716, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.539, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.346, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=256 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.53333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.7375, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.53333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.7375, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.53333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.7375, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 256 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 4744.923, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 4996.097, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 5059.28, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 5115.068, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 53.3082, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.218, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 4744.682, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 593.805, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.613, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.524, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 4744.923, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 4996.097, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 5059.28, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 5115.068, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 53.3082, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.218, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 4744.682, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 593.805, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.613, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.524, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_23 -b 1 -i grpc -f ensemble_python_resnet50_config_23-results.csv --verbose-csv --concurrency-range=512 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_23", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.4, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.9738, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.4, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.9738, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.4, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.9738, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_23", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 512 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 9297.034, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 9611.503, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 9662.401, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 9758.596, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 54.3845, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.205, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 9296.807, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 591.274, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.577, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.224, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 9297.034, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 9611.503, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 9662.401, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 9758.596, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 54.3845, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.205, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 9296.807, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 591.274, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.577, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.224, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_24": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_24", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_11", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_4", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 128, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_24", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_24-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_11", - "maxBatchSize": 16, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_24 -b 1 -i grpc -f ensemble_python_resnet50_config_24-results.csv --verbose-csv --concurrency-range=128 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_24", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.6235, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.6235, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.6235, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_24", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 128 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 2095.699, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 2213.451, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 2257.194, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2261.951, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9625, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.197, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 2095.48, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 1038.038, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 3.982, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 3.011, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 2095.699, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 2213.451, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 2257.194, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2261.951, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9625, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.197, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 2095.48, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 1038.038, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 3.982, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 3.011, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_25": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_25", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_5", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_4", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 48, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_25", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_25-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_5", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_25 -b 1 -i grpc -f ensemble_python_resnet50_config_25-results.csv --verbose-csv --concurrency-range=48 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_25", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.1428599999999998, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5959, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.1428599999999998, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5959, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.1428599999999998, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.5959, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_25", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 48 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 942.337, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 980.773, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1005.948, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1016.812, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 49.3162, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.238, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 942.075, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 471.562, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.457, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.396, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 942.337, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 980.773, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1005.948, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1016.812, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 49.3162, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.238, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 942.075, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 471.562, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.457, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.396, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_26": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_26", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_12", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_4", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 80, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 350, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_26", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_26-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_12", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_4", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_26 -b 1 -i grpc -f ensemble_python_resnet50_config_26-results.csv --verbose-csv --concurrency-range=80 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_26", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.08, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.0269, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.08, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.0269, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2797.600768, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22597.861376, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.08, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.0269, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_26", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 80 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1679.972, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1981.802, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 2030.118, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2110.545, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 47.3264, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.209, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1679.742, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 840.959, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.533, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.486, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1679.972, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1981.802, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 2030.118, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2110.545, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 47.3264, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.209, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1679.742, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 840.959, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.533, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.486, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_27": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_27", - "platform": "ensemble", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_7", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 100, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_27", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_27-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_7", - "platform": "tensorrt_plan", - "maxBatchSize": 2, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_27 -b 1 -i grpc -f ensemble_python_resnet50_config_27-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_27", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2791.309312, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22604.152832, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.16667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 61.4813, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2791.309312, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22604.152832, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.16667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 61.4813, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2791.309312, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22604.152832, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.16667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 61.4813, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_27", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1163.389, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1307.288, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1325.693, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1438.988, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 54.5736, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.244, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1163.122, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 578.764, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.446, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.918, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1163.389, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1307.288, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1325.693, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1438.988, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 54.5736, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.244, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1163.122, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 578.764, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.446, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.918, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_28": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_28", - "platform": "ensemble", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_8", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_28", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_28-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_8", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 61.9763, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 61.9763, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 61.9763, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1016.953, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1082.052, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1094.27, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1104.425, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9635, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.262, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1016.666, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 507.177, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.807, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.016, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1016.953, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1082.052, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1094.27, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1104.425, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9635, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.262, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1016.666, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 507.177, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.807, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.016, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=1 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 1.30769, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.4609, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 1.30769, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.4609, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 1.30769, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.4609, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 1 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 66.496, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 78.741, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 79.455, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 82.38, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 15.0797, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.142, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 66.329, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 64.612, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.269, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.232, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 66.496, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 78.741, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 79.455, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 82.38, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 15.0797, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.142, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 66.329, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 64.612, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.269, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.232, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.7923, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.7923, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.33333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.7923, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 2 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 66.639, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 80.425, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 84.565, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 94.12, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 29.824, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.148, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 66.468, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 64.323, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.246, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.197, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 66.639, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 80.425, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 84.565, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 94.12, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 29.824, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.148, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 66.468, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 64.323, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.246, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.197, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=4 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.5999999999999996, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.352, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.5999999999999996, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.352, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.5999999999999996, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.352, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 4 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 77.733, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 103.158, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 111.217, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 133.904, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 50.9711, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.174, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 77.536, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 74.965, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.24, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.26, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 77.733, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 103.158, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 111.217, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 133.904, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 50.9711, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.174, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 77.536, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 74.965, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.24, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.26, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.95, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.95, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.95, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 8 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 142.355, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 231.138, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 267.983, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 370.523, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 55.2967, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.21, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 142.114, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 120.518, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.388, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.323, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 142.355, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 231.138, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 267.983, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 370.523, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 55.2967, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.21, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 142.114, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 120.518, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.388, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.323, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 4.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.438, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 4.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.438, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 4.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.438, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 16 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 274.165, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 526.286, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 541.98, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 579.058, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 59.9629, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 273.912, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 238.563, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.755, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.683, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 274.165, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 526.286, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 541.98, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 579.058, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 59.9629, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 273.912, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 238.563, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.755, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.683, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=32 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.85714, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.7799, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.85714, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.7799, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.85714, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.7799, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 32 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 524.983, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 672.189, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 678.066, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 705.281, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 60.6222, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 524.735, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 469.111, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.505, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.88, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 524.983, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 672.189, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 678.066, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 705.281, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 60.6222, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 524.735, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 469.111, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.505, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.88, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=128 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.8155, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.8155, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.8155, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 128 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 2226.393, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 2357.81, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 2405.998, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2505.26, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 56.0811, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.227, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 2226.221, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 563.635, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.776, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.161, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 2226.393, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 2357.81, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 2405.998, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2505.26, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 56.0811, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.227, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 2226.221, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 563.635, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.776, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.161, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=256 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.8333299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.0408, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.8333299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.0408, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.8333299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.0408, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 256 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 4324.26, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 4568.935, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 4575.364, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 4677.595, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 62.6148, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.245, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 4323.99, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 530.136, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.802, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.896, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 4324.26, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 4568.935, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 4575.364, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 4677.595, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 62.6148, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.245, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 4323.99, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 530.136, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.802, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.896, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_28 -b 1 -i grpc -f ensemble_python_resnet50_config_28-results.csv --verbose-csv --concurrency-range=512 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_28", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5077, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5077, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.5077, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_28", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 512 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 8370.93, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 8811.491, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 8848.201, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 8873.138, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 60.4238, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 74.798, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 8296.018, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 533.859, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.806, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.966, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 8370.93, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 8811.491, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 8848.201, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 8873.138, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 60.4238, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 74.798, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 8296.018, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 533.859, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.806, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.966, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_29": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_29", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_3", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_29", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_29-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_3", - "platform": "tensorrt_plan", - "maxBatchSize": 4, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5107, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5107, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.5107, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 997.813, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1064.866, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1067.709, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1088.71, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9621, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.24, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 997.55, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 498.513, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.75, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.106, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 997.813, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1064.866, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1067.709, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1088.71, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9621, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.24, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 997.55, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 498.513, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.75, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.106, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=1 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 0.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.3275, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 0.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.3275, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 0.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.3275, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 1 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 71.503, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 81.014, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 82.681, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 91.026, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 13.9131, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.156, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 71.319, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 69.394, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.297, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.249, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 71.503, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 81.014, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 82.681, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 91.026, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 13.9131, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.156, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 71.319, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 69.394, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.297, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.249, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=2 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.6905, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.6905, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.6905, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 2 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 67.833, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 83.777, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 94.759, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 103.375, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 29.3235, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.152, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 67.655, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 65.345, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.254, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.245, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 67.833, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 83.777, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 94.759, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 103.375, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 29.3235, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.152, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 67.655, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 65.345, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.254, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.245, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=4 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.8505, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.8505, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 4.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.8505, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 4 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 82.362, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 112.726, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 124.504, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 138.76, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 48.7267, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.137, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 82.202, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 80.067, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.262, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.387, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 82.362, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 112.726, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 124.504, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 138.76, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 48.7267, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.137, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 82.202, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 80.067, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.262, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.387, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=8 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.3475, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.3475, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.3475, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 8 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 144.18, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 314.469, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 318.893, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 362.074, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 54.3006, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.196, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 143.955, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 123.564, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.388, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.451, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 144.18, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 314.469, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 318.893, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 362.074, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 54.3006, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.196, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 143.955, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 123.564, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.388, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.451, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=16 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5282, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.5282, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.5282, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 16 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 296.291, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 564.858, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 605.173, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 635.932, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 57.2043, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.193, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 297.309, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 264.688, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.81, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.902, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 296.291, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 564.858, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 605.173, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 635.932, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 57.2043, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.193, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 297.309, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 264.688, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.81, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.902, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=32 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 61.4247, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 61.4247, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 61.4247, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 32 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 527.965, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 640.501, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 651.898, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 655.229, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 59.2975, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.235, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 527.706, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 467.336, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.33, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.953, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 527.965, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 640.501, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 651.898, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 655.229, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 59.2975, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.235, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 527.706, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 467.336, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.33, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.953, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=128 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.8333299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.2065, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.8333299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.2065, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.8333299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.2065, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 128 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 2139.868, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 2258.123, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 2265.68, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2366.481, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 61.3069, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 2139.618, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 533.663, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.723, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.27, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 2139.868, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 2258.123, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 2265.68, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2366.481, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 61.3069, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 2139.618, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 533.663, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.723, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.27, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=256 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.5555600000000003, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.4368, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.5555600000000003, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.4368, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.5555600000000003, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.4368, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 256 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 4475.279, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 4654.34, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 4696.38, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 4798.784, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 54.2078, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.241, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 4475.011, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 571.951, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.758, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.061, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 4475.279, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 4654.34, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 4696.38, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 4798.784, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 54.2078, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.241, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 4475.011, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 571.951, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.758, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.061, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m ensemble_python_resnet50_config_29 -b 1 -i grpc -f ensemble_python_resnet50_config_29-results.csv --verbose-csv --concurrency-range=512 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_29", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.77778, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.0226, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.77778, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 59.0226, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3881.828352, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21513.633792, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.77778, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 59.0226, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_29", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 512 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 8561.742, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 8863.619, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 8909.449, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 8994.489, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 57.7067, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 8561.494, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 557.267, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.669, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.079, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 8561.742, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 8863.619, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 8909.449, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 8994.489, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 57.7067, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.225, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 8561.494, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 557.267, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.669, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.079, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_30": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_30", - "platform": "ensemble", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_7", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_8", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 32, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_30", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_30-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_7", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_8", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_30 -b 1 -i grpc -f ensemble_python_resnet50_config_30-results.csv --verbose-csv --concurrency-range=32 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_30", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 60.063, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 60.063, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 3.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 60.063, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_30", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 32 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 526.163, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 585.499, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 606.113, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 650.538, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 61.2435, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.238, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 525.897, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 261.159, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.785, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.512, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 526.163, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 585.499, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 606.113, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 650.538, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 61.2435, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.238, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 525.897, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 261.159, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.785, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.512, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_31": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_31", - "platform": "ensemble", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_11", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_8", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 128, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_31", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_31-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_11", - "maxBatchSize": 16, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_8", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_31 -b 1 -i grpc -f ensemble_python_resnet50_config_31-results.csv --verbose-csv --concurrency-range=128 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_31", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 56.9203, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 56.9203, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 56.9203, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_31", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 128 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 2011.124, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 2075.005, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 2082.275, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2084.033, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9481, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.276, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 2010.821, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 987.335, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 3.799, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 2.965, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 2011.124, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 2075.005, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 2082.275, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2084.033, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9481, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.276, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 2010.821, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 987.335, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 3.799, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 2.965, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_32": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_32", - "platform": "ensemble", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_5", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_8", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 48, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_32", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_32-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_5", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_8", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_32 -b 1 -i grpc -f ensemble_python_resnet50_config_32-results.csv --verbose-csv --concurrency-range=48 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_32", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.1999999999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.2376, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.1999999999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 57.2376, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.1999999999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 57.2376, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_32", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 48 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 952.8, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1050.095, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1055.497, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1064.966, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 53.9733, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.224, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 952.552, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 475.351, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.645, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.88, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 952.8, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1050.095, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1055.497, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1064.966, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 53.9733, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.224, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 952.552, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 475.351, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.645, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.88, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_33": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_33", - "platform": "ensemble", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_12", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_8", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 80, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 150, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_33", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_33-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_12", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_8", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_33 -b 1 -i grpc -f ensemble_python_resnet50_config_33-results.csv --verbose-csv --concurrency-range=80 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_33", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.0833299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.3716, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.0833299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 58.3716, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2801.795072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22593.667072, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.0833299999999997, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 58.3716, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_33", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 80 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 1718.371, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1980.591, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 2064.189, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 2228.842, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 45.3134, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.215, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 1718.134, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 875.158, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.701, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.127, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 1718.371, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1980.591, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 2064.189, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 2228.842, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 45.3134, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.215, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 1718.134, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 875.158, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.701, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.127, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_34": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_34", - "platform": "ensemble", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_9", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_34", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_34-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_9", - "platform": "tensorrt_plan", - "maxBatchSize": 16, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_34 -b 1 -i grpc -f ensemble_python_resnet50_config_34-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_34", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2810.18368, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 22585.278464000003, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 61.226, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2810.18368, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 22585.278464000003, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 61.226, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2810.18368, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 22585.278464000003, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.75, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 61.226, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_34", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 991.453, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1049.864, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1068.453, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1079.22, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9524, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.242, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 991.173, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 496.643, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.713, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 1.087, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 991.453, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1049.864, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1068.453, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1079.22, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9524, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.242, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 991.173, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 496.643, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.713, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 1.087, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "ensemble_python_resnet50_config_35": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "ensemble_python_resnet50", - "_model_config": { - "name": "ensemble_python_resnet50_config_35", - "platform": "ensemble", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ] - } - ], - "ensembleScheduling": { - "step": [ - { - "modelName": "preprocess_config_9", - "modelVersion": "-1", - "inputMap": { - "INPUT_0": "INPUT" - }, - "outputMap": { - "OUTPUT_0": "preprocessed_image" - } - }, - { - "modelName": "resnet50_trt_config_10", - "modelVersion": "-1", - "inputMap": { - "input": "preprocessed_image" - }, - "outputMap": { - "output": "OUTPUT" - } - } - ] - }, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": 64, - "request-rate-range": null, - "request-distribution": null, - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": "/swdev/profile_models/test_image", - "shared-memory": null, - "output-shared-memory-size": null, - "shape": "INPUT:1005970", - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": null, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0 - }, - "_options": { - "-m": "ensemble_python_resnet50_config_35", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "ensemble_python_resnet50_config_35-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [ - { - "name": "preprocess_config_9", - "maxBatchSize": 8, - "input": [ - { - "name": "INPUT_0", - "dataType": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "backend": "python", - "cpu_only": false - }, - { - "name": "resnet50_trt_config_10", - "platform": "tensorrt_plan", - "maxBatchSize": 8, - "input": [ - { - "name": "input", - "dataType": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "labels.txt" - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - } - ] - } - ] - }, - { - "-m ensemble_python_resnet50_config_35 -b 1 -i grpc -f ensemble_python_resnet50_config_35-results.csv --verbose-csv --concurrency-range=64 --input-data=/swdev/profile_models/test_image --shape=INPUT:1005970 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "ensemble_python_resnet50_config_35", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 3888.119808, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 21507.342336, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 56.8153, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 3888.119808, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 21507.342336, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 56.8153, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 3888.119808, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 21507.342336, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 2.66667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 56.8153, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "ensemble_python_resnet50_config_35", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 971.591, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 1010.474, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 1012.934, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 1057.06, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 63.9613, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.232, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 971.336, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 487.517, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 1.698, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.884, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 971.591, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 1010.474, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 1012.934, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 1057.06, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 63.9613, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.232, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 971.336, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.001, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 487.517, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 1.698, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.884, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ] - } - } - }, - "ResultManager.server_only_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 457.0, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 24938.0, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 0.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 55.816, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "MetricsManager.gpus": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": { - "name": "NVIDIA TITAN RTX", - "total_memory": 25395462144 - } - }, - "ModelManager.model_variant_name_manager": { - "_model_config_dicts": { - "preprocess_config_0": { - "name": "preprocess", - "max_batch_size": 1, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "resnet50_trt_config_0": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 1, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], - "instance_group": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_0": { - "key": "preprocess_config_0,resnet50_trt_config_0", - "name": "ensemble_python_resnet50" - }, - "resnet50_trt_config_1": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 1, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_1": { - "key": "preprocess_config_0,resnet50_trt_config_1", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_1": { - "name": "preprocess", - "max_batch_size": 2, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "resnet50_trt_config_2": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 2, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], - "instance_group": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_2": { - "key": "preprocess_config_1,resnet50_trt_config_2", - "name": "ensemble_python_resnet50" - }, - "resnet50_trt_config_3": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 4, - "input": [ - { - "name": "input", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "-1", - "-1" - ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], - "instance_group": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_3": { - "key": "preprocess_config_0,resnet50_trt_config_3", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_2": { - "name": "preprocess", - "max_batch_size": 4, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_4": { - "key": "preprocess_config_2,resnet50_trt_config_2", - "name": "ensemble_python_resnet50" - }, - "resnet50_trt_config_4": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 4, - "input": [ - { - "name": "input", + "name": "INPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "-1", - "-1" + "4" ] - } - ], - "output": [ + }, { - "name": "output", + "name": "INPUT1", "data_type": "TYPE_FP32", "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_5": { - "key": "preprocess_config_2,resnet50_trt_config_4", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_3": { - "name": "preprocess", - "max_batch_size": 16, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" + "4" ] } ], "output": [ { - "name": "OUTPUT_0", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "resnet50_trt_config_5": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 8, - "input": [ - { - "name": "input", + "name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "-1", - "-1" + "4" ] } ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], "instance_group": [ { "count": 1, "kind": "KIND_GPU" } ], - "dynamic_batching": {} + "backend": "python" }, - "ensemble_python_resnet50_config_6": { - "key": "preprocess_config_3,resnet50_trt_config_5", - "name": "ensemble_python_resnet50" - }, - "resnet50_trt_config_6": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 4, + "sub_config_0": { + "name": "sub", "input": [ { - "name": "input", + "name": "INPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "-1", - "-1" + "4" ] - } - ], - "output": [ + }, { - "name": "output", + "name": "INPUT1", "data_type": "TYPE_FP32", "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], - "instance_group": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_7": { - "key": "preprocess_config_2,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_4": { - "name": "preprocess", - "max_batch_size": 2, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" + "4" ] } ], "output": [ { - "name": "OUTPUT_0", + "name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [ - "3", - "224", - "224" + "4" ] } ], "instance_group": [ { - "count": 3, + "count": 1, "kind": "KIND_GPU" } ], - "backend": "python", - "dynamic_batching": {} + "backend": "python" }, - "ensemble_python_resnet50_config_8": { - "key": "preprocess_config_4,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" + "ensemble_add_sub_config_0": { + "key": "add_config_0,sub_config_0", + "name": "ensemble_add_sub" }, - "preprocess_config_5": { - "name": "preprocess", - "max_batch_size": 8, + "sub_config_1": { + "name": "sub", "input": [ { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", + "name": "INPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "224", - "224" + "4" ] - } - ], - "instance_group": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_9": { - "key": "preprocess_config_5,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_6": { - "name": "preprocess", - "max_batch_size": 4, - "input": [ + }, { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", + "name": "INPUT1", + "data_type": "TYPE_FP32", "dims": [ - "-1" + "4" ] } ], "output": [ { - "name": "OUTPUT_0", + "name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [ - "3", - "224", - "224" + "4" ] } ], @@ -26897,442 +7128,130 @@ "kind": "KIND_GPU" } ], - "backend": "python", - "dynamic_batching": {} + "backend": "python" }, - "ensemble_python_resnet50_config_10": { - "key": "preprocess_config_6,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" + "ensemble_add_sub_config_1": { + "key": "add_config_0,sub_config_1", + "name": "ensemble_add_sub" }, - "preprocess_config_7": { - "name": "preprocess", - "max_batch_size": 4, + "add_config_1": { + "name": "add", "input": [ { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", + "name": "INPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_11": { - "key": "preprocess_config_7,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_12": { - "key": "preprocess_config_2,resnet50_trt_config_5", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_8": { - "name": "preprocess", - "max_batch_size": 2, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" + "4" ] - } - ], - "output": [ + }, { - "name": "OUTPUT_0", + "name": "INPUT1", "data_type": "TYPE_FP32", "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_13": { - "key": "preprocess_config_8,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_9": { - "name": "preprocess", - "max_batch_size": 8, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" + "4" ] } ], "output": [ { - "name": "OUTPUT_0", + "name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "224", - "224" + "4" ] } ], "instance_group": [ { - "count": 4, + "count": 2, "kind": "KIND_GPU" } ], - "backend": "python", - "dynamic_batching": {} + "backend": "python" }, - "ensemble_python_resnet50_config_14": { - "key": "preprocess_config_9,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" + "ensemble_add_sub_config_2": { + "key": "add_config_1,sub_config_0", + "name": "ensemble_add_sub" }, - "preprocess_config_10": { - "name": "preprocess", - "max_batch_size": 4, + "add_config_2": { + "name": "add", "input": [ { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" - ] - } - ], - "output": [ - { - "name": "OUTPUT_0", + "name": "INPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_15": { - "key": "preprocess_config_10,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_16": { - "key": "preprocess_config_7,resnet50_trt_config_2", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_17": { - "key": "preprocess_config_7,resnet50_trt_config_5", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_18": { - "key": "preprocess_config_7,resnet50_trt_config_4", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_11": { - "name": "preprocess", - "max_batch_size": 16, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" + "4" ] - } - ], - "output": [ + }, { - "name": "OUTPUT_0", + "name": "INPUT1", "data_type": "TYPE_FP32", "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_19": { - "key": "preprocess_config_11,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" - }, - "preprocess_config_12": { - "name": "preprocess", - "max_batch_size": 8, - "input": [ - { - "name": "INPUT_0", - "data_type": "TYPE_UINT8", - "dims": [ - "-1" + "4" ] } ], "output": [ { - "name": "OUTPUT_0", - "data_type": "TYPE_FP32", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "instance_group": [ - { - "count": 5, - "kind": "KIND_GPU" - } - ], - "backend": "python", - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_20": { - "key": "preprocess_config_12,resnet50_trt_config_6", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_21": { - "key": "preprocess_config_9,resnet50_trt_config_2", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_22": { - "key": "preprocess_config_9,resnet50_trt_config_5", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_23": { - "key": "preprocess_config_9,resnet50_trt_config_4", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_24": { - "key": "preprocess_config_11,resnet50_trt_config_4", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_25": { - "key": "preprocess_config_5,resnet50_trt_config_4", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_26": { - "key": "preprocess_config_12,resnet50_trt_config_4", - "name": "ensemble_python_resnet50" - }, - "resnet50_trt_config_7": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 2, - "input": [ - { - "name": "input", + "name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "-1", - "-1" + "4" ] } ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], "instance_group": [ { - "count": 2, + "count": 3, "kind": "KIND_GPU" } ], - "dynamic_batching": {} + "backend": "python" }, - "ensemble_python_resnet50_config_27": { - "key": "preprocess_config_9,resnet50_trt_config_7", - "name": "ensemble_python_resnet50" - }, - "resnet50_trt_config_8": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 8, + "sub_config_2": { + "name": "sub", "input": [ { - "name": "input", + "name": "INPUT0", "data_type": "TYPE_FP32", "dims": [ - "3", - "-1", - "-1" + "4" ] - } - ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_28": { - "key": "preprocess_config_9,resnet50_trt_config_8", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_29": { - "key": "preprocess_config_9,resnet50_trt_config_3", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_30": { - "key": "preprocess_config_7,resnet50_trt_config_8", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_31": { - "key": "preprocess_config_11,resnet50_trt_config_8", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_32": { - "key": "preprocess_config_5,resnet50_trt_config_8", - "name": "ensemble_python_resnet50" - }, - "ensemble_python_resnet50_config_33": { - "key": "preprocess_config_12,resnet50_trt_config_8", - "name": "ensemble_python_resnet50" - }, - "resnet50_trt_config_9": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 16, - "input": [ + }, { - "name": "input", + "name": "INPUT1", "data_type": "TYPE_FP32", "dims": [ - "3", - "-1", - "-1" + "4" ] } ], "output": [ { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], - "instance_group": [ - { - "count": 2, - "kind": "KIND_GPU" - } - ], - "dynamic_batching": {} - }, - "ensemble_python_resnet50_config_34": { - "key": "preprocess_config_9,resnet50_trt_config_9", - "name": "ensemble_python_resnet50" - }, - "resnet50_trt_config_10": { - "name": "resnet50_trt", - "platform": "tensorrt_plan", - "max_batch_size": 8, - "input": [ - { - "name": "input", + "name": "OUTPUT1", "data_type": "TYPE_FP32", "dims": [ - "3", - "-1", - "-1" + "4" ] } ], - "output": [ - { - "name": "output", - "data_type": "TYPE_FP32", - "dims": [ - "1000" - ], - "label_filename": "labels.txt" - } - ], "instance_group": [ { "count": 3, "kind": "KIND_GPU" } ], - "dynamic_batching": {} + "backend": "python" }, - "ensemble_python_resnet50_config_35": { - "key": "preprocess_config_9,resnet50_trt_config_10", - "name": "ensemble_python_resnet50" + "ensemble_add_sub_config_3": { + "key": "add_config_2,sub_config_2", + "name": "ensemble_add_sub" } }, "_model_name_index": { - "preprocess": 12, - "resnet50_trt": 10, - "ensemble_python_resnet50": 35 + "add": 2, + "sub": 2, + "ensemble_add_sub": 3 } } } \ No newline at end of file diff --git a/tests/common/ensemble-ckpt/README b/tests/common/ensemble-ckpt/README index 4c4340f72..8e8fa2b11 100644 --- a/tests/common/ensemble-ckpt/README +++ b/tests/common/ensemble-ckpt/README @@ -12,5 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -This checkpoint was created by running the `ensemble_python_resnet50` model. -Details on how to install this can be found at: `https://github.com/triton-inference-server/python_backend/tree/main/examples/preprocessing` +This checkpoint was created by running the Ensemble Quick Start. diff --git a/tests/common/ensemble-ckpt/golden-metrics-model-gpu.csv b/tests/common/ensemble-ckpt/golden-metrics-model-gpu.csv index 57667fd0c..b6af22d8c 100644 --- a/tests/common/ensemble-ckpt/golden-metrics-model-gpu.csv +++ b/tests/common/ensemble-ckpt/golden-metrics-model-gpu.csv @@ -1,70 +1,23 @@ Model,GPU UUID,Batch,Concurrency,Model Config Path,Instance Group,Satisfies Constraints,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,2.7,62.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,256,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,2.8,59.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,2.9,59.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,512,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,2.7,58.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,4.0,59.4 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,2.7,59.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,4.8,59.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,3.6,58.4 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,2.3,57.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,1.3,57.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,2.5,60.2 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,2.8,58.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,512,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,2.4,59.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,3.5,58.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,3.8,58.1 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,2.4,59.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,256,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,2.5,57.7 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,3.6,58.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,2.3,57.7 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,0.8,57.3 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,"ensemble_python_resnet50_config_24: preprocess_config_11, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,2.8,59.6 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,2.0,58.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,2.8,59.2 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,2.7,61.4 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,512,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,2.8,59.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,3.8,58.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,3.8,59.3 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,256,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,2.6,58.4 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,4.8,58.9 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,2.0,57.7 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU",Yes,3881.8,0.8,57.3 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_35: preprocess_config_9, resnet50_trt_config_10","4:GPU,3:GPU",Yes,3888.1,2.7,56.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,"ensemble_python_resnet50_config_19: preprocess_config_11, resnet50_trt_config_6","4:GPU,1:GPU",Yes,1711.3,3.0,64.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_14: preprocess_config_9, resnet50_trt_config_6","4:GPU,1:GPU",Yes,1711.3,3.0,57.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_34: preprocess_config_9, resnet50_trt_config_9","4:GPU,2:GPU",Yes,2810.2,2.8,61.2 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,"ensemble_python_resnet50_config_31: preprocess_config_11, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,2.5,56.9 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_21: preprocess_config_9, resnet50_trt_config_2","4:GPU,1:GPU",Yes,1707.1,3.0,58.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,"ensemble_python_resnet50_config_16: preprocess_config_7, resnet50_trt_config_2","4:GPU,1:GPU",Yes,1707.1,4.3,58.7 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_22: preprocess_config_9, resnet50_trt_config_5","4:GPU,1:GPU",Yes,1713.4,2.7,57.9 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,"ensemble_python_resnet50_config_30: preprocess_config_7, resnet50_trt_config_8","4:GPU,2:GPU",Yes,2801.8,3.0,60.1 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,"ensemble_python_resnet50_config_11: preprocess_config_7, resnet50_trt_config_6","4:GPU,1:GPU",Yes,1711.3,3.0,58.6 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,"ensemble_python_resnet50_config_17: preprocess_config_7, resnet50_trt_config_5","4:GPU,1:GPU",Yes,1713.4,3.4,58.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_python_resnet50_config_13: preprocess_config_8, resnet50_trt_config_6","4:GPU,1:GPU",Yes,1711.3,4.8,59.2 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,"ensemble_python_resnet50_config_18: preprocess_config_7, resnet50_trt_config_4","4:GPU,2:GPU",Yes,2797.6,2.7,58.6 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,40,"ensemble_python_resnet50_config_15: preprocess_config_10, resnet50_trt_config_6","5:GPU,1:GPU",Yes,1713.4,3.2,58.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,"ensemble_python_resnet50_config_27: preprocess_config_9, resnet50_trt_config_7","4:GPU,2:GPU",Yes,2791.3,3.2,61.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,48,"ensemble_python_resnet50_config_32: preprocess_config_5, resnet50_trt_config_8","3:GPU,2:GPU",Yes,2801.8,2.2,57.2 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,24,"ensemble_python_resnet50_config_12: preprocess_config_2, resnet50_trt_config_5","3:GPU,1:GPU",Yes,1713.4,3.0,58.1 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,24,"ensemble_python_resnet50_config_4: preprocess_config_2, resnet50_trt_config_2","3:GPU,1:GPU",Yes,1707.1,3.7,58.1 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,48,"ensemble_python_resnet50_config_9: preprocess_config_5, resnet50_trt_config_6","3:GPU,1:GPU",Yes,1711.3,2.4,57.7 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,24,"ensemble_python_resnet50_config_7: preprocess_config_2, resnet50_trt_config_6","3:GPU,1:GPU",Yes,1711.3,2.8,58.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,48,"ensemble_python_resnet50_config_25: preprocess_config_5, resnet50_trt_config_4","3:GPU,2:GPU",Yes,2797.6,2.1,58.6 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,80,"ensemble_python_resnet50_config_20: preprocess_config_12, resnet50_trt_config_6","5:GPU,1:GPU",Yes,1713.4,2.5,57.7 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,24,"ensemble_python_resnet50_config_5: preprocess_config_2, resnet50_trt_config_4","3:GPU,2:GPU",Yes,2797.6,2.0,57.9 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,80,"ensemble_python_resnet50_config_26: preprocess_config_12, resnet50_trt_config_4","5:GPU,2:GPU",Yes,2797.6,2.1,58.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,80,"ensemble_python_resnet50_config_33: preprocess_config_12, resnet50_trt_config_8","5:GPU,2:GPU",Yes,2801.8,2.1,58.4 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,12,"ensemble_python_resnet50_config_8: preprocess_config_4, resnet50_trt_config_6","3:GPU,1:GPU",Yes,1711.3,2.6,58.2 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,160,"ensemble_python_resnet50_config_6: preprocess_config_3, resnet50_trt_config_5","5:GPU,1:GPU",Yes,1715.5,2.0,58.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_python_resnet50_config_10: preprocess_config_6, resnet50_trt_config_6","2:GPU,1:GPU",Yes,1711.3,2.2,57.2 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_python_resnet50_config_2: preprocess_config_1, resnet50_trt_config_2","2:GPU,1:GPU",Yes,1707.1,2.2,57.6 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_python_resnet50_config_1: preprocess_config_0, resnet50_trt_config_1","1:GPU,2:GPU",Yes,2789.2,1.6,56.8 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU",Yes,1862.3,1.6,57.4 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU",Yes,1862.3,1.8,57.4 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU",Yes,1862.3,2.0,57.2 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU",Yes,1862.3,1.8,57.3 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU",Yes,1862.3,1.2,56.5 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_python_resnet50_config_3: preprocess_config_0, resnet50_trt_config_3","1:GPU,3:GPU",Yes,3881.8,1.6,57.0 -ensemble_python_resnet50,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_python_resnet50_config_0: preprocess_config_0, resnet50_trt_config_0","1:GPU,1:GPU",Yes,1707.1,1.5,56.6 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU",Yes,870.3,0.0,56.7 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU",Yes,870.3,0.0,56.5 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU",Yes,870.3,0.0,35.6 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU",Yes,870.3,0.0,56.8 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU",Yes,870.3,0.0,56.8 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU",Yes,870.3,0.0,56.6 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU",Yes,870.3,0.0,48.5 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU",Yes,870.3,0.0,56.6 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU",Yes,870.3,0.0,56.7 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU",Yes,870.3,0.0,56.8 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU",Yes,870.3,0.0,56.7 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU",Yes,870.3,0.0,56.7 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU",Yes,870.3,0.0,56.7 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU",Yes,870.3,0.0,56.6 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU",Yes,870.3,0.0,56.3 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU",Yes,870.3,0.0,56.7 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,8,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU",Yes,870.3,0.0,56.6 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU",Yes,870.3,0.0,56.6 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU",Yes,870.3,0.0,56.8 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU",Yes,870.3,0.0,56.7 +ensemble_add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,6,"ensemble_add_sub_config_3: add_config_2, sub_config_2","3:GPU,3:GPU",Yes,870.3,0.0,56.7 diff --git a/tests/common/ensemble-ckpt/golden-metrics-model-inference.csv b/tests/common/ensemble-ckpt/golden-metrics-model-inference.csv index 15d4dfa44..18444821f 100644 --- a/tests/common/ensemble-ckpt/golden-metrics-model-inference.csv +++ b/tests/common/ensemble-ckpt/golden-metrics-model-inference.csv @@ -1,70 +1,23 @@ Model,Batch,Concurrency,Model Config Path,Instance Group,Max Batch Size,Satisfies Constraints,Throughput (infer/sec),p99 Latency (ms) -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,64.0,1104.4 -ensemble_python_resnet50,1,256,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,62.6,4677.6 -ensemble_python_resnet50,1,32,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,60.6,705.3 -ensemble_python_resnet50,1,512,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,60.4,8873.1 -ensemble_python_resnet50,1,16,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,60.0,579.1 -ensemble_python_resnet50,1,128,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,56.1,2505.3 -ensemble_python_resnet50,1,8,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,55.3,370.5 -ensemble_python_resnet50,1,4,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,51.0,133.9 -ensemble_python_resnet50,1,2,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,29.8,94.1 -ensemble_python_resnet50,1,1,"ensemble_python_resnet50_config_28: preprocess_config_9, resnet50_trt_config_8","4:GPU,2:GPU","8,8",Yes,15.1,82.4 -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,64.0,1132.9 -ensemble_python_resnet50,1,32,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,56.3,952.5 -ensemble_python_resnet50,1,512,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,54.4,9758.6 -ensemble_python_resnet50,1,8,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,54.3,397.3 -ensemble_python_resnet50,1,16,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,54.0,752.9 -ensemble_python_resnet50,1,128,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,53.9,2795.3 -ensemble_python_resnet50,1,256,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,53.3,5115.1 -ensemble_python_resnet50,1,4,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,50.2,176.7 -ensemble_python_resnet50,1,2,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,28.8,103.6 -ensemble_python_resnet50,1,1,"ensemble_python_resnet50_config_23: preprocess_config_9, resnet50_trt_config_4","4:GPU,2:GPU","8,4",Yes,14.2,93.4 -ensemble_python_resnet50,1,128,"ensemble_python_resnet50_config_24: preprocess_config_11, resnet50_trt_config_4","4:GPU,2:GPU","16,4",Yes,64.0,2262.0 -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,64.0,1088.7 -ensemble_python_resnet50,1,128,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,61.3,2366.5 -ensemble_python_resnet50,1,32,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,59.3,655.2 -ensemble_python_resnet50,1,512,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,57.7,8994.5 -ensemble_python_resnet50,1,16,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,57.2,635.9 -ensemble_python_resnet50,1,8,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,54.3,362.1 -ensemble_python_resnet50,1,256,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,54.2,4798.8 -ensemble_python_resnet50,1,4,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,48.7,138.8 -ensemble_python_resnet50,1,2,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,29.3,103.4 -ensemble_python_resnet50,1,1,"ensemble_python_resnet50_config_29: preprocess_config_9, resnet50_trt_config_3","4:GPU,3:GPU","8,4",Yes,13.9,91.0 -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_35: preprocess_config_9, resnet50_trt_config_10","4:GPU,3:GPU","8,8",Yes,64.0,1057.1 -ensemble_python_resnet50,1,128,"ensemble_python_resnet50_config_19: preprocess_config_11, resnet50_trt_config_6","4:GPU,1:GPU","16,4",Yes,64.0,2198.8 -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_14: preprocess_config_9, resnet50_trt_config_6","4:GPU,1:GPU","8,4",Yes,64.0,1318.4 -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_34: preprocess_config_9, resnet50_trt_config_9","4:GPU,2:GPU","8,16",Yes,64.0,1079.2 -ensemble_python_resnet50,1,128,"ensemble_python_resnet50_config_31: preprocess_config_11, resnet50_trt_config_8","4:GPU,2:GPU","16,8",Yes,63.9,2084.0 -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_21: preprocess_config_9, resnet50_trt_config_2","4:GPU,1:GPU","8,2",Yes,63.9,1179.3 -ensemble_python_resnet50,1,32,"ensemble_python_resnet50_config_16: preprocess_config_7, resnet50_trt_config_2","4:GPU,1:GPU","4,2",Yes,62.6,645.3 -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_22: preprocess_config_9, resnet50_trt_config_5","4:GPU,1:GPU","8,8",Yes,61.3,1195.1 -ensemble_python_resnet50,1,32,"ensemble_python_resnet50_config_30: preprocess_config_7, resnet50_trt_config_8","4:GPU,2:GPU","4,8",Yes,61.2,650.5 -ensemble_python_resnet50,1,32,"ensemble_python_resnet50_config_11: preprocess_config_7, resnet50_trt_config_6","4:GPU,1:GPU","4,4",Yes,60.0,636.7 -ensemble_python_resnet50,1,32,"ensemble_python_resnet50_config_17: preprocess_config_7, resnet50_trt_config_5","4:GPU,1:GPU","4,8",Yes,59.3,722.0 -ensemble_python_resnet50,1,16,"ensemble_python_resnet50_config_13: preprocess_config_8, resnet50_trt_config_6","4:GPU,1:GPU","2,4",Yes,57.2,341.7 -ensemble_python_resnet50,1,32,"ensemble_python_resnet50_config_18: preprocess_config_7, resnet50_trt_config_4","4:GPU,2:GPU","4,4",Yes,56.0,720.9 -ensemble_python_resnet50,1,40,"ensemble_python_resnet50_config_15: preprocess_config_10, resnet50_trt_config_6","5:GPU,1:GPU","4,4",Yes,55.9,1121.4 -ensemble_python_resnet50,1,64,"ensemble_python_resnet50_config_27: preprocess_config_9, resnet50_trt_config_7","4:GPU,2:GPU","8,2",Yes,54.6,1439.0 -ensemble_python_resnet50,1,48,"ensemble_python_resnet50_config_32: preprocess_config_5, resnet50_trt_config_8","3:GPU,2:GPU","8,8",Yes,54.0,1065.0 -ensemble_python_resnet50,1,24,"ensemble_python_resnet50_config_12: preprocess_config_2, resnet50_trt_config_5","3:GPU,1:GPU","4,8",Yes,52.0,566.8 -ensemble_python_resnet50,1,24,"ensemble_python_resnet50_config_4: preprocess_config_2, resnet50_trt_config_2","3:GPU,1:GPU","4,2",Yes,50.4,577.7 -ensemble_python_resnet50,1,48,"ensemble_python_resnet50_config_9: preprocess_config_5, resnet50_trt_config_6","3:GPU,1:GPU","8,4",Yes,50.1,996.4 -ensemble_python_resnet50,1,24,"ensemble_python_resnet50_config_7: preprocess_config_2, resnet50_trt_config_6","3:GPU,1:GPU","4,4",Yes,49.6,552.6 -ensemble_python_resnet50,1,48,"ensemble_python_resnet50_config_25: preprocess_config_5, resnet50_trt_config_4","3:GPU,2:GPU","8,4",Yes,49.3,1016.8 -ensemble_python_resnet50,1,80,"ensemble_python_resnet50_config_20: preprocess_config_12, resnet50_trt_config_6","5:GPU,1:GPU","8,4",Yes,49.1,2211.0 -ensemble_python_resnet50,1,24,"ensemble_python_resnet50_config_5: preprocess_config_2, resnet50_trt_config_4","3:GPU,2:GPU","4,4",Yes,48.5,610.2 -ensemble_python_resnet50,1,80,"ensemble_python_resnet50_config_26: preprocess_config_12, resnet50_trt_config_4","5:GPU,2:GPU","8,4",Yes,47.3,2110.5 -ensemble_python_resnet50,1,80,"ensemble_python_resnet50_config_33: preprocess_config_12, resnet50_trt_config_8","5:GPU,2:GPU","8,8",Yes,45.3,2228.8 -ensemble_python_resnet50,1,12,"ensemble_python_resnet50_config_8: preprocess_config_4, resnet50_trt_config_6","3:GPU,1:GPU","2,4",Yes,43.8,354.8 -ensemble_python_resnet50,1,160,"ensemble_python_resnet50_config_6: preprocess_config_3, resnet50_trt_config_5","5:GPU,1:GPU","16,8",Yes,43.0,4193.8 -ensemble_python_resnet50,1,16,"ensemble_python_resnet50_config_10: preprocess_config_6, resnet50_trt_config_6","2:GPU,1:GPU","4,4",Yes,34.0,544.4 -ensemble_python_resnet50,1,8,"ensemble_python_resnet50_config_2: preprocess_config_1, resnet50_trt_config_2","2:GPU,1:GPU","2,2",Yes,32.7,303.1 -ensemble_python_resnet50,1,2,"ensemble_python_resnet50_config_1: preprocess_config_0, resnet50_trt_config_1","1:GPU,2:GPU","1,1",Yes,19.1,135.6 -ensemble_python_resnet50,1,2,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU","256,256",Yes,16.6,138.9 -ensemble_python_resnet50,1,4,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU","256,256",Yes,16.5,261.8 -ensemble_python_resnet50,1,16,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU","256,256",Yes,16.4,1011.6 -ensemble_python_resnet50,1,8,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU","256,256",Yes,16.2,526.9 -ensemble_python_resnet50,1,1,"ensemble_python_resnet50_config_default: preprocess_config_default, resnet50_trt_config_default","1:CPU,1:GPU","256,256",Yes,15.3,82.4 -ensemble_python_resnet50,1,2,"ensemble_python_resnet50_config_3: preprocess_config_0, resnet50_trt_config_3","1:GPU,3:GPU","1,4",Yes,16.2,139.7 -ensemble_python_resnet50,1,2,"ensemble_python_resnet50_config_0: preprocess_config_0, resnet50_trt_config_0","1:GPU,1:GPU","1,1",Yes,15.7,144.6 +ensemble_add_sub,1,8,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU","0,0",Yes,2865.9,3.7 +ensemble_add_sub,1,2,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU","0,0",Yes,2837.6,0.9 +ensemble_add_sub,1,16,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU","0,0",Yes,2801.7,7.3 +ensemble_add_sub,1,4,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU","0,0",Yes,2561.8,2.1 +ensemble_add_sub,1,1,"ensemble_add_sub_config_1: add_config_0, sub_config_1","1:GPU,2:GPU","0,0",Yes,1785.0,0.8 +ensemble_add_sub,1,4,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU","0,0",Yes,2852.4,1.9 +ensemble_add_sub,1,16,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU","0,0",Yes,2773.7,7.2 +ensemble_add_sub,1,2,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU","0,0",Yes,2773.0,1.0 +ensemble_add_sub,1,8,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU","0,0",Yes,2697.8,3.9 +ensemble_add_sub,1,1,"ensemble_add_sub_config_0: add_config_0, sub_config_0","1:GPU,1:GPU","0,0",Yes,1775.3,0.8 +ensemble_add_sub,1,4,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU","0,0",Yes,2851.6,1.9 +ensemble_add_sub,1,2,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU","0,0",Yes,2825.4,0.9 +ensemble_add_sub,1,16,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU","0,0",Yes,2786.8,7.2 +ensemble_add_sub,1,8,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU","0,0",Yes,2763.4,3.8 +ensemble_add_sub,1,1,"ensemble_add_sub_config_default: add_config_default, sub_config_default","1:CPU,1:CPU","0,0",Yes,1764.3,0.9 +ensemble_add_sub,1,2,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU","0,0",Yes,2847.2,0.9 +ensemble_add_sub,1,8,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU","0,0",Yes,2766.5,3.7 +ensemble_add_sub,1,4,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU","0,0",Yes,2729.6,2.0 +ensemble_add_sub,1,16,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU","0,0",Yes,2702.6,7.7 +ensemble_add_sub,1,1,"ensemble_add_sub_config_2: add_config_1, sub_config_0","2:GPU,1:GPU","0,0",Yes,1770.9,0.8 +ensemble_add_sub,1,6,"ensemble_add_sub_config_3: add_config_2, sub_config_2","3:GPU,3:GPU","0,0",Yes,2709.2,2.8 diff --git a/tests/common/ensemble-ckpt/golden-metrics-server-only.csv b/tests/common/ensemble-ckpt/golden-metrics-server-only.csv index 0885b253f..4daa8fc3d 100644 --- a/tests/common/ensemble-ckpt/golden-metrics-server-only.csv +++ b/tests/common/ensemble-ckpt/golden-metrics-server-only.csv @@ -1,3 +1,3 @@ Model,GPU UUID,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,457.0,0.0,55.8 +triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,870.0,0.0,56.1 diff --git a/tests/common/multi-model-ckpt/0.ckpt b/tests/common/multi-model-ckpt/0.ckpt index cae073694..a515cc495 100644 --- a/tests/common/multi-model-ckpt/0.ckpt +++ b/tests/common/multi-model-ckpt/0.ckpt @@ -1 +1,7767 @@ -{"ResultManager.results": {"_results": {"resnet50_libtorch,vgg19_libtorch": {"resnet50_libtorch_config_default,vgg19_libtorch_config_default": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_default", "platform": "pytorch_libtorch", "maxBatchSize": 128, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 1, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_default", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_default-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_default", "platform": "pytorch_libtorch", "maxBatchSize": 128, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 1, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_default", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_default-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_default -b 1 -i grpc -f resnet50_libtorch_config_default-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_default,vgg19_libtorch_config_default", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 99.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.121, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 99.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.121, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 99.66669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 279.121, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 5.698, "_timestamp": 0}], ["perf_latency_p90", {"_value": 5.89, "_timestamp": 0}], ["perf_latency_p95", {"_value": 6.006, "_timestamp": 0}], ["perf_latency_p99", {"_value": 7.214, "_timestamp": 0}], ["perf_throughput", {"_value": 175.528, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.095, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 5.582, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.017, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.727, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.124, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 5.698, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 5.89, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 6.006, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 7.214, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 175.528, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.095, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 5.582, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.017, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.727, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.124, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 5.645, "_timestamp": 0}], ["perf_latency_p90", {"_value": 5.735, "_timestamp": 0}], ["perf_latency_p95", {"_value": 5.751, "_timestamp": 0}], ["perf_latency_p99", {"_value": 5.804, "_timestamp": 0}], ["perf_throughput", {"_value": 176.865, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.096, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 5.526, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.016, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.666, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.12, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 5.645, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 5.735, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 5.751, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 5.804, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 176.865, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.096, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 5.526, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.016, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.666, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.12, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_default -b 1 -i grpc -f resnet50_libtorch_config_default-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_default,vgg19_libtorch_config_default", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 92.6667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 278.705, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 92.6667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 278.705, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 92.6667, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 278.705, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 13.881, "_timestamp": 0}], ["perf_latency_p90", {"_value": 14.553, "_timestamp": 0}], ["perf_latency_p95", {"_value": 14.768, "_timestamp": 0}], ["perf_latency_p99", {"_value": 15.119, "_timestamp": 0}], ["perf_throughput", {"_value": 144.217, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 13.757, "_timestamp": 0}], ["perf_server_queue", {"_value": 6.206, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 6.698, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.151, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 13.881, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 14.553, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 14.768, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 15.119, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 144.217, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 13.757, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 6.206, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 6.698, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.151, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 10.086, "_timestamp": 0}], ["perf_latency_p90", {"_value": 10.212, "_timestamp": 0}], ["perf_latency_p95", {"_value": 10.236, "_timestamp": 0}], ["perf_latency_p99", {"_value": 10.279, "_timestamp": 0}], ["perf_throughput", {"_value": 198.178, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.097, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 9.965, "_timestamp": 0}], ["perf_server_queue", {"_value": 4.266, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.818, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.138, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.019, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 10.086, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 10.212, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 10.236, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 10.279, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 198.178, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.097, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 9.965, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 4.266, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.818, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.138, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.019, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_default -b 1 -i grpc -f resnet50_libtorch_config_default-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_default,vgg19_libtorch_config_default", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 64.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.671, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 64.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.671, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 64.66669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 218.671, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 27.518, "_timestamp": 0}], ["perf_latency_p90", {"_value": 28.547, "_timestamp": 0}], ["perf_latency_p95", {"_value": 28.682, "_timestamp": 0}], ["perf_latency_p99", {"_value": 29.008, "_timestamp": 0}], ["perf_throughput", {"_value": 144.578, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.109, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 27.383, "_timestamp": 0}], ["perf_server_queue", {"_value": 19.823, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 6.659, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.146, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 27.518, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 28.547, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 28.682, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 29.008, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 144.578, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.109, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 27.383, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 19.823, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 6.659, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.146, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 20.193, "_timestamp": 0}], ["perf_latency_p90", {"_value": 20.387, "_timestamp": 0}], ["perf_latency_p95", {"_value": 20.455, "_timestamp": 0}], ["perf_latency_p99", {"_value": 20.688, "_timestamp": 0}], ["perf_throughput", {"_value": 197.213, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.104, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 20.063, "_timestamp": 0}], ["perf_server_queue", {"_value": 14.324, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.825, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.142, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 20.193, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 20.387, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 20.455, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 20.688, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 197.213, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.104, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 20.063, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 14.324, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.825, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.142, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_default -b 1 -i grpc -f resnet50_libtorch_config_default-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_default,vgg19_libtorch_config_default", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 221.687, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 221.687, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 221.687, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 54.792, "_timestamp": 0}], ["perf_latency_p90", {"_value": 56.376, "_timestamp": 0}], ["perf_latency_p95", {"_value": 56.905, "_timestamp": 0}], ["perf_latency_p99", {"_value": 57.742, "_timestamp": 0}], ["perf_throughput", {"_value": 144.556, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.123, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 54.642, "_timestamp": 0}], ["perf_server_queue", {"_value": 47.003, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 6.662, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.141, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 54.792, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 56.376, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 56.905, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 57.742, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 144.556, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.123, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 54.642, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 47.003, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 6.662, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.141, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 40.276, "_timestamp": 0}], ["perf_latency_p90", {"_value": 40.734, "_timestamp": 0}], ["perf_latency_p95", {"_value": 40.84, "_timestamp": 0}], ["perf_latency_p99", {"_value": 41.046, "_timestamp": 0}], ["perf_throughput", {"_value": 197.196, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.127, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 40.122, "_timestamp": 0}], ["perf_server_queue", {"_value": 34.326, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.824, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.144, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 40.276, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 40.734, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 40.84, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 41.046, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 197.196, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.127, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 40.122, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 34.326, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.824, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.144, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}}], "resnet50_libtorch_config_0,vgg19_libtorch_config_0": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_0", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 2, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_0", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_0-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_0", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 2, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_0", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_0-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_0 -b 1 -i grpc -f resnet50_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_0,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 70.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 284.465, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 70.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 284.465, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 70.3333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 284.465, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 13.38, "_timestamp": 0}], ["perf_latency_p90", {"_value": 14.057, "_timestamp": 0}], ["perf_latency_p95", {"_value": 14.193, "_timestamp": 0}], ["perf_latency_p99", {"_value": 14.359, "_timestamp": 0}], ["perf_throughput", {"_value": 149.544, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 13.255, "_timestamp": 0}], ["perf_server_queue", {"_value": 5.897, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 6.454, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.141, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 13.38, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 14.057, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 14.193, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 14.359, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 149.544, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 13.255, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 5.897, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 6.454, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.141, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 9.864, "_timestamp": 0}], ["perf_latency_p90", {"_value": 10.01, "_timestamp": 0}], ["perf_latency_p95", {"_value": 10.046, "_timestamp": 0}], ["perf_latency_p99", {"_value": 10.165, "_timestamp": 0}], ["perf_throughput", {"_value": 202.836, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 9.742, "_timestamp": 0}], ["perf_server_queue", {"_value": 4.181, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.697, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.143, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.019, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 9.864, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 10.01, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 10.046, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 10.165, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 202.836, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 9.742, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 4.181, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.697, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.143, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.019, "_timestamp": 0}]}}]}}], "resnet50_libtorch_config_0,vgg19_libtorch_config_4": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_0", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 2, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_0", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_0-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_4", "platform": "pytorch_libtorch", "maxBatchSize": 2, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 4, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_4", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_4-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_0 -b 1 -i grpc -f resnet50_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_4 -b 1 -i grpc -f vgg19_libtorch_config_4-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_0,vgg19_libtorch_config_4", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2273.312768, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23122.149376, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 71.75, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.364, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2273.312768, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23122.149376, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 71.75, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.364, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2273.312768, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23122.149376, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 71.75, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 279.364, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 17.75, "_timestamp": 0}], ["perf_latency_p90", {"_value": 18.721, "_timestamp": 0}], ["perf_latency_p95", {"_value": 19.034, "_timestamp": 0}], ["perf_latency_p99", {"_value": 19.573, "_timestamp": 0}], ["perf_throughput", {"_value": 112.516, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.107, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 17.616, "_timestamp": 0}], ["perf_server_queue", {"_value": 8.082, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 8.618, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.151, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 17.75, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 18.721, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 19.034, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 19.573, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 112.516, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.107, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 17.616, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 8.082, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 8.618, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.151, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_4", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 15.412, "_timestamp": 0}], ["perf_latency_p90", {"_value": 15.607, "_timestamp": 0}], ["perf_latency_p95", {"_value": 15.657, "_timestamp": 0}], ["perf_latency_p99", {"_value": 15.714, "_timestamp": 0}], ["perf_throughput", {"_value": 259.663, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.124, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 15.263, "_timestamp": 0}], ["perf_server_queue", {"_value": 6.747, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 7.295, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.294, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 15.412, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 15.607, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 15.657, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 15.714, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 259.663, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.124, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 15.263, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 6.747, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 7.295, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.294, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}]}}], "resnet50_libtorch_config_5,vgg19_libtorch_config_1": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_5", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "instanceGroup": [{"count": 2, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 4, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_5", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_5-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_1", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "instanceGroup": [{"count": 2, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 4, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_1", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_1-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_5 -b 1 -i grpc -f resnet50_libtorch_config_5-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_5,vgg19_libtorch_config_1", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 3183.476736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22211.985408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 86.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 276.94, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 3183.476736, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22211.985408, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 86.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 276.94, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 3183.476736, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22211.985408, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 86.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 276.94, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_5", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 30.918, "_timestamp": 0}], ["perf_latency_p90", {"_value": 32.1, "_timestamp": 0}], ["perf_latency_p95", {"_value": 32.453, "_timestamp": 0}], ["perf_latency_p99", {"_value": 33.094, "_timestamp": 0}], ["perf_throughput", {"_value": 129.236, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 30.787, "_timestamp": 0}], ["perf_server_queue", {"_value": 14.714, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 15.153, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.186, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.033, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 30.918, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 32.1, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 32.453, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 33.094, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 129.236, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 30.787, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 14.714, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 15.153, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.186, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.033, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_1", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 17.639, "_timestamp": 0}], ["perf_latency_p90", {"_value": 17.978, "_timestamp": 0}], ["perf_latency_p95", {"_value": 18.049, "_timestamp": 0}], ["perf_latency_p99", {"_value": 18.225, "_timestamp": 0}], ["perf_throughput", {"_value": 226.491, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 17.517, "_timestamp": 0}], ["perf_server_queue", {"_value": 8.118, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 8.536, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.178, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.031, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 17.639, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 17.978, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 18.049, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 18.225, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 226.491, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 17.517, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 8.118, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 8.536, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.178, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.031, "_timestamp": 0}]}}]}}], "resnet50_libtorch_config_6,vgg19_libtorch_config_7": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_6", "platform": "pytorch_libtorch", "maxBatchSize": 2, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 4, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_6", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_6-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_7", "platform": "pytorch_libtorch", "maxBatchSize": 4, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 8, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_7", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_7-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_6 -b 1 -i grpc -f resnet50_libtorch_config_6-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_7 -b 1 -i grpc -f vgg19_libtorch_config_7-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_6,vgg19_libtorch_config_7", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2353.004544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23042.4576, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 99.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 277.631, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2353.004544, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23042.4576, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 99.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 277.631, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2353.004544, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23042.4576, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 99.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 277.631, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_6", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 32.006, "_timestamp": 0}], ["perf_latency_p90", {"_value": 34.842, "_timestamp": 0}], ["perf_latency_p95", {"_value": 35.251, "_timestamp": 0}], ["perf_latency_p99", {"_value": 35.741, "_timestamp": 0}], ["perf_throughput", {"_value": 125.213, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.149, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 31.828, "_timestamp": 0}], ["perf_server_queue", {"_value": 14.957, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 15.519, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.338, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.025, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 32.006, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 34.842, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 35.251, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 35.741, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 125.213, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.149, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 31.828, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 14.957, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 15.519, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.338, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.025, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_7", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 27.298, "_timestamp": 0}], ["perf_latency_p90", {"_value": 27.599, "_timestamp": 0}], ["perf_latency_p95", {"_value": 27.702, "_timestamp": 0}], ["perf_latency_p99", {"_value": 27.825, "_timestamp": 0}], ["perf_throughput", {"_value": 293.042, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.146, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 27.126, "_timestamp": 0}], ["perf_server_queue", {"_value": 12.429, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 12.752, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.719, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.028, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 27.298, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 27.599, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 27.702, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 27.825, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 293.042, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.146, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 27.126, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 12.429, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 12.752, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.719, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.028, "_timestamp": 0}]}}]}}], "resnet50_libtorch_config_6,vgg19_libtorch_config_0": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_6", "platform": "pytorch_libtorch", "maxBatchSize": 2, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 4, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_6", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_6-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_0", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 2, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_0", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_0-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_6 -b 1 -i grpc -f resnet50_libtorch_config_6-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_6,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2248.146944, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23147.3152, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 78.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 283.15, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2248.146944, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23147.3152, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 78.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 283.15, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2248.146944, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23147.3152, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 78.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 283.15, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_6", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 19.087, "_timestamp": 0}], ["perf_latency_p90", {"_value": 20.109, "_timestamp": 0}], ["perf_latency_p95", {"_value": 20.336, "_timestamp": 0}], ["perf_latency_p99", {"_value": 20.627, "_timestamp": 0}], ["perf_throughput", {"_value": 209.798, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.128, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 18.933, "_timestamp": 0}], ["perf_server_queue", {"_value": 8.548, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 9.131, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.296, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 19.087, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 20.109, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 20.336, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 20.627, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 209.798, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.128, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 18.933, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 8.548, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 9.131, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.296, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 10.23, "_timestamp": 0}], ["perf_latency_p90", {"_value": 10.412, "_timestamp": 0}], ["perf_latency_p95", {"_value": 10.492, "_timestamp": 0}], ["perf_latency_p99", {"_value": 10.573, "_timestamp": 0}], ["perf_throughput", {"_value": 195.482, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 10.106, "_timestamp": 0}], ["perf_server_queue", {"_value": 4.384, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.875, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.148, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.019, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 10.23, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 10.412, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 10.492, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 10.573, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 195.482, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 10.106, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 4.384, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.875, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.148, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.019, "_timestamp": 0}]}}]}}], "resnet50_libtorch_config_8,vgg19_libtorch_config_0": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_8", "platform": "pytorch_libtorch", "maxBatchSize": 4, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 8, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_8", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_8-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_0", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 2, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_0", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_0-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_8 -b 1 -i grpc -f resnet50_libtorch_config_8-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_8,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 100.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.874, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 100.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.874, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 100.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 279.874, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_8", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 25.254, "_timestamp": 0}], ["perf_latency_p90", {"_value": 26.164, "_timestamp": 0}], ["perf_latency_p95", {"_value": 26.696, "_timestamp": 0}], ["perf_latency_p99", {"_value": 27.17, "_timestamp": 0}], ["perf_throughput", {"_value": 317.064, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.146, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 25.082, "_timestamp": 0}], ["perf_server_queue", {"_value": 11.461, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 11.763, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.688, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.027, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 25.254, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 26.164, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 26.696, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 27.17, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 317.064, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.146, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 25.082, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 11.461, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 11.763, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.688, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.027, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 11.006, "_timestamp": 0}], ["perf_latency_p90", {"_value": 11.278, "_timestamp": 0}], ["perf_latency_p95", {"_value": 11.358, "_timestamp": 0}], ["perf_latency_p99", {"_value": 11.499, "_timestamp": 0}], ["perf_throughput", {"_value": 181.51, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 10.88, "_timestamp": 0}], ["perf_server_queue", {"_value": 4.716, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.262, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.139, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 11.006, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 11.278, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 11.358, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 11.499, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 181.51, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 10.88, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 4.716, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.262, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.139, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_8 -b 1 -i grpc -f resnet50_libtorch_config_8-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_8,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 97.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 283.742, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 97.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 283.742, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 97.3333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 283.742, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_8", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 5.878, "_timestamp": 0}], ["perf_latency_p90", {"_value": 6.24, "_timestamp": 0}], ["perf_latency_p95", {"_value": 6.471, "_timestamp": 0}], ["perf_latency_p99", {"_value": 7.147, "_timestamp": 0}], ["perf_throughput", {"_value": 169.886, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.085, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 5.773, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.039, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.934, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.13, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 5.878, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 6.24, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 6.471, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 7.147, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 169.886, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.085, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 5.773, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.039, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.934, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.13, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 5.755, "_timestamp": 0}], ["perf_latency_p90", {"_value": 5.879, "_timestamp": 0}], ["perf_latency_p95", {"_value": 5.917, "_timestamp": 0}], ["perf_latency_p99", {"_value": 6.006, "_timestamp": 0}], ["perf_throughput", {"_value": 173.884, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 5.636, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.042, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.767, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.125, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 5.755, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 5.879, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 5.917, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 6.006, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 173.884, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 5.636, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.042, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.767, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.125, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_8 -b 1 -i grpc -f resnet50_libtorch_config_8-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_8,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 217.209, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 217.209, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 217.209, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_8", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 13.863, "_timestamp": 0}], ["perf_latency_p90", {"_value": 14.52, "_timestamp": 0}], ["perf_latency_p95", {"_value": 14.657, "_timestamp": 0}], ["perf_latency_p99", {"_value": 15.054, "_timestamp": 0}], ["perf_throughput", {"_value": 143.914, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 13.732, "_timestamp": 0}], ["perf_server_queue", {"_value": 6.143, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 6.697, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.145, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 13.863, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 14.52, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 14.657, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 15.054, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 143.914, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 13.732, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 6.143, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 6.697, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.145, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 10.091, "_timestamp": 0}], ["perf_latency_p90", {"_value": 10.227, "_timestamp": 0}], ["perf_latency_p95", {"_value": 10.267, "_timestamp": 0}], ["perf_latency_p99", {"_value": 10.355, "_timestamp": 0}], ["perf_throughput", {"_value": 197.879, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.102, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 9.964, "_timestamp": 0}], ["perf_server_queue", {"_value": 4.24, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.818, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.139, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 10.091, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 10.227, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 10.267, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 10.355, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 197.879, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.102, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 9.964, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 4.24, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.818, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.139, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_8 -b 1 -i grpc -f resnet50_libtorch_config_8-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_8,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 100.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.644, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 100.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.644, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 100.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 279.644, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_8", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 18.493, "_timestamp": 0}], ["perf_latency_p90", {"_value": 19.568, "_timestamp": 0}], ["perf_latency_p95", {"_value": 19.646, "_timestamp": 0}], ["perf_latency_p99", {"_value": 19.972, "_timestamp": 0}], ["perf_throughput", {"_value": 216.834, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.115, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 18.354, "_timestamp": 0}], ["perf_server_queue", {"_value": 7.118, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 9.879, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.353, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 18.493, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 19.568, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 19.646, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 19.972, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 216.834, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.115, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 18.354, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 7.118, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 9.879, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.353, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 21.114, "_timestamp": 0}], ["perf_latency_p90", {"_value": 21.433, "_timestamp": 0}], ["perf_latency_p95", {"_value": 21.539, "_timestamp": 0}], ["perf_latency_p99", {"_value": 21.723, "_timestamp": 0}], ["perf_throughput", {"_value": 189.521, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.1, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 20.988, "_timestamp": 0}], ["perf_server_queue", {"_value": 15.07, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.038, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.145, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 21.114, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 21.433, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 21.539, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 21.723, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 189.521, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.1, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 20.988, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 15.07, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.038, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.145, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_8 -b 1 -i grpc -f resnet50_libtorch_config_8-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_8,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 74.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 232.483, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 74.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 232.483, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 74.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 232.483, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_8", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 25.787, "_timestamp": 0}], ["perf_latency_p90", {"_value": 26.408, "_timestamp": 0}], ["perf_latency_p95", {"_value": 26.791, "_timestamp": 0}], ["perf_latency_p99", {"_value": 27.543, "_timestamp": 0}], ["perf_throughput", {"_value": 308.871, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.152, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 25.604, "_timestamp": 0}], ["perf_server_queue", {"_value": 11.664, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 12.012, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.699, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.027, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 25.787, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 26.408, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 26.791, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 27.543, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 308.871, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.152, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 25.604, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 11.664, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 12.012, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.699, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.027, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 44.44, "_timestamp": 0}], ["perf_latency_p90", {"_value": 45.447, "_timestamp": 0}], ["perf_latency_p95", {"_value": 45.645, "_timestamp": 0}], ["perf_latency_p99", {"_value": 46.695, "_timestamp": 0}], ["perf_throughput", {"_value": 178.526, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.117, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 44.295, "_timestamp": 0}], ["perf_server_queue", {"_value": 37.984, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.335, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.147, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 44.44, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 45.447, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 45.645, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 46.695, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 178.526, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.117, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 44.295, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 37.984, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.335, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.147, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_8 -b 1 -i grpc -f resnet50_libtorch_config_8-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_8,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 67.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 220.085, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 67.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 220.085, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 67.3333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 220.085, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_8", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 16}, "_non_gpu_data": [["perf_latency_avg", {"_value": 51.106, "_timestamp": 0}], ["perf_latency_p90", {"_value": 52.356, "_timestamp": 0}], ["perf_latency_p95", {"_value": 52.712, "_timestamp": 0}], ["perf_latency_p99", {"_value": 54.235, "_timestamp": 0}], ["perf_throughput", {"_value": 310.326, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.214, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 50.864, "_timestamp": 0}], ["perf_server_queue", {"_value": 36.936, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 11.931, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.701, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.028, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 51.106, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 52.356, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 52.712, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 54.235, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 310.326, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.214, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 50.864, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 36.936, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 11.931, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.701, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.028, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 16}, "_non_gpu_data": [["perf_latency_avg", {"_value": 88.297, "_timestamp": 0}], ["perf_latency_p90", {"_value": 90.095, "_timestamp": 0}], ["perf_latency_p95", {"_value": 90.459, "_timestamp": 0}], ["perf_latency_p99", {"_value": 91.533, "_timestamp": 0}], ["perf_throughput", {"_value": 178.517, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.317, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 87.949, "_timestamp": 0}], ["perf_server_queue", {"_value": 81.444, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.341, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.146, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 88.297, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 90.095, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 90.459, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 91.533, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 178.517, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.317, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 87.949, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 81.444, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.341, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.146, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_8 -b 1 -i grpc -f resnet50_libtorch_config_8-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_8,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 220.153, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 220.153, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 220.153, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_8", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 32}, "_non_gpu_data": [["perf_latency_avg", {"_value": 100.877, "_timestamp": 0}], ["perf_latency_p90", {"_value": 104.51, "_timestamp": 0}], ["perf_latency_p95", {"_value": 105.14, "_timestamp": 0}], ["perf_latency_p99", {"_value": 107.065, "_timestamp": 0}], ["perf_throughput", {"_value": 312.041, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.407, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 100.439, "_timestamp": 0}], ["perf_server_queue", {"_value": 86.084, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 11.871, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.698, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.029, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 100.877, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 104.51, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 105.14, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 107.065, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 312.041, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.407, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 100.439, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 86.084, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 11.871, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.698, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.029, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 32}, "_non_gpu_data": [["perf_latency_avg", {"_value": 175.246, "_timestamp": 0}], ["perf_latency_p90", {"_value": 180.595, "_timestamp": 0}], ["perf_latency_p95", {"_value": 181.291, "_timestamp": 0}], ["perf_latency_p99", {"_value": 182.52, "_timestamp": 0}], ["perf_throughput", {"_value": 177.157, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.713, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 174.496, "_timestamp": 0}], ["perf_server_queue", {"_value": 167.092, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.348, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.148, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 175.246, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 180.595, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 181.291, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 182.52, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 177.157, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.713, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 174.496, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 167.092, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.348, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.148, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_8 -b 1 -i grpc -f resnet50_libtorch_config_8-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_8,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 71.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 283.039, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 71.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 283.039, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 71.3333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 283.039, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_8", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 64}, "_non_gpu_data": [["perf_latency_avg", {"_value": 204.396, "_timestamp": 0}], ["perf_latency_p90", {"_value": 207.945, "_timestamp": 0}], ["perf_latency_p95", {"_value": 208.921, "_timestamp": 0}], ["perf_latency_p99", {"_value": 210.913, "_timestamp": 0}], ["perf_throughput", {"_value": 314.261, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.141, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 204.229, "_timestamp": 0}], ["perf_server_queue", {"_value": 190.474, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 11.879, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.706, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.027, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 204.396, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 207.945, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 208.921, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 210.913, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 314.261, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.141, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 204.229, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 190.474, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 11.879, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.706, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.027, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 64}, "_non_gpu_data": [["perf_latency_avg", {"_value": 359.624, "_timestamp": 0}], ["perf_latency_p90", {"_value": 362.119, "_timestamp": 0}], ["perf_latency_p95", {"_value": 362.572, "_timestamp": 0}], ["perf_latency_p99", {"_value": 363.04, "_timestamp": 0}], ["perf_throughput", {"_value": 177.854, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 359.49, "_timestamp": 0}], ["perf_server_queue", {"_value": 353.168, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.364, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.146, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 359.624, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 362.119, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 362.572, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 363.04, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 177.854, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 359.49, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 353.168, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.364, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.146, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]}}]}}], "resnet50_libtorch_config_9,vgg19_libtorch_config_0": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_9", "platform": "pytorch_libtorch", "maxBatchSize": 8, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 16, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_9", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_9-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_0", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 2, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_0", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_0-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 86.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 280.638, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 86.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 280.638, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 86.25, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 280.638, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 16}, "_non_gpu_data": [["perf_latency_avg", {"_value": 36.9, "_timestamp": 0}], ["perf_latency_p90", {"_value": 37.393, "_timestamp": 0}], ["perf_latency_p95", {"_value": 37.524, "_timestamp": 0}], ["perf_latency_p99", {"_value": 37.797, "_timestamp": 0}], ["perf_throughput", {"_value": 431.608, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.178, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 36.695, "_timestamp": 0}], ["perf_server_queue", {"_value": 16.769, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 16.577, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 1.612, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.037, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 36.9, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 37.393, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 37.524, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 37.797, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 431.608, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.178, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 36.695, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 16.769, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 16.577, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 1.612, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.037, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 12.298, "_timestamp": 0}], ["perf_latency_p90", {"_value": 12.866, "_timestamp": 0}], ["perf_latency_p95", {"_value": 12.957, "_timestamp": 0}], ["perf_latency_p99", {"_value": 13.246, "_timestamp": 0}], ["perf_throughput", {"_value": 162.521, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.1, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 12.172, "_timestamp": 0}], ["perf_server_queue", {"_value": 5.42, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.895, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.147, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 12.298, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 12.866, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 12.957, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 13.246, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 162.521, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.1, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 12.172, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 5.42, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.895, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.147, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 64.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 281.921, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 64.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 281.921, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 64.66669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 281.921, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 5.85, "_timestamp": 0}], ["perf_latency_p90", {"_value": 6.053, "_timestamp": 0}], ["perf_latency_p95", {"_value": 6.148, "_timestamp": 0}], ["perf_latency_p99", {"_value": 7.005, "_timestamp": 0}], ["perf_throughput", {"_value": 170.871, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.089, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 5.74, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.036, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.904, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.125, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 5.85, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 6.053, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 6.148, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 7.005, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 170.871, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.089, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 5.74, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.036, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.904, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.125, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 5.794, "_timestamp": 0}], ["perf_latency_p90", {"_value": 5.87, "_timestamp": 0}], ["perf_latency_p95", {"_value": 5.893, "_timestamp": 0}], ["perf_latency_p99", {"_value": 5.943, "_timestamp": 0}], ["perf_throughput", {"_value": 172.532, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 5.672, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.044, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.779, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.122, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 5.794, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 5.87, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 5.893, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 5.943, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 172.532, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 5.672, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.044, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.779, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.122, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 63.6667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 217.968, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 63.6667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 217.968, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2246.049792, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23149.412352, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 63.6667, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 217.968, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 13.717, "_timestamp": 0}], ["perf_latency_p90", {"_value": 14.403, "_timestamp": 0}], ["perf_latency_p95", {"_value": 14.492, "_timestamp": 0}], ["perf_latency_p99", {"_value": 14.786, "_timestamp": 0}], ["perf_throughput", {"_value": 145.231, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 13.589, "_timestamp": 0}], ["perf_server_queue", {"_value": 6.082, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 6.627, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.142, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 13.717, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 14.403, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 14.492, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 14.786, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 145.231, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 13.589, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 6.082, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 6.627, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.142, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 10.087, "_timestamp": 0}], ["perf_latency_p90", {"_value": 10.232, "_timestamp": 0}], ["perf_latency_p95", {"_value": 10.27, "_timestamp": 0}], ["perf_latency_p99", {"_value": 10.389, "_timestamp": 0}], ["perf_throughput", {"_value": 197.858, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 9.963, "_timestamp": 0}], ["perf_server_queue", {"_value": 4.291, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.81, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.142, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 10.087, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 10.232, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 10.27, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 10.389, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 197.858, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.101, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 9.963, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 4.291, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.81, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.142, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 55.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 227.424, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 55.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 227.424, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 55.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 227.424, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 18.392, "_timestamp": 0}], ["perf_latency_p90", {"_value": 19.338, "_timestamp": 0}], ["perf_latency_p95", {"_value": 19.494, "_timestamp": 0}], ["perf_latency_p99", {"_value": 19.772, "_timestamp": 0}], ["perf_throughput", {"_value": 217.184, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.115, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 18.252, "_timestamp": 0}], ["perf_server_queue", {"_value": 7.04, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 9.824, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.352, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 18.392, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 19.338, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 19.494, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 19.772, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 217.184, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.115, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 18.252, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 7.04, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 9.824, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.352, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 21.179, "_timestamp": 0}], ["perf_latency_p90", {"_value": 21.503, "_timestamp": 0}], ["perf_latency_p95", {"_value": 21.655, "_timestamp": 0}], ["perf_latency_p99", {"_value": 24.695, "_timestamp": 0}], ["perf_throughput", {"_value": 188.894, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.102, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 21.051, "_timestamp": 0}], ["perf_server_queue", {"_value": 15.106, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.048, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.147, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 21.179, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 21.503, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 21.655, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 24.695, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 188.894, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.102, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 21.051, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 15.106, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.048, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.147, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 66.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 217.945, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 66.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 217.945, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 66.3333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 217.945, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 24.277, "_timestamp": 0}], ["perf_latency_p90", {"_value": 25.289, "_timestamp": 0}], ["perf_latency_p95", {"_value": 25.754, "_timestamp": 0}], ["perf_latency_p99", {"_value": 26.592, "_timestamp": 0}], ["perf_throughput", {"_value": 328.015, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.153, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 24.098, "_timestamp": 0}], ["perf_server_queue", {"_value": 6.852, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 14.87, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.843, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.031, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 24.277, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 25.289, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 25.754, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 26.592, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 328.015, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.153, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 24.098, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 6.852, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 14.87, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.843, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.031, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 46.128, "_timestamp": 0}], ["perf_latency_p90", {"_value": 46.983, "_timestamp": 0}], ["perf_latency_p95", {"_value": 47.095, "_timestamp": 0}], ["perf_latency_p99", {"_value": 47.282, "_timestamp": 0}], ["perf_throughput", {"_value": 172.186, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.129, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 45.969, "_timestamp": 0}], ["perf_server_queue", {"_value": 39.423, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.535, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.155, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 46.128, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 46.983, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 47.095, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 47.282, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 172.186, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.129, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 45.969, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 39.423, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.535, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.155, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.99, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.99, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 218.99, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 16}, "_non_gpu_data": [["perf_latency_avg", {"_value": 37.867, "_timestamp": 0}], ["perf_latency_p90", {"_value": 37.919, "_timestamp": 0}], ["perf_latency_p95", {"_value": 38.182, "_timestamp": 0}], ["perf_latency_p99", {"_value": 74.519, "_timestamp": 0}], ["perf_throughput", {"_value": 418.992, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.212, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 37.628, "_timestamp": 0}], ["perf_server_queue", {"_value": 17.05, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 17.049, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 1.567, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.042, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 37.867, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 37.919, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 38.182, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 74.519, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 418.992, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.212, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 37.628, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 17.05, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 17.049, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 1.567, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.042, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 16}, "_non_gpu_data": [["perf_latency_avg", {"_value": 97.673, "_timestamp": 0}], ["perf_latency_p90", {"_value": 100.695, "_timestamp": 0}], ["perf_latency_p95", {"_value": 100.939, "_timestamp": 0}], ["perf_latency_p99", {"_value": 101.577, "_timestamp": 0}], ["perf_throughput", {"_value": 161.22, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.222, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 97.419, "_timestamp": 0}], ["perf_server_queue", {"_value": 90.352, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.922, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.154, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 97.673, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 100.695, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 100.939, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 101.577, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 161.22, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.222, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 97.419, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 90.352, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.922, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.154, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 69.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 278.852, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 69.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 278.852, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 69.3333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 278.852, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 32}, "_non_gpu_data": [["perf_latency_avg", {"_value": 74.81, "_timestamp": 0}], ["perf_latency_p90", {"_value": 75.465, "_timestamp": 0}], ["perf_latency_p95", {"_value": 75.643, "_timestamp": 0}], ["perf_latency_p99", {"_value": 76.362, "_timestamp": 0}], ["perf_throughput", {"_value": 428.77, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.168, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 74.612, "_timestamp": 0}], ["perf_server_queue", {"_value": 54.434, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 16.829, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 1.578, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.041, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 74.81, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 75.465, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 75.643, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 76.362, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 428.77, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.168, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 74.612, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 54.434, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 16.829, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 1.578, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.041, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 32}, "_non_gpu_data": [["perf_latency_avg", {"_value": 199.523, "_timestamp": 0}], ["perf_latency_p90", {"_value": 200.438, "_timestamp": 0}], ["perf_latency_p95", {"_value": 200.682, "_timestamp": 0}], ["perf_latency_p99", {"_value": 201.609, "_timestamp": 0}], ["perf_throughput", {"_value": 160.505, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.102, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 199.392, "_timestamp": 0}], ["perf_server_queue", {"_value": 192.507, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.973, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.149, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 199.523, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 200.438, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 200.682, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 201.609, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 160.505, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.102, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 199.392, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 192.507, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.973, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.149, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 76.5, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 280.238, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 76.5, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 280.238, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 76.5, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 280.238, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 64}, "_non_gpu_data": [["perf_latency_avg", {"_value": 150.154, "_timestamp": 0}], ["perf_latency_p90", {"_value": 150.926, "_timestamp": 0}], ["perf_latency_p95", {"_value": 152.243, "_timestamp": 0}], ["perf_latency_p99", {"_value": 153.232, "_timestamp": 0}], ["perf_throughput", {"_value": 426.365, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.169, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 149.957, "_timestamp": 0}], ["perf_server_queue", {"_value": 129.726, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 16.799, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 1.673, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.041, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 150.154, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 150.926, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 152.243, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 153.232, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 426.365, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.169, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 149.957, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 129.726, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 16.799, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 1.673, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.041, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 64}, "_non_gpu_data": [["perf_latency_avg", {"_value": 400.461, "_timestamp": 0}], ["perf_latency_p90", {"_value": 402.64, "_timestamp": 0}], ["perf_latency_p95", {"_value": 403.006, "_timestamp": 0}], ["perf_latency_p99", {"_value": 404.197, "_timestamp": 0}], ["perf_throughput", {"_value": 160.087, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 400.324, "_timestamp": 0}], ["perf_server_queue", {"_value": 393.404, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.984, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.155, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 400.461, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 402.64, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 403.006, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 404.197, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 160.087, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 400.324, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 393.404, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.984, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.155, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_9 -b 1 -i grpc -f resnet50_libtorch_config_9-results.csv --verbose-csv --concurrency-range=128 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=128 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_9,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 75.33330000000001, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 281.414, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 75.33330000000001, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 281.414, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2380.26752, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 23015.194624, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 75.33330000000001, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 281.414, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_9", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 128}, "_non_gpu_data": [["perf_latency_avg", {"_value": 300.739, "_timestamp": 0}], ["perf_latency_p90", {"_value": 302.213, "_timestamp": 0}], ["perf_latency_p95", {"_value": 302.604, "_timestamp": 0}], ["perf_latency_p99", {"_value": 302.867, "_timestamp": 0}], ["perf_throughput", {"_value": 426.076, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.162, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 300.549, "_timestamp": 0}], ["perf_server_queue", {"_value": 280.225, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 16.77, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 1.694, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.043, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 300.739, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 302.213, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 302.604, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 302.867, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 426.076, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.162, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 300.549, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 280.225, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 16.77, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 1.694, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.043, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 128}, "_non_gpu_data": [["perf_latency_avg", {"_value": 801.832, "_timestamp": 0}], ["perf_latency_p90", {"_value": 803.695, "_timestamp": 0}], ["perf_latency_p95", {"_value": 804.184, "_timestamp": 0}], ["perf_latency_p99", {"_value": 804.788, "_timestamp": 0}], ["perf_throughput", {"_value": 159.541, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 801.697, "_timestamp": 0}], ["perf_server_queue", {"_value": 794.692, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.989, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.155, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.025, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 801.832, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 803.695, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 804.184, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 804.788, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 159.541, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.105, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 801.697, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 794.692, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.989, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.155, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.025, "_timestamp": 0}]}}]}}], "resnet50_libtorch_config_10,vgg19_libtorch_config_0": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "resnet50_libtorch", "_model_config": {"name": "resnet50_libtorch_config_10", "platform": "pytorch_libtorch", "maxBatchSize": 16, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "resnet50_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 32, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "resnet50_libtorch_config_10", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "resnet50_libtorch_config_10-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}, {"_model_name": "vgg19_libtorch", "_model_config": {"name": "vgg19_libtorch_config_0", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["1000"], "labelFilename": "vgg19_labels.txt"}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 2, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "vgg19_libtorch_config_0", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "vgg19_libtorch_config_0-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 87.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.258, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 87.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.258, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 87.66669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 279.258, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 32}, "_non_gpu_data": [["perf_latency_avg", {"_value": 54.999, "_timestamp": 0}], ["perf_latency_p90", {"_value": 56.743, "_timestamp": 0}], ["perf_latency_p95", {"_value": 57.348, "_timestamp": 0}], ["perf_latency_p99", {"_value": 58.045, "_timestamp": 0}], ["perf_throughput", {"_value": 586.193, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.18, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 54.79, "_timestamp": 0}], ["perf_server_queue", {"_value": 24.378, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 22.733, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 4.036, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.1, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 54.999, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 56.743, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 57.348, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 58.045, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 586.193, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.18, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 54.79, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 24.378, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 22.733, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 4.036, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.1, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 14.926, "_timestamp": 0}], ["perf_latency_p90", {"_value": 17.149, "_timestamp": 0}], ["perf_latency_p95", {"_value": 17.652, "_timestamp": 0}], ["perf_latency_p99", {"_value": 18.474, "_timestamp": 0}], ["perf_throughput", {"_value": 133.893, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.14, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 14.75, "_timestamp": 0}], ["perf_server_queue", {"_value": 6.327, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 7.082, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.213, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.034, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 14.926, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 17.149, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 17.652, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 18.474, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 133.893, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.14, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 14.75, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 6.327, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 7.082, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.213, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.034, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 83.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 280.94, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 83.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 280.94, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 83.25, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 280.94, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 5.845, "_timestamp": 0}], ["perf_latency_p90", {"_value": 6.022, "_timestamp": 0}], ["perf_latency_p95", {"_value": 6.18, "_timestamp": 0}], ["perf_latency_p99", {"_value": 7.877, "_timestamp": 0}], ["perf_throughput", {"_value": 171.199, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 5.724, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.035, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.84, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.125, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 5.845, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 6.022, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 6.18, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 7.877, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 171.199, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.098, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 5.724, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.035, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.84, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.125, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 5.767, "_timestamp": 0}], ["perf_latency_p90", {"_value": 5.853, "_timestamp": 0}], ["perf_latency_p95", {"_value": 5.872, "_timestamp": 0}], ["perf_latency_p99", {"_value": 5.901, "_timestamp": 0}], ["perf_throughput", {"_value": 173.197, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.099, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 5.643, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.045, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.75, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.119, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 5.767, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 5.853, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 5.872, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 5.901, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 173.197, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.099, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 5.643, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.045, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.75, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.119, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.539, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.539, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 218.539, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 13.697, "_timestamp": 0}], ["perf_latency_p90", {"_value": 14.358, "_timestamp": 0}], ["perf_latency_p95", {"_value": 14.465, "_timestamp": 0}], ["perf_latency_p99", {"_value": 14.89, "_timestamp": 0}], ["perf_throughput", {"_value": 145.57, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.106, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 13.565, "_timestamp": 0}], ["perf_server_queue", {"_value": 6.052, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 6.615, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.141, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 13.697, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 14.358, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 14.465, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 14.89, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 145.57, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.106, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 13.565, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 6.052, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 6.615, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.141, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 9.995, "_timestamp": 0}], ["perf_latency_p90", {"_value": 10.134, "_timestamp": 0}], ["perf_latency_p95", {"_value": 10.173, "_timestamp": 0}], ["perf_latency_p99", {"_value": 10.3, "_timestamp": 0}], ["perf_throughput", {"_value": 199.868, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.104, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 9.866, "_timestamp": 0}], ["perf_server_queue", {"_value": 4.219, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.772, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.136, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 9.995, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 10.134, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 10.173, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 10.3, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 199.868, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.104, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 9.866, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 4.219, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.772, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.136, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.02, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 64.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.752, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 64.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.752, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 64.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 218.752, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 18.517, "_timestamp": 0}], ["perf_latency_p90", {"_value": 19.324, "_timestamp": 0}], ["perf_latency_p95", {"_value": 19.43, "_timestamp": 0}], ["perf_latency_p99", {"_value": 19.864, "_timestamp": 0}], ["perf_throughput", {"_value": 214.842, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.118, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 18.371, "_timestamp": 0}], ["perf_server_queue", {"_value": 7.041, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 9.949, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.343, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 18.517, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 19.324, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 19.43, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 19.864, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 214.842, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.118, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 18.371, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 7.041, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 9.949, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.343, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 4}, "_non_gpu_data": [["perf_latency_avg", {"_value": 20.908, "_timestamp": 0}], ["perf_latency_p90", {"_value": 21.338, "_timestamp": 0}], ["perf_latency_p95", {"_value": 21.457, "_timestamp": 0}], ["perf_latency_p99", {"_value": 21.758, "_timestamp": 0}], ["perf_throughput", {"_value": 190.515, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.108, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 20.773, "_timestamp": 0}], ["perf_server_queue", {"_value": 14.867, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 4.986, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.148, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 20.908, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 21.338, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 21.457, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 21.758, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 190.515, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.108, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 20.773, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 14.867, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 4.986, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.148, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.021, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 67.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.068, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 67.3333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 218.068, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 67.3333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 218.068, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 24.419, "_timestamp": 0}], ["perf_latency_p90", {"_value": 25.365, "_timestamp": 0}], ["perf_latency_p95", {"_value": 25.727, "_timestamp": 0}], ["perf_latency_p99", {"_value": 26.276, "_timestamp": 0}], ["perf_throughput", {"_value": 325.425, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.171, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 24.221, "_timestamp": 0}], ["perf_server_queue", {"_value": 6.916, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 14.88, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.859, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.033, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 24.419, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 25.365, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 25.727, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 26.276, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 325.425, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.171, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 24.221, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 6.916, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 14.88, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.859, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.033, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 8}, "_non_gpu_data": [["perf_latency_avg", {"_value": 45.69, "_timestamp": 0}], ["perf_latency_p90", {"_value": 46.757, "_timestamp": 0}], ["perf_latency_p95", {"_value": 46.881, "_timestamp": 0}], ["perf_latency_p99", {"_value": 47.226, "_timestamp": 0}], ["perf_throughput", {"_value": 173.849, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.121, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 45.541, "_timestamp": 0}], ["perf_server_queue", {"_value": 39.068, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 5.482, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.154, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 45.69, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 46.757, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 46.881, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 47.226, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 173.849, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.121, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 45.541, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 39.068, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 5.482, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.154, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 219.254, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 219.254, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 66.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 219.254, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 16}, "_non_gpu_data": [["perf_latency_avg", {"_value": 37.156, "_timestamp": 0}], ["perf_latency_p90", {"_value": 37.495, "_timestamp": 0}], ["perf_latency_p95", {"_value": 37.832, "_timestamp": 0}], ["perf_latency_p99", {"_value": 67.801, "_timestamp": 0}], ["perf_throughput", {"_value": 427.848, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.265, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 36.859, "_timestamp": 0}], ["perf_server_queue", {"_value": 13.922, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 18.981, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 1.651, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.045, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 37.156, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 37.495, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 37.832, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 67.801, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 427.848, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.265, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 36.859, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 13.922, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 18.981, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 1.651, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.045, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 16}, "_non_gpu_data": [["perf_latency_avg", {"_value": 99.168, "_timestamp": 0}], ["perf_latency_p90", {"_value": 103.636, "_timestamp": 0}], ["perf_latency_p95", {"_value": 103.981, "_timestamp": 0}], ["perf_latency_p99", {"_value": 105.046, "_timestamp": 0}], ["perf_throughput", {"_value": 158.481, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.188, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 98.949, "_timestamp": 0}], ["perf_server_queue", {"_value": 91.801, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 6.007, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.163, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 99.168, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 103.636, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 103.981, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 105.046, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 158.481, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.188, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 98.949, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 91.801, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 6.007, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.163, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.967, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.967, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 65.66669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 279.967, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 32}, "_non_gpu_data": [["perf_latency_avg", {"_value": 54.985, "_timestamp": 0}], ["perf_latency_p90", {"_value": 56.723, "_timestamp": 0}], ["perf_latency_p95", {"_value": 57.311, "_timestamp": 0}], ["perf_latency_p99", {"_value": 58.002, "_timestamp": 0}], ["perf_throughput", {"_value": 580.845, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.179, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 54.777, "_timestamp": 0}], ["perf_server_queue", {"_value": 24.512, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 22.765, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 3.926, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.105, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 54.985, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 56.723, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 57.311, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 58.002, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 580.845, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.179, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 54.777, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 24.512, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 22.765, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 3.926, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.105, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 32}, "_non_gpu_data": [["perf_latency_avg", {"_value": 240.438, "_timestamp": 0}], ["perf_latency_p90", {"_value": 243.736, "_timestamp": 0}], ["perf_latency_p95", {"_value": 244.407, "_timestamp": 0}], ["perf_latency_p99", {"_value": 245.13, "_timestamp": 0}], ["perf_throughput", {"_value": 133.53, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.137, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 240.26, "_timestamp": 0}], ["perf_server_queue", {"_value": 231.765, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 7.136, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.208, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.033, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 240.438, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 243.736, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 244.407, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 245.13, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 133.53, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.137, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 240.26, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 231.765, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 7.136, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.208, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.033, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=64 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 68.6667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 277.347, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 68.6667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 277.347, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 68.6667, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 277.347, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 64}, "_non_gpu_data": [["perf_latency_avg", {"_value": 110.017, "_timestamp": 0}], ["perf_latency_p90", {"_value": 112.334, "_timestamp": 0}], ["perf_latency_p95", {"_value": 112.836, "_timestamp": 0}], ["perf_latency_p99", {"_value": 113.883, "_timestamp": 0}], ["perf_throughput", {"_value": 578.439, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.183, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 109.805, "_timestamp": 0}], ["perf_server_queue", {"_value": 79.583, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 22.718, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 3.921, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.111, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 110.017, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 112.334, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 112.836, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 113.883, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 578.439, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.183, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 109.805, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 79.583, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 22.718, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 3.921, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.111, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 64}, "_non_gpu_data": [["perf_latency_avg", {"_value": 482.439, "_timestamp": 0}], ["perf_latency_p90", {"_value": 485.836, "_timestamp": 0}], ["perf_latency_p95", {"_value": 486.667, "_timestamp": 0}], ["perf_latency_p99", {"_value": 487.927, "_timestamp": 0}], ["perf_throughput", {"_value": 132.555, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.135, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 482.272, "_timestamp": 0}], ["perf_server_queue", {"_value": 473.715, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 7.17, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.209, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.034, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 482.439, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 485.836, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 486.667, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 487.927, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 132.555, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.135, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 482.272, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 473.715, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 7.17, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.209, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.034, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=128 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=128 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 73.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 276.283, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 73.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 276.283, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 73.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 276.283, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 128}, "_non_gpu_data": [["perf_latency_avg", {"_value": 221.052, "_timestamp": 0}], ["perf_latency_p90", {"_value": 223.728, "_timestamp": 0}], ["perf_latency_p95", {"_value": 224.303, "_timestamp": 0}], ["perf_latency_p99", {"_value": 224.938, "_timestamp": 0}], ["perf_throughput", {"_value": 580.765, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.177, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 220.845, "_timestamp": 0}], ["perf_server_queue", {"_value": 190.45, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 22.822, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 3.932, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.107, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 221.052, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 223.728, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 224.303, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 224.938, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 580.765, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.177, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 220.845, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 190.45, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 22.822, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 3.932, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.107, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 128}, "_non_gpu_data": [["perf_latency_avg", {"_value": 966.907, "_timestamp": 0}], ["perf_latency_p90", {"_value": 969.759, "_timestamp": 0}], ["perf_latency_p95", {"_value": 971.737, "_timestamp": 0}], ["perf_latency_p99", {"_value": 1048.228, "_timestamp": 0}], ["perf_throughput", {"_value": 132.881, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 1.734, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 965.16, "_timestamp": 0}], ["perf_server_queue", {"_value": 952.769, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 7.153, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.22, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.035, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 966.907, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 969.759, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 971.737, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 1048.228, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 132.881, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 1.734, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 965.16, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 952.769, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 7.153, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.22, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.035, "_timestamp": 0}]}}]}, "-m resnet50_libtorch_config_10 -b 1 -i grpc -f resnet50_libtorch_config_10-results.csv --verbose-csv --concurrency-range=256 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --concurrency-range=256 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "resnet50_libtorch_config_10,vgg19_libtorch_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 89.33330000000001, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.323, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 89.33330000000001, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 279.323, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 2432.69632, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 22962.765824000002, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 89.33330000000001, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 279.323, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "resnet50_libtorch_config_10", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 256}, "_non_gpu_data": [["perf_latency_avg", {"_value": 439.955, "_timestamp": 0}], ["perf_latency_p90", {"_value": 443.525, "_timestamp": 0}], ["perf_latency_p95", {"_value": 445.519, "_timestamp": 0}], ["perf_latency_p99", {"_value": 447.522, "_timestamp": 0}], ["perf_throughput", {"_value": 586.087, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.174, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 439.736, "_timestamp": 0}], ["perf_server_queue", {"_value": 409.438, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 22.761, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 3.856, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.115, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 439.955, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 443.525, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 445.519, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 447.522, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 586.087, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.174, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 439.736, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 409.438, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 22.761, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 3.856, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.115, "_timestamp": 0}]}}, {"_model_config_name": "vgg19_libtorch_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 256}, "_non_gpu_data": [["perf_latency_avg", {"_value": 1942.991, "_timestamp": 0}], ["perf_latency_p90", {"_value": 1935.441, "_timestamp": 0}], ["perf_latency_p95", {"_value": 2061.135, "_timestamp": 0}], ["perf_latency_p99", {"_value": 2175.656, "_timestamp": 0}], ["perf_throughput", {"_value": 132.536, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 7.841, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 1935.107, "_timestamp": 0}], ["perf_server_queue", {"_value": 1908.142, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 7.178, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.206, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.033, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 1942.991, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 1935.441, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 2061.135, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 2175.656, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 132.536, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 7.841, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 1935.107, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 1908.142, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 7.178, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.206, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.033, "_timestamp": 0}]}}]}}]}}}, "ResultManager.server_only_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 457.0, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24938.0, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 0.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.25625, "_timestamp": 0, "_device_uuid": null}]]}, "MetricsManager.gpus": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": {"name": "NVIDIA TITAN RTX", "total_memory": 25395462144}}, "ModelManager.model_variant_name_manager": {"_model_config_dicts": {"resnet50_libtorch_config_0": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_0": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_1": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 2, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_2": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 3, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_3": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 4, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_4": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_1": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 2, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_5": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 4, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 2, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_2": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 3, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_3": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 4, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 2, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_4": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 4, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 3, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_5": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 2, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_6": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_6": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 4, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 3, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_7": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 4, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_7": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 3, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "vgg19_libtorch_config_8": {"name": "vgg19_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "vgg19_labels.txt"}], "instance_group": [{"count": 3, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_8": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 4, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_9": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 8, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "resnet50_libtorch_config_10": {"name": "resnet50_libtorch", "platform": "pytorch_libtorch", "max_batch_size": 16, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "format": "FORMAT_NCHW", "dims": ["3", "224", "224"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["1000"], "label_filename": "resnet50_labels.txt"}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}}, "_model_name_index": {"resnet50_libtorch": 10, "vgg19_libtorch": 8}}} \ No newline at end of file +{ + "ResultManager.results": { + "_results": { + "resnet50_libtorch,vgg19_libtorch": { + "resnet50_libtorch_config_default,vgg19_libtorch_config_default": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "resnet50_libtorch", + "_model_config_variant": { + "model_config": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 128, + "input": [ + { + "name": "input", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "resnet50_labels.txt" + } + ] + }, + "variant_name": "resnet50_libtorch_config_default", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 1, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "resnet50_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "resnet50_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + }, + { + "_model_name": "vgg19_libtorch", + "_model_config_variant": { + "model_config": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 128, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "vgg19_labels.txt" + } + ] + }, + "variant_name": "vgg19_libtorch_config_default", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 1, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "vgg19_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "vgg19_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "resnet50_libtorch_config_default -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_default -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_default,vgg19_libtorch_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 99.66669999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 273.771, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 99.66669999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 273.771, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 99.66669999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 273.771, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 5.956, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 6.607, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 6.848, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 7.683, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 167.896, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.092, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 5.84, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.016, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.986, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.123, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 5.956, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 6.607, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 6.848, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 7.683, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 167.896, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.092, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.84, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.016, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.986, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.123, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 5.665, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 5.79, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 5.813, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 5.866, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 176.223, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.092, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 5.552, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.016, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.742, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.122, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 5.665, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 5.79, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 5.813, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 5.866, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 176.223, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.092, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.552, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.016, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.742, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.122, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + } + } + ] + }, + "resnet50_libtorch_config_default -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_default -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_default,vgg19_libtorch_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 99.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 277.904, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 99.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 277.904, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 99.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 277.904, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 14.46, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 15.086, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 15.251, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 15.622, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 138.552, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.103, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 14.333, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 6.453, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 6.99, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.146, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 14.46, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 15.086, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 15.251, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 15.622, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 138.552, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.103, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 14.333, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 6.453, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 6.99, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.146, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 9.971, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 10.166, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 10.223, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 10.341, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 200.5, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.094, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 9.855, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 4.266, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.765, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.137, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.018, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 9.971, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 10.166, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 10.223, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 10.341, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 200.5, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.094, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 9.855, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 4.266, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.765, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.137, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.018, + "_timestamp": 0 + } + ] + } + } + ] + }, + "resnet50_libtorch_config_default -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_default -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_default,vgg19_libtorch_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 50.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 178.015, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 50.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 178.015, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 50.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 178.015, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 28.594, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 29.911, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 30.124, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 30.448, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 139.233, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.11, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 28.456, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 20.613, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 6.926, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.149, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 28.594, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 29.911, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 30.124, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 30.448, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 139.233, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.11, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 28.456, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 20.613, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 6.926, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.149, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 19.973, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 20.289, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 20.419, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 20.683, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 199.529, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.105, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 19.841, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 14.17, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.767, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.145, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.018, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 19.973, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 20.289, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 20.419, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 20.683, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 199.529, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.105, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 19.841, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 14.17, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.767, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.145, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.018, + "_timestamp": 0 + } + ] + } + } + ] + }, + "resnet50_libtorch_config_default -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_default -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_default,vgg19_libtorch_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 60.3333, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 216.06, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 60.3333, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 216.06, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 60.3333, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 216.06, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 8, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 57.152, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 59.898, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 60.204, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 60.672, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 138.549, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.119, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 57.003, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 49.032, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 6.953, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.148, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 57.152, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 59.898, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 60.204, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 60.672, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 138.549, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.119, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 57.003, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 49.032, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 6.953, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.148, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 8, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 39.855, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 40.302, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 40.389, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 40.655, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 199.507, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.147, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 39.68, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 33.962, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.769, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.143, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.018, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 39.855, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 40.302, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 40.389, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 40.655, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 199.507, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.147, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 39.68, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 33.962, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.769, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.143, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.018, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "resnet50_libtorch_config_0,vgg19_libtorch_config_0": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "resnet50_libtorch", + "_model_config_variant": { + "model_config": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "input", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "resnet50_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "resnet50_libtorch_config_0", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 2, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "resnet50_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "resnet50_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + }, + { + "_model_name": "vgg19_libtorch", + "_model_config_variant": { + "model_config": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "vgg19_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "vgg19_libtorch_config_0", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 2, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "vgg19_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "vgg19_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "resnet50_libtorch_config_0 -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_0 -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_0,vgg19_libtorch_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 97.66669999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 269.32, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 97.66669999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 269.32, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2296.38144, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23473.422336, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 97.66669999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 269.32, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 14.31, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 15.01, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 15.154, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 15.455, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 139.554, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.105, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 14.177, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 6.358, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 6.915, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.144, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 14.31, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 15.01, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 15.154, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 15.455, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 139.554, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.105, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 14.177, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 6.358, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 6.915, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.144, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 9.907, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 10.113, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 10.18, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 10.32, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 201.837, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.101, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 9.78, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 4.181, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.729, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.137, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.018, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 9.907, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 10.113, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 10.18, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 10.32, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 201.837, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.101, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 9.78, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 4.181, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.729, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.137, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.018, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "resnet50_libtorch_config_0,vgg19_libtorch_config_4": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "resnet50_libtorch", + "_model_config_variant": { + "model_config": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "input", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "resnet50_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "resnet50_libtorch_config_0", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 2, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "resnet50_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "resnet50_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + }, + { + "_model_name": "vgg19_libtorch", + "_model_config_variant": { + "model_config": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "vgg19_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "vgg19_libtorch_config_4", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 4, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "vgg19_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "vgg19_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "resnet50_libtorch_config_0 -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_4 -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_0,vgg19_libtorch_config_4", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2323.644416, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23446.15936, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 99.3333, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 275.388, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2323.644416, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23446.15936, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 99.3333, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 275.388, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2323.644416, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23446.15936, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 99.3333, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 275.388, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 18.466, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 19.367, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 19.625, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 19.916, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 108.59, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.107, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 18.331, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 8.443, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 8.977, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.148, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 18.466, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 19.367, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 19.625, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 19.916, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 108.59, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.107, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 18.331, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 8.443, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 8.977, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.148, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 15.394, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 15.651, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 15.727, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 15.803, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 259.817, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.124, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 15.243, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 6.726, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 7.302, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.28, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 15.394, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 15.651, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 15.727, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 15.803, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 259.817, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.124, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 15.243, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 6.726, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 7.302, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.28, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "resnet50_libtorch_config_7,vgg19_libtorch_config_1": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "resnet50_libtorch", + "_model_config_variant": { + "model_config": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "input", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "resnet50_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "resnet50_libtorch_config_7", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 4, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "resnet50_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "resnet50_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + }, + { + "_model_name": "vgg19_libtorch", + "_model_config_variant": { + "model_config": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "vgg19_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "vgg19_libtorch_config_1", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 4, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "vgg19_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "vgg19_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "resnet50_libtorch_config_7 -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_1 -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_7,vgg19_libtorch_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 3089.104896, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 22680.69888, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 79.6667, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 280.57, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 3089.104896, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 22680.69888, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 79.6667, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 280.57, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 3089.104896, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 22680.69888, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 79.6667, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 280.57, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 36.419, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 37.645, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 37.906, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 39.24, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 108.59, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.129, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 36.263, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 17.128, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 17.604, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.316, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.296, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 36.419, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 37.645, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 37.906, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 39.24, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 108.59, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.129, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 36.263, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 17.128, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 17.604, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.316, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.296, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 16.805, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 17.027, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 17.179, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 17.484, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 236.833, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.096, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 16.682, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 7.71, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 8.033, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.162, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.025, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 16.805, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 17.027, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 17.179, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 17.484, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 236.833, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.096, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 16.682, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 7.71, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 8.033, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.162, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.025, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "resnet50_libtorch_config_5,vgg19_libtorch_config_7": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "resnet50_libtorch", + "_model_config_variant": { + "model_config": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "input", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "resnet50_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "resnet50_libtorch_config_5", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 4, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "resnet50_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "resnet50_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + }, + { + "_model_name": "vgg19_libtorch", + "_model_config_variant": { + "model_config": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 4, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "vgg19_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "vgg19_libtorch_config_7", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 8, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "vgg19_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "vgg19_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "resnet50_libtorch_config_5 -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_7 -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_5,vgg19_libtorch_config_7", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2680.160256, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23089.64352, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 100.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 274.244, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2680.160256, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23089.64352, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 100.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 274.244, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2680.160256, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23089.64352, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 100.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 274.244, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 28.635, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 30.035, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 30.354, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 31.183, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 139.873, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.104, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 28.503, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 13.517, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 14.039, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.163, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.029, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 28.635, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 30.035, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 30.354, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 31.183, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 139.873, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.104, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 28.503, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 13.517, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 14.039, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.163, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.029, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 8, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 28.632, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 28.85, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 28.901, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 29.003, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 279.746, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.178, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 28.425, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 13.041, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 13.196, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.932, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.035, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 28.632, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 28.85, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 28.901, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 29.003, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 279.746, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.178, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 28.425, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 13.041, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 13.196, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.932, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.035, + "_timestamp": 0 + } + ] + } + } + ] + }, + "resnet50_libtorch_config_5 -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_7 -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_5,vgg19_libtorch_config_7", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2575.302656, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23194.50112, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 93.6667, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 277.039, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2575.302656, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23194.50112, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 93.6667, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 277.039, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2575.302656, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23194.50112, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 93.6667, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 277.039, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 8.091, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 9.072, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 9.317, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 10.044, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 123.562, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.099, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 7.966, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.066, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 6.922, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.175, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.038, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 8.091, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 9.072, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 9.317, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 10.044, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 123.562, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.099, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 7.966, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.066, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 6.922, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.175, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.038, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 5.668, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 5.837, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 5.911, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 6.029, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 176.184, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.095, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 5.549, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.039, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.638, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.127, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 5.668, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 5.837, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 5.911, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 6.029, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 176.184, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.095, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.549, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.039, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.638, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.127, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "resnet50_libtorch_config_0,vgg19_libtorch_config_7": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "resnet50_libtorch", + "_model_config_variant": { + "model_config": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "input", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "resnet50_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "resnet50_libtorch_config_0", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 2, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "resnet50_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "resnet50_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + }, + { + "_model_name": "vgg19_libtorch", + "_model_config_variant": { + "model_config": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 4, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "vgg19_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "vgg19_libtorch_config_7", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 8, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "vgg19_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "vgg19_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "resnet50_libtorch_config_0 -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_7 -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=8 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_0,vgg19_libtorch_config_7", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2401.23904, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23368.564736, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 78.25, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 279.965, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2401.23904, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23368.564736, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 78.25, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 279.965, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2401.23904, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23368.564736, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 78.25, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 279.965, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 26.733, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 26.929, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 26.964, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 27.15, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 74.9411, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.137, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 26.567, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 12.549, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 13.034, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.194, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.032, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 26.733, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 26.929, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 26.964, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 27.15, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 74.9411, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.137, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 26.567, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 12.549, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 13.034, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.194, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.032, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 8, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 26.732, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 26.922, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 26.962, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 27.101, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 299.764, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.134, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 26.571, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 12.132, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 12.557, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.648, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.023, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 26.732, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 26.922, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 26.962, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 27.101, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 299.764, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.134, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 26.571, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 12.132, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 12.557, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.648, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.023, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "resnet50_libtorch_config_5,vgg19_libtorch_config_4": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "resnet50_libtorch", + "_model_config_variant": { + "model_config": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "input", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "resnet50_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "resnet50_libtorch_config_5", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 4, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "resnet50_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "resnet50_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + }, + { + "_model_name": "vgg19_libtorch", + "_model_config_variant": { + "model_config": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "1000" + ], + "labelFilename": "vgg19_labels.txt" + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "vgg19_libtorch_config_4", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 4, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "vgg19_libtorch", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "vgg19_libtorch-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "resnet50_libtorch_config_5 -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_4 -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=4 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_5,vgg19_libtorch_config_4", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2602.565632, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23167.238144000003, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 100.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 276.12, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2602.565632, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23167.238144000003, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 100.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 276.12, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2602.565632, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23167.238144000003, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 100.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 276.12, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 21.714, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 23.229, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 23.504, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 23.899, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 184.509, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.11, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 21.575, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 10.059, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 10.574, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.17, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.026, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 21.714, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 23.229, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 23.504, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 23.899, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 184.509, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.11, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 21.575, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 10.059, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 10.574, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.17, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.026, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 4, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 16.896, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 17.168, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 17.269, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 17.423, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 236.464, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.133, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 16.737, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 7.479, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 8.023, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.304, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.023, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 16.896, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 17.168, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 17.269, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 17.423, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 236.464, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.133, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 16.737, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 7.479, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 8.023, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.304, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.023, + "_timestamp": 0 + } + ] + } + } + ] + }, + "resnet50_libtorch_config_5 -m resnet50_libtorch -b 1 -i grpc -f resnet50_libtorch-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000vgg19_libtorch_config_4 -m vgg19_libtorch -b 1 -i grpc -f vgg19_libtorch-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "resnet50_libtorch_config_5,vgg19_libtorch_config_4", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 2575.302656, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 23194.50112, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 98.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 276.779, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 2575.302656, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 23194.50112, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 98.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 276.779, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 2575.302656, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 23194.50112, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 98.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 276.779, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "resnet50_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 5.914, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 6.115, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 6.348, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 7.005, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 168.905, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.095, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 5.792, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.047, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.908, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.12, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 5.914, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 6.115, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 6.348, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 7.005, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 168.905, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.095, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.792, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.047, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.908, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.12, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + } + }, + { + "_model_config_name": "vgg19_libtorch", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 5.833, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 5.921, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 5.945, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 5.989, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 171.238, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.095, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 5.713, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.036, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 4.842, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.12, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 5.833, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 5.921, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 5.945, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 5.989, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 171.238, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.095, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 5.713, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.036, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 4.842, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.12, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ] + } + } + }, + "ResultManager.server_only_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 870.0, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24899.0, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 55.84725, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "MetricsManager.gpus": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": { + "name": "NVIDIA TITAN RTX", + "total_memory": 25387401216 + } + }, + "ModelManager.model_variant_name_manager": { + "_model_config_dicts": { + "resnet50_libtorch_config_0": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "input", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "resnet50_labels.txt" + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_0": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_1": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_2": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_3": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_4": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "resnet50_libtorch_config_1": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "input", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "resnet50_labels.txt" + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_5": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 4, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "resnet50_libtorch_config_2": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "input", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "resnet50_labels.txt" + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "resnet50_libtorch_config_3": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 4, + "input": [ + { + "name": "input", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "resnet50_labels.txt" + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "resnet50_libtorch_config_4": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 4, + "input": [ + { + "name": "input", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "resnet50_labels.txt" + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "resnet50_libtorch_config_5": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "input", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "resnet50_labels.txt" + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_6": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 4, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "resnet50_libtorch_config_6": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "input", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "resnet50_labels.txt" + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "resnet50_libtorch_config_7": { + "name": "resnet50_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "input", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "output", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "resnet50_labels.txt" + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_7": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 4, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_8": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_9": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 8, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "vgg19_libtorch_config_10": { + "name": "vgg19_libtorch", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "format": "FORMAT_NCHW", + "dims": [ + "3", + "224", + "224" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "1000" + ], + "label_filename": "vgg19_labels.txt" + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + } + }, + "_model_name_index": { + "resnet50_libtorch": 7, + "vgg19_libtorch": 10 + } + } +} \ No newline at end of file diff --git a/tests/common/multi-model-ckpt/golden-metrics-model-gpu.csv b/tests/common/multi-model-ckpt/golden-metrics-model-gpu.csv index 9ad56518d..d8cf2bf36 100644 --- a/tests/common/multi-model-ckpt/golden-metrics-model-gpu.csv +++ b/tests/common/multi-model-ckpt/golden-metrics-model-gpu.csv @@ -1,38 +1,14 @@ Model,GPU UUID,Batch,Concurrency,Model Config Path,Instance Group,Satisfies Constraints,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","32,2","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,87.7,279.3 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","256,256","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,89.3,279.3 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","32,32","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,65.7,280.0 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","128,128","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,73.0,276.3 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","64,64","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,68.7,277.3 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","16,16","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,66.0,219.3 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","8,8","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,67.3,218.1 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,4","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,64.0,218.8 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","1,1","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,83.2,280.9 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,2","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2432.7,66.0,218.5 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","16,2","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,86.2,280.6 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","32,32","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,69.3,278.9 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","64,64","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,76.5,280.2 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","128,128","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,75.3,281.4 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","16,16","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,66.0,219.0 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","8,8","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,66.3,217.9 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,4","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,55.0,227.4 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","1,1","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2246.0,64.7,281.9 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,2","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2246.0,63.7,218.0 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","8,2","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,100.0,279.9 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","64,64","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,71.3,283.0 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","16,16","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,67.3,220.1 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","32,32","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,65.7,220.2 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","8,8","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,74.0,232.5 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,4","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2380.3,100.0,279.6 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","1,1","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2246.0,97.3,283.7 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,2","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2246.0,65.7,217.2 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,2","resnet50_libtorch_config_6,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2248.1,78.0,283.1 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,8","resnet50_libtorch_config_6,vgg19_libtorch_config_7","1:GPU,1:GPU",Yes,2353.0,99.0,277.6 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","1,1","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU",Yes,2246.0,99.7,279.1 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,2","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU",Yes,2246.0,92.7,278.7 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,4","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU",Yes,2246.0,64.7,218.7 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","8,8","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU",Yes,2246.0,65.7,221.7 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,2","resnet50_libtorch_config_0,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2246.0,70.3,284.5 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,4","resnet50_libtorch_config_5,vgg19_libtorch_config_1","2:GPU,2:GPU",Yes,3183.5,86.0,276.9 -"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,4","resnet50_libtorch_config_0,vgg19_libtorch_config_4","1:GPU,1:GPU",Yes,2273.3,71.8,279.4 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,4","resnet50_libtorch_config_5,vgg19_libtorch_config_4","2:GPU,1:GPU",Yes,2602.6,100.0,276.1 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","1,1","resnet50_libtorch_config_5,vgg19_libtorch_config_4","2:GPU,1:GPU",Yes,2575.3,98.5,276.8 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,8","resnet50_libtorch_config_5,vgg19_libtorch_config_7","2:GPU,1:GPU",Yes,2680.2,100.0,274.2 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","1,1","resnet50_libtorch_config_5,vgg19_libtorch_config_7","2:GPU,1:GPU",Yes,2575.3,93.7,277.0 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","1,1","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU",Yes,2296.4,99.7,273.8 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,4","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU",Yes,2296.4,50.5,178.0 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,2","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU",Yes,2296.4,99.5,277.9 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","8,8","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU",Yes,2296.4,60.3,216.1 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,4","resnet50_libtorch_config_0,vgg19_libtorch_config_4","1:GPU,1:GPU",Yes,2323.6,99.3,275.4 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,2","resnet50_libtorch_config_0,vgg19_libtorch_config_0","1:GPU,1:GPU",Yes,2296.4,97.7,269.3 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","4,4","resnet50_libtorch_config_7,vgg19_libtorch_config_1","1:GPU,2:GPU",Yes,3089.1,79.7,280.6 +"resnet50_libtorch,vgg19_libtorch",GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,"1,1","2,8","resnet50_libtorch_config_0,vgg19_libtorch_config_7","1:GPU,1:GPU",Yes,2401.2,78.2,280.0 diff --git a/tests/common/multi-model-ckpt/golden-metrics-model-inference.csv b/tests/common/multi-model-ckpt/golden-metrics-model-inference.csv index 66d8c551e..8373a448f 100644 --- a/tests/common/multi-model-ckpt/golden-metrics-model-inference.csv +++ b/tests/common/multi-model-ckpt/golden-metrics-model-inference.csv @@ -1,38 +1,14 @@ Model,Batch,Concurrency,Model Config Path,Instance Group,Max Batch Size,Satisfies Constraints,Throughput (infer/sec),p99 Latency (ms) -"resnet50_libtorch,vgg19_libtorch","1,1","32,2","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"720.1, [586.2,133.9]","38.3, [58.0,18.5]" -"resnet50_libtorch,vgg19_libtorch","1,1","256,256","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"718.6, [586.1,132.5]","1311.6, [447.5,2175.7]" -"resnet50_libtorch,vgg19_libtorch","1,1","32,32","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"714.4, [580.8,133.5]","151.6, [58.0,245.1]" -"resnet50_libtorch,vgg19_libtorch","1,1","128,128","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"713.6, [580.8,132.9]","636.6, [224.9,1048.2]" -"resnet50_libtorch,vgg19_libtorch","1,1","64,64","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"711.0, [578.4,132.6]","300.9, [113.9,487.9]" -"resnet50_libtorch,vgg19_libtorch","1,1","16,16","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"586.3, [427.8,158.5]","86.4, [67.8,105.0]" -"resnet50_libtorch,vgg19_libtorch","1,1","8,8","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"499.3, [325.4,173.8]","36.8, [26.3,47.2]" -"resnet50_libtorch,vgg19_libtorch","1,1","4,4","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"405.4, [214.8,190.5]","20.8, [19.9,21.8]" -"resnet50_libtorch,vgg19_libtorch","1,1","1,1","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"344.4, [171.2,173.2]","6.9, [7.9,5.9]" -"resnet50_libtorch,vgg19_libtorch","1,1","2,2","resnet50_libtorch_config_10,vgg19_libtorch_config_0","1:GPU,1:GPU","16,1",Yes,"345.4, [145.6,199.9]","12.6, [14.9,10.3]" -"resnet50_libtorch,vgg19_libtorch","1,1","16,2","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"594.1, [431.6,162.5]","25.5, [37.8,13.2]" -"resnet50_libtorch,vgg19_libtorch","1,1","32,32","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"589.3, [428.8,160.5]","139.0, [76.4,201.6]" -"resnet50_libtorch,vgg19_libtorch","1,1","64,64","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"586.5, [426.4,160.1]","278.7, [153.2,404.2]" -"resnet50_libtorch,vgg19_libtorch","1,1","128,128","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"585.6, [426.1,159.5]","553.8, [302.9,804.8]" -"resnet50_libtorch,vgg19_libtorch","1,1","16,16","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"580.2, [419.0,161.2]","88.0, [74.5,101.6]" -"resnet50_libtorch,vgg19_libtorch","1,1","8,8","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"500.2, [328.0,172.2]","36.9, [26.6,47.3]" -"resnet50_libtorch,vgg19_libtorch","1,1","4,4","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"406.1, [217.2,188.9]","22.2, [19.8,24.7]" -"resnet50_libtorch,vgg19_libtorch","1,1","1,1","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"343.4, [170.9,172.5]","6.5, [7.0,5.9]" -"resnet50_libtorch,vgg19_libtorch","1,1","2,2","resnet50_libtorch_config_9,vgg19_libtorch_config_0","1:GPU,1:GPU","8,1",Yes,"343.1, [145.2,197.9]","12.6, [14.8,10.4]" -"resnet50_libtorch,vgg19_libtorch","1,1","8,2","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU","4,1",Yes,"498.6, [317.1,181.5]","19.3, [27.2,11.5]" -"resnet50_libtorch,vgg19_libtorch","1,1","64,64","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU","4,1",Yes,"492.1, [314.3,177.9]","287.0, [210.9,363.0]" -"resnet50_libtorch,vgg19_libtorch","1,1","16,16","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU","4,1",Yes,"488.8, [310.3,178.5]","72.9, [54.2,91.5]" -"resnet50_libtorch,vgg19_libtorch","1,1","32,32","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU","4,1",Yes,"489.2, [312.0,177.2]","144.8, [107.1,182.5]" -"resnet50_libtorch,vgg19_libtorch","1,1","8,8","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU","4,1",Yes,"487.4, [308.9,178.5]","37.1, [27.5,46.7]" -"resnet50_libtorch,vgg19_libtorch","1,1","4,4","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU","4,1",Yes,"406.4, [216.8,189.5]","20.8, [20.0,21.7]" -"resnet50_libtorch,vgg19_libtorch","1,1","1,1","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU","4,1",Yes,"343.8, [169.9,173.9]","6.6, [7.1,6.0]" -"resnet50_libtorch,vgg19_libtorch","1,1","2,2","resnet50_libtorch_config_8,vgg19_libtorch_config_0","1:GPU,1:GPU","4,1",Yes,"341.8, [143.9,197.9]","12.7, [15.1,10.4]" -"resnet50_libtorch,vgg19_libtorch","1,1","4,2","resnet50_libtorch_config_6,vgg19_libtorch_config_0","1:GPU,1:GPU","2,1",Yes,"405.3, [209.8,195.5]","15.6, [20.6,10.6]" -"resnet50_libtorch,vgg19_libtorch","1,1","4,8","resnet50_libtorch_config_6,vgg19_libtorch_config_7","1:GPU,1:GPU","2,4",Yes,"418.3, [125.2,293.0]","31.8, [35.7,27.8]" -"resnet50_libtorch,vgg19_libtorch","1,1","1,1","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU","128,128",Yes,"352.4, [175.5,176.9]","6.5, [7.2,5.8]" -"resnet50_libtorch,vgg19_libtorch","1,1","2,2","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU","128,128",Yes,"342.4, [144.2,198.2]","12.7, [15.1,10.3]" -"resnet50_libtorch,vgg19_libtorch","1,1","4,4","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU","128,128",Yes,"341.8, [144.6,197.2]","24.8, [29.0,20.7]" -"resnet50_libtorch,vgg19_libtorch","1,1","8,8","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU","128,128",Yes,"341.8, [144.6,197.2]","49.4, [57.7,41.0]" -"resnet50_libtorch,vgg19_libtorch","1,1","2,2","resnet50_libtorch_config_0,vgg19_libtorch_config_0","1:GPU,1:GPU","1,1",Yes,"352.4, [149.5,202.8]","12.3, [14.4,10.2]" -"resnet50_libtorch,vgg19_libtorch","1,1","4,4","resnet50_libtorch_config_5,vgg19_libtorch_config_1","2:GPU,2:GPU","1,1",Yes,"355.7, [129.2,226.5]","25.7, [33.1,18.2]" -"resnet50_libtorch,vgg19_libtorch","1,1","2,4","resnet50_libtorch_config_0,vgg19_libtorch_config_4","1:GPU,1:GPU","1,2",Yes,"372.2, [112.5,259.7]","17.6, [19.6,15.7]" +"resnet50_libtorch,vgg19_libtorch","1,1","4,4","resnet50_libtorch_config_5,vgg19_libtorch_config_4","2:GPU,1:GPU","1,2",Yes,"421.0, [184.5,236.5]","20.7, [23.9,17.4]" +"resnet50_libtorch,vgg19_libtorch","1,1","1,1","resnet50_libtorch_config_5,vgg19_libtorch_config_4","2:GPU,1:GPU","1,2",Yes,"340.1, [168.9,171.2]","6.5, [7.0,6.0]" +"resnet50_libtorch,vgg19_libtorch","1,1","4,8","resnet50_libtorch_config_5,vgg19_libtorch_config_7","2:GPU,1:GPU","1,4",Yes,"419.6, [139.9,279.7]","30.1, [31.2,29.0]" +"resnet50_libtorch,vgg19_libtorch","1,1","1,1","resnet50_libtorch_config_5,vgg19_libtorch_config_7","2:GPU,1:GPU","1,4",Yes,"299.7, [123.6,176.2]","8.0, [10.0,6.0]" +"resnet50_libtorch,vgg19_libtorch","1,1","1,1","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU","128,128",Yes,"344.1, [167.9,176.2]","6.8, [7.7,5.9]" +"resnet50_libtorch,vgg19_libtorch","1,1","4,4","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU","128,128",Yes,"338.8, [139.2,199.5]","25.6, [30.4,20.7]" +"resnet50_libtorch,vgg19_libtorch","1,1","2,2","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU","128,128",Yes,"339.1, [138.6,200.5]","13.0, [15.6,10.3]" +"resnet50_libtorch,vgg19_libtorch","1,1","8,8","resnet50_libtorch_config_default,vgg19_libtorch_config_default","1:GPU,1:GPU","128,128",Yes,"338.1, [138.5,199.5]","50.7, [60.7,40.7]" +"resnet50_libtorch,vgg19_libtorch","1,1","2,4","resnet50_libtorch_config_0,vgg19_libtorch_config_4","1:GPU,1:GPU","1,2",Yes,"368.4, [108.6,259.8]","17.9, [19.9,15.8]" +"resnet50_libtorch,vgg19_libtorch","1,1","2,2","resnet50_libtorch_config_0,vgg19_libtorch_config_0","1:GPU,1:GPU","1,1",Yes,"341.4, [139.6,201.8]","12.9, [15.5,10.3]" +"resnet50_libtorch,vgg19_libtorch","1,1","4,4","resnet50_libtorch_config_7,vgg19_libtorch_config_1","1:GPU,2:GPU","2,1",Yes,"345.4, [108.6,236.8]","28.4, [39.2,17.5]" +"resnet50_libtorch,vgg19_libtorch","1,1","2,8","resnet50_libtorch_config_0,vgg19_libtorch_config_7","1:GPU,1:GPU","1,4",Yes,"374.7, [74.9,299.8]","27.1, [27.1,27.1]" diff --git a/tests/common/multi-model-ckpt/golden-metrics-server-only.csv b/tests/common/multi-model-ckpt/golden-metrics-server-only.csv index 7e7f2a72c..25a6071cf 100644 --- a/tests/common/multi-model-ckpt/golden-metrics-server-only.csv +++ b/tests/common/multi-model-ckpt/golden-metrics-server-only.csv @@ -1,3 +1,3 @@ Model,GPU UUID,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,457.0,0.2,55.3 +triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,870.0,0.0,55.8 diff --git a/tests/common/multi-model-ckpt/plot_manager.json b/tests/common/multi-model-ckpt/plot_manager.json index e4c0153ab..7ad1567c5 100644 --- a/tests/common/multi-model-ckpt/plot_manager.json +++ b/tests/common/multi-model-ckpt/plot_manager.json @@ -1 +1 @@ -{"_simple_plots": {"resnet50_libtorch,vgg19_libtorch": {"throughput_v_latency": {"_name": "throughput_v_latency", "_title": "Throughput vs. Latency", "_x_axis": "perf_latency_p99", "_y_axis": "perf_throughput", "_monotonic": true, "_data": {"resnet50_libtorch_config_10,vgg19_libtorch_config_0": {"x_data": [38.2595, 1311.589, 151.566, 636.5830000000001, 300.90500000000003, 86.4235, 36.751, 20.811, 6.888999999999999, 12.595], "y_data": [720.086, 718.623, 714.375, 713.646, 710.9939999999999, 586.329, 499.274, 405.35699999999997, 344.396, 345.438]}, "resnet50_libtorch_config_9,vgg19_libtorch_config_0": {"x_data": [25.5215, 138.9855, 278.7145, 553.8275, 88.048, 36.937, 22.2335, 6.474, 12.587499999999999], "y_data": [594.129, 589.275, 586.452, 585.617, 580.212, 500.201, 406.078, 343.403, 343.089]}, "resnet50_libtorch_config_8,vgg19_libtorch_config_0": {"x_data": [19.334500000000002, 286.9765, 72.884, 144.79250000000002, 37.119, 20.8475, 6.5765, 12.7045], "y_data": [498.574, 492.115, 488.843, 489.198, 487.397, 406.355, 343.77, 341.793]}, "resnet50_libtorch_config_default,vgg19_libtorch_config_default": {"x_data": [6.509, 12.699, 24.848, 49.394], "y_data": [352.39300000000003, 342.395, 341.791, 341.752]}}}, "gpu_mem_v_latency": {"_name": "gpu_mem_v_latency", "_title": "GPU Memory vs. Latency", "_x_axis": "perf_latency_p99", "_y_axis": "gpu_used_memory", "_monotonic": false, "_data": {"resnet50_libtorch_config_10,vgg19_libtorch_config_0": {"x_data": [38.2595, 1311.589, 151.566, 636.5830000000001, 300.90500000000003, 86.4235, 36.751, 20.811, 6.888999999999999, 12.595], "y_data": [2432.69632, 2432.69632, 2432.69632, 2432.69632, 2432.69632, 2432.69632, 2432.69632, 2432.69632, 2432.69632, 2432.69632]}, "resnet50_libtorch_config_9,vgg19_libtorch_config_0": {"x_data": [25.5215, 138.9855, 278.7145, 553.8275, 88.048, 36.937, 22.2335, 6.474, 12.587499999999999], "y_data": [2380.26752, 2380.26752, 2380.26752, 2380.26752, 2380.26752, 2380.26752, 2380.26752, 2246.049792, 2246.049792]}, "resnet50_libtorch_config_8,vgg19_libtorch_config_0": {"x_data": [19.334500000000002, 286.9765, 72.884, 144.79250000000002, 37.119, 20.8475, 6.5765, 12.7045], "y_data": [2380.26752, 2380.26752, 2380.26752, 2380.26752, 2380.26752, 2380.26752, 2246.049792, 2246.049792]}, "resnet50_libtorch_config_default,vgg19_libtorch_config_default": {"x_data": [6.509, 12.699, 24.848, 49.394], "y_data": [2246.049792, 2246.049792, 2246.049792, 2246.049792]}}}}}} \ No newline at end of file +{"_simple_plots": {"resnet50_libtorch,vgg19_libtorch": {"throughput_v_latency": {"_name": "throughput_v_latency", "_title": "Throughput vs. Latency", "_x_axis": "perf_latency_p99", "_y_axis": "perf_throughput", "_monotonic": true, "_data": {"resnet50_libtorch_config_5,vgg19_libtorch_config_4": {"x_data": [20.661, 6.497], "y_data": [420.97299999999996, 340.14300000000003]}, "resnet50_libtorch_config_5,vgg19_libtorch_config_7": {"x_data": [30.093, 8.0365], "y_data": [419.61899999999997, 299.746]}, "resnet50_libtorch_config_default,vgg19_libtorch_config_default": {"x_data": [6.7745, 25.5655, 12.9815, 50.6635], "y_data": [344.119, 338.762, 339.052, 338.05600000000004]}}}, "gpu_mem_v_latency": {"_name": "gpu_mem_v_latency", "_title": "GPU Memory vs. Latency", "_x_axis": "perf_latency_p99", "_y_axis": "gpu_used_memory", "_monotonic": false, "_data": {"resnet50_libtorch_config_5,vgg19_libtorch_config_4": {"x_data": [20.661, 6.497], "y_data": [2602.565632, 2575.302656]}, "resnet50_libtorch_config_5,vgg19_libtorch_config_7": {"x_data": [30.093, 8.0365], "y_data": [2680.160256, 2575.302656]}, "resnet50_libtorch_config_default,vgg19_libtorch_config_default": {"x_data": [6.7745, 25.5655, 12.9815, 50.6635], "y_data": [2296.38144, 2296.38144, 2296.38144, 2296.38144]}}}}}} \ No newline at end of file diff --git a/tests/common/request-rate-ckpt/0.ckpt b/tests/common/request-rate-ckpt/0.ckpt index 6c8701d3b..0f224d4c5 100644 --- a/tests/common/request-rate-ckpt/0.ckpt +++ b/tests/common/request-rate-ckpt/0.ckpt @@ -1,39 +1,58 @@ { "ResultManager.results": { "_results": { - "vgg19_libtorch": { - "vgg19_libtorch_config_default": [ + "add_sub": { + "add_sub_config_default": [ { "_triton_env": {}, "_model_run_configs": [ { - "_model_name": "vgg19_libtorch", - "_model_config": { - "name": "vgg19_libtorch_config_default", - "platform": "pytorch_libtorch", - "maxBatchSize": 128, - "input": [ - { - "name": "INPUT__0", - "dataType": "TYPE_FP32", - "format": "FORMAT_NCHW", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "output": [ - { - "name": "OUTPUT__0", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "vgg19_labels.txt" - } - ], + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 8, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "kind": "KIND_GPU" + } + ] + }, + "variant_name": "add_sub_config_default", "cpu_only": false }, "_perf_config": { @@ -45,7 +64,7 @@ "measurement-interval": null, "concurrency-range": null, "request-rate-range": 16, - "request-distribution": "poisson", + "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, @@ -63,7 +82,7 @@ "string-length": null, "string-data": null, "measurement-mode": "count_windows", - "measurement-request-count": 150, + "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, @@ -81,16 +100,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0, + "metrics-interval": 1000, "bls-composing-models": null }, "_options": { - "-m": "vgg19_libtorch_config_default", + "-m": "add_sub", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "vgg19_libtorch_config_default-results.csv", + "-f": "add_sub-results.csv", "-H": null }, "_verbose": { @@ -117,19 +136,19 @@ "shape": null } }, - "_composing_configs": [] + "_composing_config_variants": [] } ] }, { - "-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --request-rate-range=16 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_default", + "add_sub_config_default -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -137,7 +156,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -145,7 +164,7 @@ [ "gpu_utilization", { - "_value": 7.60606, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -153,7 +172,7 @@ [ "gpu_power_usage", { - "_value": 62.3125, + "_value": 56.4722, "_timestamp": 0, "_device_uuid": null } @@ -164,7 +183,7 @@ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -172,7 +191,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -180,7 +199,7 @@ [ "gpu_utilization", { - "_value": 7.60606, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -188,7 +207,7 @@ [ "gpu_power_usage", { - "_value": 62.3125, + "_value": 56.4722, "_timestamp": 0, "_device_uuid": null } @@ -198,7 +217,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -206,7 +225,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -214,7 +233,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 7.60606, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -222,7 +241,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 62.3125, + "_value": 56.4722, "_timestamp": 0, "_device_uuid": null } @@ -230,7 +249,7 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_default", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, @@ -240,77 +259,77 @@ [ "perf_latency_avg", { - "_value": 7.416, + "_value": 1.061, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 7.501, + "_value": 1.225, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 10.947, + "_value": 1.36, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 12.577, + "_value": 1.427, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 14.716, + "_value": 16.079, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.167, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.213, + "_value": 1.013, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.435, + "_value": 0.073, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.333, + "_value": 0.155, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.257, + "_value": 0.104, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.046, + "_value": 0.097, "_timestamp": 0 } ] @@ -319,77 +338,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.416, + "_value": 1.061, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 7.501, + "_value": 1.225, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 10.947, + "_value": 1.36, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 12.577, + "_value": 1.427, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 14.716, + "_value": 16.079, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.167, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.213, + "_value": 1.013, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.435, + "_value": 0.073, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.333, + "_value": 0.155, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.257, + "_value": 0.104, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.046, + "_value": 0.097, "_timestamp": 0 } ] @@ -397,14 +416,14 @@ } ] }, - "-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --request-rate-range=32 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_default", + "add_sub_config_default -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -412,7 +431,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -420,7 +439,7 @@ [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -428,7 +447,7 @@ [ "gpu_power_usage", { - "_value": 72.6813, + "_value": 56.4765, "_timestamp": 0, "_device_uuid": null } @@ -439,7 +458,7 @@ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -447,7 +466,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -455,7 +474,7 @@ [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -463,7 +482,7 @@ [ "gpu_power_usage", { - "_value": 72.6813, + "_value": 56.4765, "_timestamp": 0, "_device_uuid": null } @@ -473,7 +492,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -481,7 +500,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -489,7 +508,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -497,7 +516,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 72.6813, + "_value": 56.4765, "_timestamp": 0, "_device_uuid": null } @@ -505,7 +524,7 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_default", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, @@ -515,77 +534,77 @@ [ "perf_latency_avg", { - "_value": 7.589, + "_value": 1.02, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 8.974, + "_value": 1.122, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 11.599, + "_value": 1.258, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 16.098, + "_value": 1.36, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 35.9823, + "_value": 31.9887, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.16, + "_value": 0.012, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.394, + "_value": 0.975, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.671, + "_value": 0.07, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.305, + "_value": 0.144, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.248, + "_value": 0.098, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.042, + "_value": 0.093, "_timestamp": 0 } ] @@ -594,77 +613,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.589, + "_value": 1.02, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 8.974, + "_value": 1.122, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 11.599, + "_value": 1.258, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 16.098, + "_value": 1.36, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 35.9823, + "_value": 31.9887, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.16, + "_value": 0.012, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.394, + "_value": 0.975, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.671, + "_value": 0.07, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.305, + "_value": 0.144, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.248, + "_value": 0.098, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.042, + "_value": 0.093, "_timestamp": 0 } ] @@ -672,14 +691,14 @@ } ] }, - "-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --request-rate-range=64 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_default", + "add_sub_config_default -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -687,7 +706,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -695,7 +714,7 @@ [ "gpu_utilization", { - "_value": 30.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -703,7 +722,7 @@ [ "gpu_power_usage", { - "_value": 94.4873, + "_value": 56.5077, "_timestamp": 0, "_device_uuid": null } @@ -714,7 +733,7 @@ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -722,7 +741,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -730,7 +749,7 @@ [ "gpu_utilization", { - "_value": 30.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -738,7 +757,7 @@ [ "gpu_power_usage", { - "_value": 94.4873, + "_value": 56.5077, "_timestamp": 0, "_device_uuid": null } @@ -748,7 +767,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -756,7 +775,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -764,7 +783,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 30.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -772,7 +791,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 94.4873, + "_value": 56.5077, "_timestamp": 0, "_device_uuid": null } @@ -780,87 +799,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_default", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 64 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 8.051, + "_value": 1.122, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 11.758, + "_value": 1.282, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 13.91, + "_value": 1.417, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 18.579, + "_value": 1.532, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 64.534, + "_value": 32.1578, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.155, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.861, + "_value": 1.072, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 1.537, + "_value": 0.078, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.905, + "_value": 0.167, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.246, + "_value": 0.107, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.038, + "_value": 0.101, "_timestamp": 0 } ] @@ -869,77 +888,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 8.051, + "_value": 1.122, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 11.758, + "_value": 1.282, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 13.91, + "_value": 1.417, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 18.579, + "_value": 1.532, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 64.534, + "_value": 32.1578, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.155, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.861, + "_value": 1.072, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 1.537, + "_value": 0.078, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 4.905, + "_value": 0.167, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.246, + "_value": 0.107, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.038, + "_value": 0.101, "_timestamp": 0 } ] @@ -947,14 +966,14 @@ } ] }, - "-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --request-rate-range=128 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_default", + "add_sub_config_default -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_default", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -962,7 +981,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -970,7 +989,7 @@ [ "gpu_utilization", { - "_value": 52.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -978,7 +997,7 @@ [ "gpu_power_usage", { - "_value": 172.555, + "_value": 56.5913, "_timestamp": 0, "_device_uuid": null } @@ -989,7 +1008,7 @@ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -997,7 +1016,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -1005,7 +1024,7 @@ [ "gpu_utilization", { - "_value": 52.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1013,7 +1032,7 @@ [ "gpu_power_usage", { - "_value": 172.555, + "_value": 56.5913, "_timestamp": 0, "_device_uuid": null } @@ -1023,7 +1042,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -1031,7 +1050,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -1039,7 +1058,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 52.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1047,7 +1066,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 172.555, + "_value": 56.5913, "_timestamp": 0, "_device_uuid": null } @@ -1055,87 +1074,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_default", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 128 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 9.183, + "_value": 1.073, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 17.241, + "_value": 1.283, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 17.725, + "_value": 1.339, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 18.21, + "_value": 1.392, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 125.954, + "_value": 63.9755, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.13, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 9.021, + "_value": 1.025, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 3.536, + "_value": 0.074, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.246, + "_value": 0.162, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.2, + "_value": 0.103, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.027, + "_value": 0.097, "_timestamp": 0 } ] @@ -1144,92 +1163,235 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 9.183, + "_value": 1.073, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 17.241, + "_value": 1.283, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 17.725, + "_value": 1.339, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 18.21, + "_value": 1.392, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 125.954, + "_value": 63.9755, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.13, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 9.021, + "_value": 1.025, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 3.536, + "_value": 0.074, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 4.246, + "_value": 0.162, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.2, + "_value": 0.103, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.027, + "_value": 0.097, "_timestamp": 0 } ] } } ] - }, - "-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --request-rate-range=256 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_default", + } + } + ], + "add_sub_config_0": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_0", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": null, + "request-rate-range": 16, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_0 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_0", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1237,7 +1399,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1245,7 +1407,7 @@ [ "gpu_utilization", { - "_value": 49.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1253,7 +1415,7 @@ [ "gpu_power_usage", { - "_value": 213.023, + "_value": 56.691, "_timestamp": 0, "_device_uuid": null } @@ -1264,7 +1426,7 @@ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -1272,7 +1434,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -1280,7 +1442,7 @@ [ "gpu_utilization", { - "_value": 49.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1288,7 +1450,7 @@ [ "gpu_power_usage", { - "_value": 213.023, + "_value": 56.691, "_timestamp": 0, "_device_uuid": null } @@ -1298,7 +1460,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -1306,7 +1468,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -1314,7 +1476,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 49.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1322,7 +1484,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 213.023, + "_value": 56.691, "_timestamp": 0, "_device_uuid": null } @@ -1330,87 +1492,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_default", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 256 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 17.044, + "_value": 1.121, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 17.634, + "_value": 1.237, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 17.678, + "_value": 1.369, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 17.762, + "_value": 1.506, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 223.167, + "_value": 16.0791, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.09, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 16.929, + "_value": 1.074, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 11.833, + "_value": 0.146, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.184, + "_value": 0.149, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.133, + "_value": 0.1, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.014, + "_value": 0.094, "_timestamp": 0 } ] @@ -1419,77 +1581,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 17.044, + "_value": 1.121, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 17.634, + "_value": 1.237, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 17.678, + "_value": 1.369, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 17.762, + "_value": 1.506, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 223.167, + "_value": 16.0791, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.09, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 16.929, + "_value": 1.074, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 11.833, + "_value": 0.146, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 4.184, + "_value": 0.149, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.133, + "_value": 0.1, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.014, + "_value": 0.094, "_timestamp": 0 } ] @@ -1497,14 +1659,14 @@ } ] }, - "-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --request-rate-range=512 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_default", + "add_sub_config_0 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_0", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1512,7 +1674,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -1520,7 +1682,7 @@ [ "gpu_utilization", { - "_value": 59.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1528,7 +1690,7 @@ [ "gpu_power_usage", { - "_value": 215.072, + "_value": 56.7322, "_timestamp": 0, "_device_uuid": null } @@ -1539,7 +1701,7 @@ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -1547,7 +1709,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -1555,7 +1717,7 @@ [ "gpu_utilization", { - "_value": 59.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1563,7 +1725,7 @@ [ "gpu_power_usage", { - "_value": 215.072, + "_value": 56.7322, "_timestamp": 0, "_device_uuid": null } @@ -1573,7 +1735,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -1581,7 +1743,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -1589,7 +1751,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 59.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -1597,7 +1759,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 215.072, + "_value": 56.7322, "_timestamp": 0, "_device_uuid": null } @@ -1605,87 +1767,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_default", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 512 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 17.511, + "_value": 1.07, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 17.752, + "_value": 1.19, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 17.809, + "_value": 1.336, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 17.947, + "_value": 1.466, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 226.168, + "_value": 31.9878, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.089, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 17.398, + "_value": 1.024, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 12.295, + "_value": 0.136, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.196, + "_value": 0.143, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.139, + "_value": 0.097, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.014, + "_value": 0.091, "_timestamp": 0 } ] @@ -1694,2696 +1856,235 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 17.511, + "_value": 1.07, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 17.752, + "_value": 1.19, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 17.809, + "_value": 1.336, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 17.947, + "_value": 1.466, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 226.168, + "_value": 31.9878, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.089, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 17.398, + "_value": 1.024, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 12.295, + "_value": 0.136, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 4.196, + "_value": 0.143, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.139, + "_value": 0.097, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.014, + "_value": 0.091, "_timestamp": 0 } ] } } ] - }, - "-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --request-rate-range=1024 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_default", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 219.238, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 219.238, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 219.238, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_default", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 1024 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 17.665, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 17.804, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 17.861, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 18.017, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 225.515, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.088, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 17.553, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 12.442, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 4.206, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.143, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 17.665, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 17.804, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 17.861, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 18.017, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 225.515, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.088, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 17.553, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 12.442, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 4.206, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.143, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m vgg19_libtorch_config_default -b 1 -i grpc -f vgg19_libtorch_config_default-results.csv --verbose-csv --request-rate-range=2048 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_default", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 61.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 216.291, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 61.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 216.291, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 61.0, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 216.291, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_default", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 2048 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 17.627, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 17.754, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 17.802, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 18.028, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 226.171, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.089, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 17.513, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 12.391, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 4.203, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.136, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.014, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 17.627, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 17.754, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 17.802, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 18.028, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 226.171, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.089, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 17.513, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 12.391, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 4.203, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.136, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.014, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "vgg19_libtorch_config_0": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "vgg19_libtorch", - "_model_config": { - "name": "vgg19_libtorch_config_0", - "platform": "pytorch_libtorch", - "maxBatchSize": 1, - "input": [ - { - "name": "INPUT__0", - "dataType": "TYPE_FP32", - "format": "FORMAT_NCHW", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "output": [ - { - "name": "OUTPUT__0", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "vgg19_labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": null, - "request-rate-range": 16, - "request-distribution": "poisson", - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": null, - "shared-memory": null, - "output-shared-memory-size": null, - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 150, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0, - "bls-composing-models": null - }, - "_options": { - "-m": "vgg19_libtorch_config_0", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "vgg19_libtorch_config_0-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [] - } - ] - }, - { - "-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --request-rate-range=16 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_0", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 7.60606, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 63.2401, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 7.60606, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 63.2401, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 7.60606, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 63.2401, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_0", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 16 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 7.556, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 7.644, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 11.146, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 12.726, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 14.7162, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.169, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 7.349, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.517, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 5.352, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.26, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.048, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 7.556, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 7.644, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 11.146, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 12.726, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 14.7162, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.169, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 7.349, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.517, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 5.352, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.26, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.048, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --request-rate-range=32 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_0", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 17.5714, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 72.9666, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 17.5714, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 72.9666, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 17.5714, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 72.9666, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_0", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 32 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 7.713, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 9.097, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 11.535, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 16.322, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 35.983, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.16, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 7.518, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 0.745, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 5.326, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.25, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.044, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 7.713, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 9.097, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 11.535, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 16.322, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 35.983, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.16, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 7.518, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 0.745, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 5.326, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.25, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.044, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --request-rate-range=64 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_0", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 26.6, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 120.486, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 26.6, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 120.486, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 26.6, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 120.486, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_0", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 64 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 7.514, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 10.814, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 12.431, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 18.35, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 60.6463, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.159, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 7.319, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 1.34, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 4.545, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.243, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.039, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 7.514, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 10.814, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 12.431, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 18.35, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 60.6463, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.159, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 7.319, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 1.34, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 4.545, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.243, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.039, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --request-rate-range=128 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_0", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 52.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 179.564, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 52.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 179.564, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 52.5, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 179.564, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_0", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 128 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 9.302, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 17.589, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 17.825, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 18.384, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 125.948, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.13, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 9.14, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 3.646, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 4.255, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.203, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.029, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 9.302, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 17.589, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 17.825, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 18.384, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 125.948, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.13, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 9.14, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 3.646, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 4.255, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.203, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.029, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --request-rate-range=256 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_0", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 49.6667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 216.955, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 49.6667, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 216.955, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 49.6667, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 216.955, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_0", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 256 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 17.317, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 17.777, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 17.85, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 17.978, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 221.852, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.09, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 17.201, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 12.066, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 4.204, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.138, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 17.317, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 17.777, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 17.85, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 17.978, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 221.852, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.09, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 17.201, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 12.066, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 4.204, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.138, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --request-rate-range=512 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_0", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 218.111, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 218.111, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 218.111, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_0", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 512 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 17.563, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 17.82, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 17.88, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 18.011, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 225.513, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.089, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 17.448, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 12.329, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 4.209, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.136, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 17.563, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 17.82, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 17.88, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 18.011, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 225.513, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", - { - "_value": 0.089, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 17.448, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 12.329, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 4.209, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.136, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] - } - } - ] - }, - "-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --request-rate-range=1024 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_0", - "_gpu_data": { - "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" - } - ], - [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 220.102, - "_timestamp": 0, - "_device_uuid": null - } - ] - ] - }, - "_avg_gpu_data": [ - [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - [ - "gpu_power_usage", - { - "_value": 220.102, - "_timestamp": 0, - "_device_uuid": null - } - ] - ], - "_avg_gpu_data_from_tag": { - "gpu_used_memory": [ - "gpu_used_memory", - { - "_value": 2386.558976, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_free_memory": [ - "gpu_free_memory", - { - "_value": 23383.2448, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_utilization": [ - "gpu_utilization", - { - "_value": 61.3333, - "_timestamp": 0, - "_device_uuid": null - } - ], - "gpu_power_usage": [ - "gpu_power_usage", - { - "_value": 220.102, - "_timestamp": 0, - "_device_uuid": null - } - ] - }, - "_model_config_measurements": [ - { - "_model_config_name": "vgg19_libtorch_config_0", - "_model_specific_pa_params": { - "batch-size": 1, - "concurrency-range": null, - "request-rate-range": 1024 - }, - "_non_gpu_data": [ - [ - "perf_latency_avg", - { - "_value": 17.664, - "_timestamp": 0 - } - ], - [ - "perf_latency_p90", - { - "_value": 17.815, - "_timestamp": 0 - } - ], - [ - "perf_latency_p95", - { - "_value": 17.876, - "_timestamp": 0 - } - ], - [ - "perf_latency_p99", - { - "_value": 18.151, - "_timestamp": 0 - } - ], - [ - "perf_throughput", - { - "_value": 225.52, - "_timestamp": 0 - } - ], - [ - "perf_client_send_recv", - { - "_value": 0.09, - "_timestamp": 0 - } - ], - [ - "perf_client_response_wait", - { - "_value": 17.548, - "_timestamp": 0 - } - ], - [ - "perf_server_queue", - { - "_value": 12.431, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_infer", - { - "_value": 4.21, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_input", - { - "_value": 0.136, - "_timestamp": 0 - } - ], - [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] - ], - "_non_gpu_data_from_tag": { - "perf_latency_avg": [ - "perf_latency_avg", - { - "_value": 17.664, - "_timestamp": 0 - } - ], - "perf_latency_p90": [ - "perf_latency_p90", - { - "_value": 17.815, - "_timestamp": 0 - } - ], - "perf_latency_p95": [ - "perf_latency_p95", - { - "_value": 17.876, - "_timestamp": 0 - } - ], - "perf_latency_p99": [ - "perf_latency_p99", - { - "_value": 18.151, - "_timestamp": 0 - } - ], - "perf_throughput": [ - "perf_throughput", - { - "_value": 225.52, - "_timestamp": 0 - } - ], - "perf_client_send_recv": [ - "perf_client_send_recv", + } + } + ], + "add_sub_config_1": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ { - "_value": 0.09, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, { - "_value": 17.548, - "_timestamp": 0 + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] } ], - "perf_server_queue": [ - "perf_server_queue", + "output": [ { - "_value": 12.431, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, { - "_value": 4.21, - "_timestamp": 0 + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] } ], - "perf_server_compute_input": [ - "perf_server_compute_input", + "instanceGroup": [ { - "_value": 0.136, - "_timestamp": 0 + "count": 1, + "kind": "KIND_GPU" } ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_1", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": null, + "request-rate-range": 16, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null } - } - ] - }, - "-m vgg19_libtorch_config_0 -b 1 -i grpc -f vgg19_libtorch_config_0-results.csv --verbose-csv --request-rate-range=2048 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_0", + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_1 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_1", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -4391,7 +2092,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -4399,7 +2100,7 @@ [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4407,7 +2108,7 @@ [ "gpu_power_usage", { - "_value": 218.061, + "_value": 56.7208, "_timestamp": 0, "_device_uuid": null } @@ -4418,7 +2119,7 @@ [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -4426,7 +2127,7 @@ [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -4434,7 +2135,7 @@ [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4442,7 +2143,7 @@ [ "gpu_power_usage", { - "_value": 218.061, + "_value": 56.7208, "_timestamp": 0, "_device_uuid": null } @@ -4452,7 +2153,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2386.558976, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -4460,7 +2161,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23383.2448, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -4468,7 +2169,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4476,7 +2177,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 218.061, + "_value": 56.7208, "_timestamp": 0, "_device_uuid": null } @@ -4484,87 +2185,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_0", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 2048 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 17.694, + "_value": 1.12, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 17.836, + "_value": 1.225, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 17.887, + "_value": 1.307, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 18.27, + "_value": 1.508, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 225.178, + "_value": 16.0794, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.089, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 17.58, + "_value": 1.072, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 12.452, + "_value": 0.133, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.215, + "_value": 0.147, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.137, + "_value": 0.099, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.015, + "_value": 0.092, "_timestamp": 0 } ] @@ -4573,221 +2274,92 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 17.694, + "_value": 1.12, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 17.836, + "_value": 1.225, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 17.887, + "_value": 1.307, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 18.27, + "_value": 1.508, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 225.178, + "_value": 16.0794, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.089, - "_timestamp": 0 - } - ], - "perf_client_response_wait": [ - "perf_client_response_wait", - { - "_value": 17.58, - "_timestamp": 0 - } - ], - "perf_server_queue": [ - "perf_server_queue", - { - "_value": 12.452, - "_timestamp": 0 - } - ], - "perf_server_compute_infer": [ - "perf_server_compute_infer", - { - "_value": 4.215, - "_timestamp": 0 - } - ], - "perf_server_compute_input": [ - "perf_server_compute_input", - { - "_value": 0.137, - "_timestamp": 0 - } - ], - "perf_server_compute_output": [ - "perf_server_compute_output", - { - "_value": 0.015, - "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "vgg19_libtorch_config_1": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "vgg19_libtorch", - "_model_config": { - "name": "vgg19_libtorch_config_1", - "platform": "pytorch_libtorch", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT__0", - "dataType": "TYPE_FP32", - "format": "FORMAT_NCHW", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "output": [ - { - "name": "OUTPUT__0", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "vgg19_labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": null, - "request-rate-range": 16, - "request-distribution": "poisson", - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": null, - "shared-memory": null, - "output-shared-memory-size": null, - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 150, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0, - "bls-composing-models": null - }, - "_options": { - "-m": "vgg19_libtorch_config_1", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "vgg19_libtorch_config_1-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null + "_value": 0.013, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.072, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.133, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.147, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.099, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.092, + "_timestamp": 0 + } + ] } - }, - "_composing_configs": [] - } - ] - }, - { - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=16 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + } + ] + }, + "add_sub_config_1 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_1", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -4795,7 +2367,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -4803,7 +2375,7 @@ [ "gpu_utilization", { - "_value": 7.575759999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4811,7 +2383,7 @@ [ "gpu_power_usage", { - "_value": 63.9002, + "_value": 56.7795, "_timestamp": 0, "_device_uuid": null } @@ -4822,7 +2394,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -4830,7 +2402,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -4838,7 +2410,7 @@ [ "gpu_utilization", { - "_value": 7.575759999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4846,7 +2418,7 @@ [ "gpu_power_usage", { - "_value": 63.9002, + "_value": 56.7795, "_timestamp": 0, "_device_uuid": null } @@ -4856,7 +2428,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -4864,7 +2436,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -4872,7 +2444,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 7.575759999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -4880,7 +2452,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 63.9002, + "_value": 56.7795, "_timestamp": 0, "_device_uuid": null } @@ -4888,87 +2460,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 16 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.507, + "_value": 1.094, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 7.52, + "_value": 1.212, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 10.706, + "_value": 1.294, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 12.583, + "_value": 1.452, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 14.7162, + "_value": 31.9889, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.167, + "_value": 0.012, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.303, + "_value": 1.049, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.46, + "_value": 0.144, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.383, + "_value": 0.142, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.259, + "_value": 0.096, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.048, + "_value": 0.091, "_timestamp": 0 } ] @@ -4977,77 +2549,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.507, + "_value": 1.094, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 7.52, + "_value": 1.212, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 10.706, + "_value": 1.294, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 12.583, + "_value": 1.452, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 14.7162, + "_value": 31.9889, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.167, + "_value": 0.012, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.303, + "_value": 1.049, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.46, + "_value": 0.144, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.383, + "_value": 0.142, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.259, + "_value": 0.096, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.048, + "_value": 0.091, "_timestamp": 0 } ] @@ -5055,14 +2627,14 @@ } ] }, - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=32 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + "add_sub_config_1 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_1", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5070,7 +2642,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5078,7 +2650,7 @@ [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5086,7 +2658,7 @@ [ "gpu_power_usage", { - "_value": 75.419, + "_value": 56.7322, "_timestamp": 0, "_device_uuid": null } @@ -5097,7 +2669,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -5105,7 +2677,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -5113,7 +2685,7 @@ [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5121,7 +2693,7 @@ [ "gpu_power_usage", { - "_value": 75.419, + "_value": 56.7322, "_timestamp": 0, "_device_uuid": null } @@ -5131,7 +2703,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -5139,7 +2711,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -5147,7 +2719,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5155,7 +2727,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 75.419, + "_value": 56.7322, "_timestamp": 0, "_device_uuid": null } @@ -5163,87 +2735,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 32 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.673, + "_value": 1.118, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 8.743, + "_value": 1.263, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 11.401, + "_value": 1.329, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 16.709, + "_value": 1.596, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 35.984, + "_value": 32.158, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.158, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.479, + "_value": 1.071, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.624, + "_value": 0.141, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.412, + "_value": 0.155, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.255, + "_value": 0.098, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.046, + "_value": 0.093, "_timestamp": 0 } ] @@ -5252,77 +2824,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.673, + "_value": 1.118, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 8.743, + "_value": 1.263, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 11.401, + "_value": 1.329, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 16.709, + "_value": 1.596, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 35.984, + "_value": 32.158, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.158, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.479, + "_value": 1.071, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.624, + "_value": 0.141, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.412, + "_value": 0.155, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.255, + "_value": 0.098, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.046, + "_value": 0.093, "_timestamp": 0 } ] @@ -5330,14 +2902,14 @@ } ] }, - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=64 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + "add_sub_config_1 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_1", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5345,7 +2917,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5353,7 +2925,7 @@ [ "gpu_utilization", { - "_value": 26.2, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5361,7 +2933,7 @@ [ "gpu_power_usage", { - "_value": 123.202, + "_value": 56.7118, "_timestamp": 0, "_device_uuid": null } @@ -5372,7 +2944,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -5380,7 +2952,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -5388,7 +2960,7 @@ [ "gpu_utilization", { - "_value": 26.2, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5396,7 +2968,7 @@ [ "gpu_power_usage", { - "_value": 123.202, + "_value": 56.7118, "_timestamp": 0, "_device_uuid": null } @@ -5406,7 +2978,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 872.415232, "_timestamp": 0, "_device_uuid": null } @@ -5414,7 +2986,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24897.388544, "_timestamp": 0, "_device_uuid": null } @@ -5422,7 +2994,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 26.2, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5430,7 +3002,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 123.202, + "_value": 56.7118, "_timestamp": 0, "_device_uuid": null } @@ -5438,87 +3010,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 64 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.428, + "_value": 1.098, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 10.598, + "_value": 1.199, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 11.85, + "_value": 1.332, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.434, + "_value": 1.49, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 60.6452, + "_value": 63.9739, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.159, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.233, + "_value": 1.051, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.97, + "_value": 0.139, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.816, + "_value": 0.152, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.264, + "_value": 0.097, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.04, + "_value": 0.092, "_timestamp": 0 } ] @@ -5527,92 +3099,235 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.428, + "_value": 1.098, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 10.598, + "_value": 1.199, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 11.85, + "_value": 1.332, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.434, + "_value": 1.49, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 60.6452, + "_value": 63.9739, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.159, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.233, + "_value": 1.051, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.97, + "_value": 0.139, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 4.816, + "_value": 0.152, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.264, + "_value": 0.097, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.04, + "_value": 0.092, "_timestamp": 0 } ] } } ] - }, - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=128 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + } + } + ], + "add_sub_config_2": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_2", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": null, + "request-rate-range": 16, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_2 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_2", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5620,7 +3335,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5628,7 +3343,7 @@ [ "gpu_utilization", { - "_value": 49.5, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5636,7 +3351,7 @@ [ "gpu_power_usage", { - "_value": 195.133, + "_value": 56.8955, "_timestamp": 0, "_device_uuid": null } @@ -5647,7 +3362,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -5655,7 +3370,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -5663,7 +3378,7 @@ [ "gpu_utilization", { - "_value": 49.5, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5671,7 +3386,7 @@ [ "gpu_power_usage", { - "_value": 195.133, + "_value": 56.8955, "_timestamp": 0, "_device_uuid": null } @@ -5681,7 +3396,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -5689,7 +3404,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -5697,7 +3412,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 49.5, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5705,7 +3420,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 195.133, + "_value": 56.8955, "_timestamp": 0, "_device_uuid": null } @@ -5713,87 +3428,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 128 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 8.421, + "_value": 1.132, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 13.14, + "_value": 1.251, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 14.211, + "_value": 1.357, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 16.409, + "_value": 1.489, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 126.115, + "_value": 16.0791, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.138, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 8.25, + "_value": 1.085, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 1.898, + "_value": 0.139, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.014, + "_value": 0.153, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.102, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.032, + "_value": 0.097, "_timestamp": 0 } ] @@ -5802,77 +3517,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 8.421, + "_value": 1.132, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 13.14, + "_value": 1.251, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 14.211, + "_value": 1.357, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 16.409, + "_value": 1.489, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 126.115, + "_value": 16.0791, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.138, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 8.25, + "_value": 1.085, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 1.898, + "_value": 0.139, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.014, + "_value": 0.153, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.102, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.032, + "_value": 0.097, "_timestamp": 0 } ] @@ -5880,14 +3595,14 @@ } ] }, - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=256 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + "add_sub_config_2 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_2", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5895,7 +3610,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -5903,7 +3618,7 @@ [ "gpu_utilization", { - "_value": 87.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5911,7 +3626,7 @@ [ "gpu_power_usage", { - "_value": 267.492, + "_value": 56.7785, "_timestamp": 0, "_device_uuid": null } @@ -5922,7 +3637,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -5930,7 +3645,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -5938,7 +3653,7 @@ [ "gpu_utilization", { - "_value": 87.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5946,7 +3661,7 @@ [ "gpu_power_usage", { - "_value": 267.492, + "_value": 56.7785, "_timestamp": 0, "_device_uuid": null } @@ -5956,7 +3671,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -5964,7 +3679,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -5972,7 +3687,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 87.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -5980,7 +3695,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 267.492, + "_value": 56.7785, "_timestamp": 0, "_device_uuid": null } @@ -5988,87 +3703,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 256 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 11.878, + "_value": 1.127, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 14.265, + "_value": 1.259, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 14.321, + "_value": 1.384, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.779, + "_value": 1.527, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 242.843, + "_value": 31.9868, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.103, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 11.748, + "_value": 1.078, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 4.487, + "_value": 0.121, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 6.165, + "_value": 0.163, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.235, + "_value": 0.107, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.019, + "_value": 0.1, "_timestamp": 0 } ] @@ -6077,92 +3792,235 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 11.878, + "_value": 1.127, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 14.265, + "_value": 1.259, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 14.321, + "_value": 1.384, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.779, + "_value": 1.527, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 242.843, + "_value": 31.9868, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.103, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 11.748, + "_value": 1.078, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 4.487, + "_value": 0.121, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 6.165, + "_value": 0.163, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.235, + "_value": 0.107, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.019, + "_value": 0.1, "_timestamp": 0 } ] } - } - ] - }, - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=512 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + } + ] + } + } + ], + "add_sub_config_3": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_3", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": null, + "request-rate-range": 16, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_3 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_3", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6170,7 +4028,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6178,7 +4036,7 @@ [ "gpu_utilization", { - "_value": 59.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6186,7 +4044,7 @@ [ "gpu_power_usage", { - "_value": 216.268, + "_value": 56.9191, "_timestamp": 0, "_device_uuid": null } @@ -6197,7 +4055,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6205,7 +4063,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6213,7 +4071,7 @@ [ "gpu_utilization", { - "_value": 59.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6221,7 +4079,7 @@ [ "gpu_power_usage", { - "_value": 216.268, + "_value": 56.9191, "_timestamp": 0, "_device_uuid": null } @@ -6231,7 +4089,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6239,7 +4097,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6247,7 +4105,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 59.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6255,7 +4113,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 216.268, + "_value": 56.9191, "_timestamp": 0, "_device_uuid": null } @@ -6263,87 +4121,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 512 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 14.146, + "_value": 1.116, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 14.335, + "_value": 1.235, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 14.367, + "_value": 1.302, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 14.473, + "_value": 1.442, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 279.481, + "_value": 16.0792, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.119, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 14.001, + "_value": 1.068, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 6.083, + "_value": 0.136, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 6.707, + "_value": 0.148, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.266, + "_value": 0.098, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.017, + "_value": 0.094, "_timestamp": 0 } ] @@ -6352,77 +4210,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 14.146, + "_value": 1.116, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 14.335, + "_value": 1.235, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 14.367, + "_value": 1.302, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 14.473, + "_value": 1.442, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 279.481, + "_value": 16.0792, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.119, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 14.001, + "_value": 1.068, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 6.083, + "_value": 0.136, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 6.707, + "_value": 0.148, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.266, + "_value": 0.098, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.017, + "_value": 0.094, "_timestamp": 0 } ] @@ -6430,14 +4288,14 @@ } ] }, - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=1024 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + "add_sub_config_3 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_3", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6445,7 +4303,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6453,7 +4311,7 @@ [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6461,7 +4319,7 @@ [ "gpu_power_usage", { - "_value": 216.677, + "_value": 56.9508, "_timestamp": 0, "_device_uuid": null } @@ -6472,7 +4330,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6480,7 +4338,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6488,7 +4346,7 @@ [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6496,7 +4354,7 @@ [ "gpu_power_usage", { - "_value": 216.677, + "_value": 56.9508, "_timestamp": 0, "_device_uuid": null } @@ -6506,7 +4364,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6514,7 +4372,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6522,7 +4380,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6530,7 +4388,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 216.677, + "_value": 56.9508, "_timestamp": 0, "_device_uuid": null } @@ -6538,87 +4396,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 1024 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 14.208, + "_value": 1.103, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 14.352, + "_value": 1.193, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 14.399, + "_value": 1.302, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 14.516, + "_value": 1.45, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 280.439, + "_value": 31.9884, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.118, + "_value": 0.012, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 14.064, + "_value": 1.059, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 6.128, + "_value": 0.147, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 6.729, + "_value": 0.146, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.261, + "_value": 0.098, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.094, "_timestamp": 0 } ] @@ -6627,77 +4485,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 14.208, + "_value": 1.103, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 14.352, + "_value": 1.193, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 14.399, + "_value": 1.302, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 14.516, + "_value": 1.45, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 280.439, + "_value": 31.9884, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.118, + "_value": 0.012, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 14.064, + "_value": 1.059, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 6.128, + "_value": 0.147, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 6.729, + "_value": 0.146, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.261, + "_value": 0.098, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.094, "_timestamp": 0 } ] @@ -6705,14 +4563,14 @@ } ] }, - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=2048 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + "add_sub_config_3 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_3", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6720,7 +4578,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6728,7 +4586,7 @@ [ "gpu_utilization", { - "_value": 60.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6736,7 +4594,7 @@ [ "gpu_power_usage", { - "_value": 217.059, + "_value": 56.9373, "_timestamp": 0, "_device_uuid": null } @@ -6747,7 +4605,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6755,7 +4613,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6763,7 +4621,7 @@ [ "gpu_utilization", { - "_value": 60.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6771,7 +4629,7 @@ [ "gpu_power_usage", { - "_value": 217.059, + "_value": 56.9373, "_timestamp": 0, "_device_uuid": null } @@ -6781,7 +4639,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -6789,7 +4647,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -6797,7 +4655,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 60.3333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -6805,7 +4663,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 217.059, + "_value": 56.9373, "_timestamp": 0, "_device_uuid": null } @@ -6813,87 +4671,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 2048 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 14.21, + "_value": 1.122, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 14.327, + "_value": 1.231, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 14.376, + "_value": 1.267, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 14.494, + "_value": 1.402, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 280.789, + "_value": 32.1575, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.118, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 14.065, + "_value": 1.074, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 6.136, + "_value": 0.138, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 6.736, + "_value": 0.155, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.256, + "_value": 0.098, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.017, + "_value": 0.096, "_timestamp": 0 } ] @@ -6902,77 +4760,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 14.21, + "_value": 1.122, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 14.327, + "_value": 1.231, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 14.376, + "_value": 1.267, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 14.494, + "_value": 1.402, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 280.789, + "_value": 32.1575, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.118, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 14.065, + "_value": 1.074, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 6.136, + "_value": 0.138, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 6.736, + "_value": 0.155, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.256, + "_value": 0.098, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.017, + "_value": 0.096, "_timestamp": 0 } ] @@ -6980,14 +4838,14 @@ } ] }, - "-m vgg19_libtorch_config_1 -b 1 -i grpc -f vgg19_libtorch_config_1-results.csv --verbose-csv --request-rate-range=4096 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_1", + "add_sub_config_3 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_3", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -6995,7 +4853,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7003,7 +4861,7 @@ [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7011,7 +4869,7 @@ [ "gpu_power_usage", { - "_value": 218.017, + "_value": 56.8725, "_timestamp": 0, "_device_uuid": null } @@ -7022,7 +4880,7 @@ [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -7030,7 +4888,7 @@ [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -7038,7 +4896,7 @@ [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7046,7 +4904,7 @@ [ "gpu_power_usage", { - "_value": 218.017, + "_value": 56.8725, "_timestamp": 0, "_device_uuid": null } @@ -7056,7 +4914,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2413.821952, + "_value": 874.512384, "_timestamp": 0, "_device_uuid": null } @@ -7064,7 +4922,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23355.981824000002, + "_value": 24895.291392, "_timestamp": 0, "_device_uuid": null } @@ -7072,7 +4930,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7080,7 +4938,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 218.017, + "_value": 56.8725, "_timestamp": 0, "_device_uuid": null } @@ -7088,87 +4946,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_1", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 4096 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 14.279, + "_value": 1.127, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 14.393, + "_value": 1.339, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 14.516, + "_value": 1.447, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 14.675, + "_value": 1.518, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 279.511, + "_value": 63.9765, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.123, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 14.129, + "_value": 1.081, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 6.156, + "_value": 0.137, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 6.759, + "_value": 0.16, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.26, + "_value": 0.101, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.097, "_timestamp": 0 } ] @@ -7177,77 +5035,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 14.279, + "_value": 1.127, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 14.393, + "_value": 1.339, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 14.516, + "_value": 1.447, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 14.675, + "_value": 1.518, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 279.511, + "_value": 63.9765, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.123, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 14.129, + "_value": 1.081, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 6.156, + "_value": 0.137, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 6.759, + "_value": 0.16, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.26, + "_value": 0.101, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.097, "_timestamp": 0 } ] @@ -7257,45 +5115,59 @@ } } ], - "vgg19_libtorch_config_2": [ + "add_sub_config_4": [ { "_triton_env": {}, "_model_run_configs": [ { - "_model_name": "vgg19_libtorch", - "_model_config": { - "name": "vgg19_libtorch_config_2", - "platform": "pytorch_libtorch", - "maxBatchSize": 4, - "input": [ - { - "name": "INPUT__0", - "dataType": "TYPE_FP32", - "format": "FORMAT_NCHW", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "output": [ - { - "name": "OUTPUT__0", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "vgg19_labels.txt" - } - ], - "instanceGroup": [ - { - "count": 1, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_4", "cpu_only": false }, "_perf_config": { @@ -7307,7 +5179,7 @@ "measurement-interval": null, "concurrency-range": null, "request-rate-range": 16, - "request-distribution": "poisson", + "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, @@ -7325,7 +5197,7 @@ "string-length": null, "string-data": null, "measurement-mode": "count_windows", - "measurement-request-count": 150, + "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, @@ -7343,16 +5215,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0, + "metrics-interval": 1000, "bls-composing-models": null }, "_options": { - "-m": "vgg19_libtorch_config_2", + "-m": "add_sub", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "vgg19_libtorch_config_2-results.csv", + "-f": "add_sub-results.csv", "-H": null }, "_verbose": { @@ -7378,20 +5250,295 @@ "input-data": null, "shape": null } - }, - "_composing_configs": [] - } - ] - }, - { - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=16 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_4 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_4", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 876.609536, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24893.19424, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.9247, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 876.609536, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24893.19424, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.9247, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 876.609536, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24893.19424, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.9247, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": null, + "request-rate-range": 16 + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 1.097, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 1.21, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 1.263, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 1.51, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 16.0798, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.012, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 1.052, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.135, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.148, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.101, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.097, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.097, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.21, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 1.263, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 1.51, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 16.0798, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.012, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.052, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.135, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.148, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.101, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.097, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_4 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_4", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7399,7 +5546,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7407,7 +5554,7 @@ [ "gpu_utilization", { - "_value": 7.63636, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7415,7 +5562,7 @@ [ "gpu_power_usage", { - "_value": 63.5583, + "_value": 56.9072, "_timestamp": 0, "_device_uuid": null } @@ -7426,7 +5573,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -7434,7 +5581,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -7442,7 +5589,7 @@ [ "gpu_utilization", { - "_value": 7.63636, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7450,7 +5597,7 @@ [ "gpu_power_usage", { - "_value": 63.5583, + "_value": 56.9072, "_timestamp": 0, "_device_uuid": null } @@ -7460,7 +5607,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -7468,7 +5615,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -7476,7 +5623,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 7.63636, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7484,7 +5631,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 63.5583, + "_value": 56.9072, "_timestamp": 0, "_device_uuid": null } @@ -7492,87 +5639,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 16 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.547, + "_value": 1.099, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 7.622, + "_value": 1.212, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 10.832, + "_value": 1.27, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 12.712, + "_value": 1.461, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 14.716, + "_value": 31.9889, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.17, + "_value": 0.012, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.339, + "_value": 1.054, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.466, + "_value": 0.136, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.389, + "_value": 0.145, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.261, + "_value": 0.099, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.047, + "_value": 0.094, "_timestamp": 0 } ] @@ -7581,92 +5728,235 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.547, + "_value": 1.099, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 7.622, + "_value": 1.212, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 10.832, + "_value": 1.27, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 12.712, + "_value": 1.461, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 14.716, + "_value": 31.9889, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.17, + "_value": 0.012, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.339, + "_value": 1.054, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.466, + "_value": 0.136, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.389, + "_value": 0.145, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.261, + "_value": 0.099, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.047, + "_value": 0.094, "_timestamp": 0 } ] } } ] - }, - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=32 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + } + } + ], + "add_sub_config_5": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 3, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_5", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": null, + "request-rate-range": 16, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_5 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_5", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7674,7 +5964,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7682,7 +5972,7 @@ [ "gpu_utilization", { - "_value": 17.285700000000002, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7690,7 +5980,7 @@ [ "gpu_power_usage", { - "_value": 74.6296, + "_value": 56.9405, "_timestamp": 0, "_device_uuid": null } @@ -7701,7 +5991,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -7709,7 +5999,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -7717,7 +6007,7 @@ [ "gpu_utilization", { - "_value": 17.285700000000002, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7725,7 +6015,7 @@ [ "gpu_power_usage", { - "_value": 74.6296, + "_value": 56.9405, "_timestamp": 0, "_device_uuid": null } @@ -7735,7 +6025,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -7743,7 +6033,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -7751,7 +6041,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 17.285700000000002, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7759,7 +6049,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 74.6296, + "_value": 56.9405, "_timestamp": 0, "_device_uuid": null } @@ -7767,87 +6057,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 32 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.753, + "_value": 1.103, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 9.139, + "_value": 1.212, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 11.272, + "_value": 1.284, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 16.969, + "_value": 1.384, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 35.9826, + "_value": 16.0791, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.165, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.552, + "_value": 1.053, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.579, + "_value": 0.14, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.488, + "_value": 0.147, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.267, + "_value": 0.1, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.045, + "_value": 0.094, "_timestamp": 0 } ] @@ -7856,77 +6146,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.753, + "_value": 1.103, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 9.139, + "_value": 1.212, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 11.272, + "_value": 1.284, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 16.969, + "_value": 1.384, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 35.9826, + "_value": 16.0791, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.165, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.552, + "_value": 1.053, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.579, + "_value": 0.14, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.488, + "_value": 0.147, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.267, + "_value": 0.1, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.045, + "_value": 0.094, "_timestamp": 0 } ] @@ -7934,14 +6224,14 @@ } ] }, - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=64 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + "add_sub_config_5 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_5", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7949,7 +6239,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -7957,7 +6247,7 @@ [ "gpu_utilization", { - "_value": 26.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -7965,7 +6255,7 @@ [ "gpu_power_usage", { - "_value": 116.834, + "_value": 56.9038, "_timestamp": 0, "_device_uuid": null } @@ -7976,7 +6266,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -7984,7 +6274,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -7992,7 +6282,7 @@ [ "gpu_utilization", { - "_value": 26.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8000,7 +6290,7 @@ [ "gpu_power_usage", { - "_value": 116.834, + "_value": 56.9038, "_timestamp": 0, "_device_uuid": null } @@ -8010,7 +6300,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -8018,7 +6308,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -8026,7 +6316,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 26.6667, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8034,7 +6324,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 116.834, + "_value": 56.9038, "_timestamp": 0, "_device_uuid": null } @@ -8042,87 +6332,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 64 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.493, + "_value": 1.113, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 10.67, + "_value": 1.22, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 12.554, + "_value": 1.328, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.69, + "_value": 1.498, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 60.6457, + "_value": 31.9885, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.159, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.298, + "_value": 1.065, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.966, + "_value": 0.141, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 4.87, + "_value": 0.147, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.27, + "_value": 0.1, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.041, + "_value": 0.095, "_timestamp": 0 } ] @@ -8131,77 +6421,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.493, + "_value": 1.113, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 10.67, + "_value": 1.22, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 12.554, + "_value": 1.328, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.69, + "_value": 1.498, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 60.6457, + "_value": 31.9885, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.159, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.298, + "_value": 1.065, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.966, + "_value": 0.141, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 4.87, + "_value": 0.147, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.27, + "_value": 0.1, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.041, + "_value": 0.095, "_timestamp": 0 } ] @@ -8209,14 +6499,14 @@ } ] }, - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=128 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + "add_sub_config_5 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_5", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -8224,7 +6514,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -8232,7 +6522,7 @@ [ "gpu_utilization", { - "_value": 49.8333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8240,7 +6530,7 @@ [ "gpu_power_usage", { - "_value": 173.752, + "_value": 57.0292, "_timestamp": 0, "_device_uuid": null } @@ -8251,7 +6541,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -8259,7 +6549,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -8267,7 +6557,7 @@ [ "gpu_utilization", { - "_value": 49.8333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8275,7 +6565,7 @@ [ "gpu_power_usage", { - "_value": 173.752, + "_value": 57.0292, "_timestamp": 0, "_device_uuid": null } @@ -8285,7 +6575,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -8293,7 +6583,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -8301,7 +6591,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 49.8333, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8309,7 +6599,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 173.752, + "_value": 57.0292, "_timestamp": 0, "_device_uuid": null } @@ -8317,87 +6607,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 128 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 8.564, + "_value": 1.145, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 13.903, + "_value": 1.28, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 14.641, + "_value": 1.325, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.653, + "_value": 1.498, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 125.776, + "_value": 32.1583, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.14, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 8.392, + "_value": 1.098, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 1.826, + "_value": 0.149, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.203, + "_value": 0.162, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.275, + "_value": 0.103, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.031, + "_value": 0.098, "_timestamp": 0 } ] @@ -8406,77 +6696,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 8.564, + "_value": 1.145, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 13.903, + "_value": 1.28, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 14.641, + "_value": 1.325, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.653, + "_value": 1.498, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 125.776, + "_value": 32.1583, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.14, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 8.392, + "_value": 1.098, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 1.826, + "_value": 0.149, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.203, + "_value": 0.162, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.275, + "_value": 0.103, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.031, + "_value": 0.098, "_timestamp": 0 } ] @@ -8484,14 +6774,14 @@ } ] }, - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=256 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + "add_sub_config_5 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_5", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -8499,7 +6789,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -8507,7 +6797,7 @@ [ "gpu_utilization", { - "_value": 88.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8515,7 +6805,7 @@ [ "gpu_power_usage", { - "_value": 270.673, + "_value": 57.0632, "_timestamp": 0, "_device_uuid": null } @@ -8526,7 +6816,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -8534,7 +6824,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -8542,7 +6832,7 @@ [ "gpu_utilization", { - "_value": 88.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8550,7 +6840,7 @@ [ "gpu_power_usage", { - "_value": 270.673, + "_value": 57.0632, "_timestamp": 0, "_device_uuid": null } @@ -8560,7 +6850,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 876.609536, "_timestamp": 0, "_device_uuid": null } @@ -8568,7 +6858,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24893.19424, "_timestamp": 0, "_device_uuid": null } @@ -8576,7 +6866,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 88.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8584,7 +6874,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 270.673, + "_value": 57.0632, "_timestamp": 0, "_device_uuid": null } @@ -8592,87 +6882,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 256 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 13.106, + "_value": 1.118, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 15.204, + "_value": 1.23, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 15.246, + "_value": 1.349, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.434, + "_value": 1.445, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 242.512, + "_value": 63.9786, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.102, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 12.975, + "_value": 1.072, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 4.054, + "_value": 0.14, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 7.689, + "_value": 0.154, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.295, + "_value": 0.097, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.019, + "_value": 0.093, "_timestamp": 0 } ] @@ -8681,92 +6971,235 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 13.106, + "_value": 1.118, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 15.204, + "_value": 1.23, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 15.246, + "_value": 1.349, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.434, + "_value": 1.445, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 242.512, + "_value": 63.9786, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.102, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 12.975, + "_value": 1.072, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 4.054, + "_value": 0.14, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 7.689, + "_value": 0.154, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.295, + "_value": 0.097, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.019, + "_value": 0.093, "_timestamp": 0 } ] } - } - ] - }, - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=512 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + } + ] + } + } + ], + "add_sub_config_6": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_6", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": null, + "request-rate-range": 16, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_6 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -8774,7 +7207,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -8782,7 +7215,7 @@ [ "gpu_utilization", { - "_value": 60.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8790,7 +7223,7 @@ [ "gpu_power_usage", { - "_value": 213.446, + "_value": 57.0973, "_timestamp": 0, "_device_uuid": null } @@ -8801,7 +7234,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -8809,7 +7242,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -8817,7 +7250,7 @@ [ "gpu_utilization", { - "_value": 60.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8825,7 +7258,7 @@ [ "gpu_power_usage", { - "_value": 213.446, + "_value": 57.0973, "_timestamp": 0, "_device_uuid": null } @@ -8835,7 +7268,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -8843,7 +7276,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -8851,7 +7284,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 60.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -8859,7 +7292,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 213.446, + "_value": 57.0973, "_timestamp": 0, "_device_uuid": null } @@ -8867,87 +7300,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 512 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 15.048, + "_value": 1.121, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 15.26, + "_value": 1.219, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 15.308, + "_value": 1.314, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.411, + "_value": 1.547, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 262.485, + "_value": 16.0791, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.104, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 14.92, + "_value": 1.074, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 5.033, + "_value": 0.14, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 8.581, + "_value": 0.148, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.322, + "_value": 0.1, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.096, "_timestamp": 0 } ] @@ -8956,77 +7389,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 15.048, + "_value": 1.121, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 15.26, + "_value": 1.219, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 15.308, + "_value": 1.314, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.411, + "_value": 1.547, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 262.485, + "_value": 16.0791, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.104, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 14.92, + "_value": 1.074, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 5.033, + "_value": 0.14, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 8.581, + "_value": 0.148, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.322, + "_value": 0.1, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.096, "_timestamp": 0 } ] @@ -9034,14 +7467,14 @@ } ] }, - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=1024 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + "add_sub_config_6 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_6", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -9049,7 +7482,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -9057,7 +7490,7 @@ [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9065,7 +7498,7 @@ [ "gpu_power_usage", { - "_value": 217.26, + "_value": 57.1385, "_timestamp": 0, "_device_uuid": null } @@ -9076,7 +7509,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -9084,7 +7517,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -9092,7 +7525,7 @@ [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9100,7 +7533,7 @@ [ "gpu_power_usage", { - "_value": 217.26, + "_value": 57.1385, "_timestamp": 0, "_device_uuid": null } @@ -9110,7 +7543,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -9118,7 +7551,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -9126,7 +7559,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 61.0, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9134,7 +7567,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 217.26, + "_value": 57.1385, "_timestamp": 0, "_device_uuid": null } @@ -9142,87 +7575,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 1024 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 15.078, + "_value": 1.101, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 15.183, + "_value": 1.187, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 15.232, + "_value": 1.258, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.327, + "_value": 1.475, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 264.503, + "_value": 31.9883, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.113, + "_value": 0.012, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 14.938, + "_value": 1.057, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 4.949, + "_value": 0.137, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 8.624, + "_value": 0.145, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.317, + "_value": 0.099, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.094, "_timestamp": 0 } ] @@ -9231,92 +7664,235 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 15.078, + "_value": 1.101, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 15.183, + "_value": 1.187, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 15.232, + "_value": 1.258, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.327, + "_value": 1.475, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 264.503, + "_value": 31.9883, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.113, + "_value": 0.012, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 14.938, + "_value": 1.057, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 4.949, + "_value": 0.137, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 8.624, + "_value": 0.145, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.317, + "_value": 0.099, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.094, "_timestamp": 0 } ] } } ] - }, - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=2048 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + } + } + ], + "add_sub_config_7": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 4, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_7", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": null, + "request-rate-range": 16, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_7 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -9324,7 +7900,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -9332,7 +7908,7 @@ [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9340,7 +7916,7 @@ [ "gpu_power_usage", { - "_value": 216.854, + "_value": 57.0168, "_timestamp": 0, "_device_uuid": null } @@ -9351,7 +7927,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -9359,7 +7935,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -9367,7 +7943,7 @@ [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9375,7 +7951,7 @@ [ "gpu_power_usage", { - "_value": 216.854, + "_value": 57.0168, "_timestamp": 0, "_device_uuid": null } @@ -9385,7 +7961,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -9393,7 +7969,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -9401,7 +7977,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9409,7 +7985,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 216.854, + "_value": 57.0168, "_timestamp": 0, "_device_uuid": null } @@ -9417,87 +7993,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 2048 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 15.108, + "_value": 1.125, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 15.228, + "_value": 1.213, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 15.29, + "_value": 1.296, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.444, + "_value": 1.451, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 264.155, + "_value": 16.0792, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.109, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 14.974, + "_value": 1.079, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 4.995, + "_value": 0.135, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 8.635, + "_value": 0.149, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.317, + "_value": 0.103, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.095, "_timestamp": 0 } ] @@ -9506,77 +8082,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 15.108, + "_value": 1.125, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 15.228, + "_value": 1.213, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 15.29, + "_value": 1.296, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.444, + "_value": 1.451, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 264.155, + "_value": 16.0792, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.109, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 14.974, + "_value": 1.079, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 4.995, + "_value": 0.135, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 8.635, + "_value": 0.149, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.317, + "_value": 0.103, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.095, "_timestamp": 0 } ] @@ -9584,14 +8160,14 @@ } ] }, - "-m vgg19_libtorch_config_2 -b 1 -i grpc -f vgg19_libtorch_config_2-results.csv --verbose-csv --request-rate-range=4096 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_2", + "add_sub_config_7 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -9599,7 +8175,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -9607,7 +8183,7 @@ [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9615,7 +8191,7 @@ [ "gpu_power_usage", { - "_value": 215.561, + "_value": 57.0233, "_timestamp": 0, "_device_uuid": null } @@ -9626,7 +8202,7 @@ [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -9634,7 +8210,7 @@ [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -9642,7 +8218,7 @@ [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9650,7 +8226,7 @@ [ "gpu_power_usage", { - "_value": 215.561, + "_value": 57.0233, "_timestamp": 0, "_device_uuid": null } @@ -9660,7 +8236,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 2466.250752, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -9668,7 +8244,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 23303.553024, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -9676,7 +8252,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 60.66669999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -9684,7 +8260,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 215.561, + "_value": 57.0233, "_timestamp": 0, "_device_uuid": null } @@ -9692,87 +8268,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_2", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 4096 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 15.13, + "_value": 1.121, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 15.254, + "_value": 1.34, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 15.281, + "_value": 1.433, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 15.412, + "_value": 1.517, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 263.813, + "_value": 31.9895, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.117, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 14.988, + "_value": 1.075, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 4.969, + "_value": 0.135, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 8.649, + "_value": 0.15, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.328, + "_value": 0.102, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.097, "_timestamp": 0 } ] @@ -9781,221 +8357,92 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 15.13, + "_value": 1.121, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 15.254, + "_value": 1.34, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 15.281, + "_value": 1.433, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 15.412, + "_value": 1.517, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 263.813, + "_value": 31.9895, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.117, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 14.988, + "_value": 1.075, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 4.969, + "_value": 0.135, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 8.649, + "_value": 0.15, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.328, + "_value": 0.102, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.018, + "_value": 0.097, "_timestamp": 0 - } - ] - } - } - ] - } - } - ], - "vgg19_libtorch_config_4": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "vgg19_libtorch", - "_model_config": { - "name": "vgg19_libtorch_config_4", - "platform": "pytorch_libtorch", - "maxBatchSize": 1, - "input": [ - { - "name": "INPUT__0", - "dataType": "TYPE_FP32", - "format": "FORMAT_NCHW", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "output": [ - { - "name": "OUTPUT__0", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "vgg19_labels.txt" - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": null, - "request-rate-range": 16, - "request-distribution": "poisson", - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": null, - "shared-memory": null, - "output-shared-memory-size": null, - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 150, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0, - "bls-composing-models": null - }, - "_options": { - "-m": "vgg19_libtorch_config_4", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "vgg19_libtorch_config_4-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null + } + ] } - }, - "_composing_configs": [] - } - ] - }, - { - "-m vgg19_libtorch_config_4 -b 1 -i grpc -f vgg19_libtorch_config_4-results.csv --verbose-csv --request-rate-range=16 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_4", + } + ] + }, + "add_sub_config_7 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 3701.47328, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -10003,7 +8450,7 @@ [ "gpu_free_memory", { - "_value": 22068.330496000002, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -10011,7 +8458,7 @@ [ "gpu_utilization", { - "_value": 7.28125, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10019,7 +8466,7 @@ [ "gpu_power_usage", { - "_value": 62.8716, + "_value": 57.0362, "_timestamp": 0, "_device_uuid": null } @@ -10030,7 +8477,7 @@ [ "gpu_used_memory", { - "_value": 3701.47328, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -10038,7 +8485,7 @@ [ "gpu_free_memory", { - "_value": 22068.330496000002, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -10046,7 +8493,7 @@ [ "gpu_utilization", { - "_value": 7.28125, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10054,7 +8501,7 @@ [ "gpu_power_usage", { - "_value": 62.8716, + "_value": 57.0362, "_timestamp": 0, "_device_uuid": null } @@ -10064,7 +8511,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 3701.47328, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -10072,7 +8519,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 22068.330496000002, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -10080,7 +8527,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 7.28125, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10088,7 +8535,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 62.8716, + "_value": 57.0362, "_timestamp": 0, "_device_uuid": null } @@ -10096,9 +8543,9 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_4", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, "request-rate-range": 16 }, @@ -10106,77 +8553,77 @@ [ "perf_latency_avg", { - "_value": 7.537, + "_value": 1.126, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 9.005, + "_value": 1.243, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 9.945, + "_value": 1.3, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 11.026, + "_value": 1.522, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 14.7164, + "_value": 32.1576, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.167, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.334, + "_value": 1.079, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.125, + "_value": 0.137, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.737, + "_value": 0.159, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.101, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.05, + "_value": 0.097, "_timestamp": 0 } ] @@ -10185,77 +8632,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.537, + "_value": 1.126, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 9.005, + "_value": 1.243, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 9.945, + "_value": 1.3, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 11.026, + "_value": 1.522, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 14.7164, + "_value": 32.1576, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.167, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.334, + "_value": 1.079, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.125, + "_value": 0.137, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.737, + "_value": 0.159, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.101, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.05, + "_value": 0.097, "_timestamp": 0 } ] @@ -10263,14 +8710,14 @@ } ] }, - "-m vgg19_libtorch_config_4 -b 1 -i grpc -f vgg19_libtorch_config_4-results.csv --verbose-csv --request-rate-range=32 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_4", + "add_sub_config_7 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_7", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 3701.47328, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -10278,7 +8725,7 @@ [ "gpu_free_memory", { - "_value": 22068.330496000002, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -10286,7 +8733,7 @@ [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10294,7 +8741,7 @@ [ "gpu_power_usage", { - "_value": 75.2471, + "_value": 57.125, "_timestamp": 0, "_device_uuid": null } @@ -10305,7 +8752,7 @@ [ "gpu_used_memory", { - "_value": 3701.47328, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -10313,7 +8760,7 @@ [ "gpu_free_memory", { - "_value": 22068.330496000002, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -10321,7 +8768,7 @@ [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10329,7 +8776,7 @@ [ "gpu_power_usage", { - "_value": 75.2471, + "_value": 57.125, "_timestamp": 0, "_device_uuid": null } @@ -10339,7 +8786,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 3701.47328, + "_value": 878.706688, "_timestamp": 0, "_device_uuid": null } @@ -10347,7 +8794,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 22068.330496000002, + "_value": 24891.097088000002, "_timestamp": 0, "_device_uuid": null } @@ -10355,7 +8802,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10363,7 +8810,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 75.2471, + "_value": 57.125, "_timestamp": 0, "_device_uuid": null } @@ -10371,9 +8818,9 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_4", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, "request-rate-range": 32 }, @@ -10381,77 +8828,77 @@ [ "perf_latency_avg", { - "_value": 7.666, + "_value": 1.104, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 9.372, + "_value": 1.18, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 10.726, + "_value": 1.288, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 13.91, + "_value": 1.441, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 35.9851, + "_value": 63.9766, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.159, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.471, + "_value": 1.059, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.136, + "_value": 0.135, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.898, + "_value": 0.151, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.258, + "_value": 0.097, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.049, + "_value": 0.092, "_timestamp": 0 } ] @@ -10460,77 +8907,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.666, + "_value": 1.104, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 9.372, + "_value": 1.18, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 10.726, + "_value": 1.288, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 13.91, + "_value": 1.441, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 35.9851, + "_value": 63.9766, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.159, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.471, + "_value": 1.059, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.136, + "_value": 0.135, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.898, + "_value": 0.151, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.258, + "_value": 0.097, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.049, + "_value": 0.092, "_timestamp": 0 } ] @@ -10540,45 +8987,59 @@ } } ], - "vgg19_libtorch_config_5": [ + "add_sub_config_8": [ { "_triton_env": {}, "_model_run_configs": [ { - "_model_name": "vgg19_libtorch", - "_model_config": { - "name": "vgg19_libtorch_config_5", - "platform": "pytorch_libtorch", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT__0", - "dataType": "TYPE_FP32", - "format": "FORMAT_NCHW", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "output": [ - { - "name": "OUTPUT__0", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "vgg19_labels.txt" - } - ], - "instanceGroup": [ - { - "count": 3, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 5, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_8", "cpu_only": false }, "_perf_config": { @@ -10590,7 +9051,7 @@ "measurement-interval": null, "concurrency-range": null, "request-rate-range": 16, - "request-distribution": "poisson", + "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, @@ -10608,7 +9069,7 @@ "string-length": null, "string-data": null, "measurement-mode": "count_windows", - "measurement-request-count": 150, + "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, @@ -10626,16 +9087,16 @@ "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0, + "metrics-interval": 1000, "bls-composing-models": null }, "_options": { - "-m": "vgg19_libtorch_config_5", + "-m": "add_sub", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", - "-f": "vgg19_libtorch_config_5-results.csv", + "-f": "add_sub-results.csv", "-H": null }, "_verbose": { @@ -10661,20 +9122,295 @@ "input-data": null, "shape": null } - }, - "_composing_configs": [] - } - ] - }, - { - "-m vgg19_libtorch_config_5 -b 1 -i grpc -f vgg19_libtorch_config_5-results.csv --verbose-csv --request-rate-range=16 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_5", + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_8 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_8", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 880.80384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24888.999936, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.9587, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 880.80384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24888.999936, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.9587, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 880.80384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24888.999936, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.9587, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": null, + "request-rate-range": 16 + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 1.132, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 1.285, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 1.38, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 1.472, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 16.0791, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.014, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 1.083, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.14, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.152, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.104, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.098, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 1.132, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 1.285, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 1.38, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 1.472, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 16.0791, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.014, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 1.083, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.14, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.152, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.104, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.098, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_8 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_8", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 3783.262208, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -10682,7 +9418,7 @@ [ "gpu_free_memory", { - "_value": 21986.541568, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -10690,7 +9426,7 @@ [ "gpu_utilization", { - "_value": 7.272729999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10698,7 +9434,7 @@ [ "gpu_power_usage", { - "_value": 62.0762, + "_value": 56.942, "_timestamp": 0, "_device_uuid": null } @@ -10709,7 +9445,7 @@ [ "gpu_used_memory", { - "_value": 3783.262208, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -10717,7 +9453,7 @@ [ "gpu_free_memory", { - "_value": 21986.541568, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -10725,7 +9461,7 @@ [ "gpu_utilization", { - "_value": 7.272729999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10733,7 +9469,7 @@ [ "gpu_power_usage", { - "_value": 62.0762, + "_value": 56.942, "_timestamp": 0, "_device_uuid": null } @@ -10743,7 +9479,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 3783.262208, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -10751,7 +9487,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 21986.541568, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -10759,7 +9495,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 7.272729999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10767,7 +9503,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 62.0762, + "_value": 56.942, "_timestamp": 0, "_device_uuid": null } @@ -10775,87 +9511,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_5", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 16 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.56, + "_value": 1.121, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 8.96, + "_value": 1.223, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 9.926, + "_value": 1.353, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 11.176, + "_value": 1.445, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 14.716, + "_value": 31.9868, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.168, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.355, + "_value": 1.074, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.132, + "_value": 0.133, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.743, + "_value": 0.148, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.1, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.051, + "_value": 0.097, "_timestamp": 0 } ] @@ -10864,92 +9600,235 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.56, + "_value": 1.121, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 8.96, + "_value": 1.223, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 9.926, + "_value": 1.353, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 11.176, + "_value": 1.445, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 14.716, + "_value": 31.9868, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.168, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.355, + "_value": 1.074, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.132, + "_value": 0.133, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.743, + "_value": 0.148, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.1, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.051, + "_value": 0.097, "_timestamp": 0 } ] } } ] - }, - "-m vgg19_libtorch_config_5 -b 1 -i grpc -f vgg19_libtorch_config_5-results.csv --verbose-csv --request-rate-range=32 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_5", + } + } + ], + "add_sub_config_9": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 5, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_9", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": null, + "request-rate-range": 16, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_9 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_9", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 3783.262208, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -10957,7 +9836,7 @@ [ "gpu_free_memory", { - "_value": 21986.541568, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -10965,7 +9844,7 @@ [ "gpu_utilization", { - "_value": 16.8571, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -10973,7 +9852,7 @@ [ "gpu_power_usage", { - "_value": 73.9334, + "_value": 56.9757, "_timestamp": 0, "_device_uuid": null } @@ -10984,7 +9863,7 @@ [ "gpu_used_memory", { - "_value": 3783.262208, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -10992,7 +9871,7 @@ [ "gpu_free_memory", { - "_value": 21986.541568, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -11000,7 +9879,7 @@ [ "gpu_utilization", { - "_value": 16.8571, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11008,7 +9887,7 @@ [ "gpu_power_usage", { - "_value": 73.9334, + "_value": 56.9757, "_timestamp": 0, "_device_uuid": null } @@ -11018,7 +9897,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 3783.262208, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -11026,7 +9905,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 21986.541568, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -11034,7 +9913,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 16.8571, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11042,7 +9921,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 73.9334, + "_value": 56.9757, "_timestamp": 0, "_device_uuid": null } @@ -11050,87 +9929,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_5", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 32 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.681, + "_value": 1.138, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 9.219, + "_value": 1.269, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 10.304, + "_value": 1.345, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 14.813, + "_value": 1.516, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 35.983, + "_value": 16.0795, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.162, + "_value": 0.012, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.483, + "_value": 1.091, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.144, + "_value": 0.142, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.889, + "_value": 0.153, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.255, + "_value": 0.103, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.048, + "_value": 0.099, "_timestamp": 0 } ] @@ -11139,221 +10018,92 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.681, + "_value": 1.138, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 9.219, + "_value": 1.269, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 10.304, + "_value": 1.345, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 14.813, + "_value": 1.516, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 35.983, + "_value": 16.0795, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.162, + "_value": 0.012, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.483, + "_value": 1.091, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.144, + "_value": 0.142, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.889, + "_value": 0.153, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.255, + "_value": 0.103, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.048, + "_value": 0.099, "_timestamp": 0 } ] } } - ] - } - } - ], - "vgg19_libtorch_config_6": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "vgg19_libtorch", - "_model_config": { - "name": "vgg19_libtorch_config_6", - "platform": "pytorch_libtorch", - "maxBatchSize": 1, - "input": [ - { - "name": "INPUT__0", - "dataType": "TYPE_FP32", - "format": "FORMAT_NCHW", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "output": [ - { - "name": "OUTPUT__0", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "vgg19_labels.txt" - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": null, - "request-rate-range": 16, - "request-distribution": "poisson", - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": null, - "shared-memory": null, - "output-shared-memory-size": null, - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 150, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0, - "bls-composing-models": null - }, - "_options": { - "-m": "vgg19_libtorch_config_6", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "vgg19_libtorch_config_6-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [] - } - ] - }, - { - "-m vgg19_libtorch_config_6 -b 1 -i grpc -f vgg19_libtorch_config_6-results.csv --verbose-csv --request-rate-range=16 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_6", + ] + }, + "add_sub_config_9 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_9", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 4362.07616, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -11361,7 +10111,7 @@ [ "gpu_free_memory", { - "_value": 21407.727616, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -11369,7 +10119,7 @@ [ "gpu_utilization", { - "_value": 7.45455, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11377,7 +10127,7 @@ [ "gpu_power_usage", { - "_value": 62.2773, + "_value": 56.9743, "_timestamp": 0, "_device_uuid": null } @@ -11388,7 +10138,7 @@ [ "gpu_used_memory", { - "_value": 4362.07616, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -11396,7 +10146,7 @@ [ "gpu_free_memory", { - "_value": 21407.727616, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -11404,7 +10154,7 @@ [ "gpu_utilization", { - "_value": 7.45455, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11412,7 +10162,7 @@ [ "gpu_power_usage", { - "_value": 62.2773, + "_value": 56.9743, "_timestamp": 0, "_device_uuid": null } @@ -11422,7 +10172,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 4362.07616, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -11430,7 +10180,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 21407.727616, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -11438,7 +10188,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 7.45455, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11446,7 +10196,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 62.2773, + "_value": 56.9743, "_timestamp": 0, "_device_uuid": null } @@ -11454,59 +10204,59 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_6", + "_model_config_name": "add_sub", "_model_specific_pa_params": { "batch-size": 1, "concurrency-range": null, - "request-rate-range": 16 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.527, + "_value": 1.097, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 8.893, + "_value": 1.2, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 9.97, + "_value": 1.285, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 11.021, + "_value": 1.488, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 14.716, + "_value": 31.9887, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.164, + "_value": 0.012, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.327, + "_value": 1.052, "_timestamp": 0 } ], @@ -11520,21 +10270,21 @@ [ "perf_server_compute_infer", { - "_value": 5.735, + "_value": 0.148, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.099, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.05, + "_value": 0.096, "_timestamp": 0 } ] @@ -11543,49 +10293,49 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.527, + "_value": 1.097, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 8.893, + "_value": 1.2, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 9.97, + "_value": 1.285, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 11.021, + "_value": 1.488, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 14.716, + "_value": 31.9887, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.164, + "_value": 0.012, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.327, + "_value": 1.052, "_timestamp": 0 } ], @@ -11599,21 +10349,21 @@ "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.735, + "_value": 0.148, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.099, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.05, + "_value": 0.096, "_timestamp": 0 } ] @@ -11621,14 +10371,14 @@ } ] }, - "-m vgg19_libtorch_config_6 -b 1 -i grpc -f vgg19_libtorch_config_6-results.csv --verbose-csv --request-rate-range=32 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_6", + "add_sub_config_9 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=16 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_9", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 4362.07616, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -11636,7 +10386,7 @@ [ "gpu_free_memory", { - "_value": 21407.727616, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -11644,7 +10394,7 @@ [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11652,7 +10402,7 @@ [ "gpu_power_usage", { - "_value": 73.8081, + "_value": 57.0045, "_timestamp": 0, "_device_uuid": null } @@ -11663,7 +10413,7 @@ [ "gpu_used_memory", { - "_value": 4362.07616, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -11671,7 +10421,7 @@ [ "gpu_free_memory", { - "_value": 21407.727616, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -11679,7 +10429,7 @@ [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11687,7 +10437,7 @@ [ "gpu_power_usage", { - "_value": 73.8081, + "_value": 57.0045, "_timestamp": 0, "_device_uuid": null } @@ -11697,7 +10447,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 4362.07616, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -11705,7 +10455,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 21407.727616, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -11713,7 +10463,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 17.4286, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -11721,7 +10471,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 73.8081, + "_value": 57.0045, "_timestamp": 0, "_device_uuid": null } @@ -11729,87 +10479,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_6", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 32 + "request-rate-range": 16 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.632, + "_value": 1.151, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 9.29, + "_value": 1.302, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 10.536, + "_value": 1.359, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 14.409, + "_value": 1.511, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 35.9829, + "_value": 32.1583, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.156, + "_value": 0.014, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.442, + "_value": 1.102, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.121, + "_value": 0.135, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.909, + "_value": 0.165, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.249, + "_value": 0.103, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.047, + "_value": 0.099, "_timestamp": 0 } ] @@ -11818,221 +10568,92 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.632, + "_value": 1.151, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 9.29, + "_value": 1.302, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 10.536, + "_value": 1.359, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 14.409, + "_value": 1.511, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 35.9829, + "_value": 32.1583, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.156, + "_value": 0.014, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.442, + "_value": 1.102, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.121, + "_value": 0.135, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.909, + "_value": 0.165, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.249, + "_value": 0.103, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.047, + "_value": 0.099, "_timestamp": 0 } ] } } ] - } - } - ], - "vgg19_libtorch_config_7": [ - { - "_triton_env": {}, - "_model_run_configs": [ - { - "_model_name": "vgg19_libtorch", - "_model_config": { - "name": "vgg19_libtorch_config_7", - "platform": "pytorch_libtorch", - "maxBatchSize": 2, - "input": [ - { - "name": "INPUT__0", - "dataType": "TYPE_FP32", - "format": "FORMAT_NCHW", - "dims": [ - "3", - "224", - "224" - ] - } - ], - "output": [ - { - "name": "OUTPUT__0", - "dataType": "TYPE_FP32", - "dims": [ - "1000" - ], - "labelFilename": "vgg19_labels.txt" - } - ], - "instanceGroup": [ - { - "count": 4, - "kind": "KIND_GPU" - } - ], - "dynamicBatching": {}, - "cpu_only": false - }, - "_perf_config": { - "_args": { - "service-kind": null, - "model-signature-name": null, - "async": null, - "sync": null, - "measurement-interval": null, - "concurrency-range": null, - "request-rate-range": 16, - "request-distribution": "poisson", - "request-intervals": null, - "binary-search": null, - "num-of-sequences": null, - "latency-threshold": null, - "max-threads": null, - "stability-percentage": null, - "max-trials": null, - "percentile": null, - "input-data": null, - "shared-memory": null, - "output-shared-memory-size": null, - "shape": null, - "sequence-length": null, - "sequence-id-range": null, - "string-length": null, - "string-data": null, - "measurement-mode": "count_windows", - "measurement-request-count": 150, - "streaming": null, - "grpc-compression-algorithm": null, - "triton-server-directory": null, - "model-repository": null, - "ssl-grpc-use-ssl": null, - "ssl-grpc-root-certifications-file": null, - "ssl-grpc-private-key-file": null, - "ssl-grpc-certificate-chain-file": null, - "ssl-https-verify-peer": null, - "ssl-https-verify-host": null, - "ssl-https-ca-certificates-file": null, - "ssl-https-client-certificate-type": null, - "ssl-https-client-certificate-file": null, - "ssl-https-private-key-type": null, - "ssl-https-private-key-file": null, - "collect-metrics": "True", - "metrics-url": "http://localhost:8002/metrics", - "metrics-interval": 1000.0, - "bls-composing-models": null - }, - "_options": { - "-m": "vgg19_libtorch_config_7", - "-x": null, - "-b": 1, - "-u": "localhost:8001", - "-i": "grpc", - "-f": "vgg19_libtorch_config_7-results.csv", - "-H": null - }, - "_verbose": { - "-v": null, - "-v -v": null, - "--verbose-csv": "--verbose-csv" - }, - "_input_to_options": { - "model-name": "-m", - "model-version": "-x", - "batch-size": "-b", - "url": "-u", - "protocol": "-i", - "latency-report-file": "-f", - "http-header": "-H" - }, - "_input_to_verbose": { - "verbose": "-v", - "extra-verbose": "-v -v", - "verbose-csv": "--verbose-csv" - }, - "_additive_args": { - "input-data": null, - "shape": null - } - }, - "_composing_configs": [] - } - ] - }, - { - "-m vgg19_libtorch_config_7 -b 1 -i grpc -f vgg19_libtorch_config_7-results.csv --verbose-csv --request-rate-range=16 --request-distribution=poisson --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": { - "_model_variants_name": "vgg19_libtorch_config_7", + }, + "add_sub_config_9 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --request-rate-range=32 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_9", "_gpu_data": { "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ [ "gpu_used_memory", { - "_value": 4471.128064, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -12040,7 +10661,7 @@ [ "gpu_free_memory", { - "_value": 21298.675712, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -12048,7 +10669,7 @@ [ "gpu_utilization", { - "_value": 7.545449999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12056,7 +10677,7 @@ [ "gpu_power_usage", { - "_value": 63.1486, + "_value": 56.904, "_timestamp": 0, "_device_uuid": null } @@ -12067,7 +10688,7 @@ [ "gpu_used_memory", { - "_value": 4471.128064, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -12075,7 +10696,7 @@ [ "gpu_free_memory", { - "_value": 21298.675712, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -12083,7 +10704,7 @@ [ "gpu_utilization", { - "_value": 7.545449999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12091,7 +10712,7 @@ [ "gpu_power_usage", { - "_value": 63.1486, + "_value": 56.904, "_timestamp": 0, "_device_uuid": null } @@ -12101,7 +10722,7 @@ "gpu_used_memory": [ "gpu_used_memory", { - "_value": 4471.128064, + "_value": 880.80384, "_timestamp": 0, "_device_uuid": null } @@ -12109,7 +10730,7 @@ "gpu_free_memory": [ "gpu_free_memory", { - "_value": 21298.675712, + "_value": 24888.999936, "_timestamp": 0, "_device_uuid": null } @@ -12117,7 +10738,7 @@ "gpu_utilization": [ "gpu_utilization", { - "_value": 7.545449999999999, + "_value": 0.0, "_timestamp": 0, "_device_uuid": null } @@ -12125,7 +10746,7 @@ "gpu_power_usage": [ "gpu_power_usage", { - "_value": 63.1486, + "_value": 56.904, "_timestamp": 0, "_device_uuid": null } @@ -12133,87 +10754,87 @@ }, "_model_config_measurements": [ { - "_model_config_name": "vgg19_libtorch_config_7", + "_model_config_name": "add_sub", "_model_specific_pa_params": { - "batch-size": 1, + "batch-size": 2, "concurrency-range": null, - "request-rate-range": 16 + "request-rate-range": 32 }, "_non_gpu_data": [ [ "perf_latency_avg", { - "_value": 7.544, + "_value": 1.112, "_timestamp": 0 } ], [ "perf_latency_p90", { - "_value": 8.928, + "_value": 1.224, "_timestamp": 0 } ], [ "perf_latency_p95", { - "_value": 9.94, + "_value": 1.338, "_timestamp": 0 } ], [ "perf_latency_p99", { - "_value": 11.023, + "_value": 1.459, "_timestamp": 0 } ], [ "perf_throughput", { - "_value": 14.7161, + "_value": 63.9746, "_timestamp": 0 } ], [ "perf_client_send_recv", { - "_value": 0.165, + "_value": 0.013, "_timestamp": 0 } ], [ "perf_client_response_wait", { - "_value": 7.343, + "_value": 1.066, "_timestamp": 0 } ], [ "perf_server_queue", { - "_value": 0.133, + "_value": 0.135, "_timestamp": 0 } ], [ "perf_server_compute_infer", { - "_value": 5.741, + "_value": 0.156, "_timestamp": 0 } ], [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.099, "_timestamp": 0 } ], [ "perf_server_compute_output", { - "_value": 0.05, + "_value": 0.095, "_timestamp": 0 } ] @@ -12222,77 +10843,77 @@ "perf_latency_avg": [ "perf_latency_avg", { - "_value": 7.544, + "_value": 1.112, "_timestamp": 0 } ], "perf_latency_p90": [ "perf_latency_p90", { - "_value": 8.928, + "_value": 1.224, "_timestamp": 0 } ], "perf_latency_p95": [ "perf_latency_p95", { - "_value": 9.94, + "_value": 1.338, "_timestamp": 0 } ], "perf_latency_p99": [ "perf_latency_p99", { - "_value": 11.023, + "_value": 1.459, "_timestamp": 0 } ], "perf_throughput": [ "perf_throughput", { - "_value": 14.7161, + "_value": 63.9746, "_timestamp": 0 } ], "perf_client_send_recv": [ "perf_client_send_recv", { - "_value": 0.165, + "_value": 0.013, "_timestamp": 0 } ], "perf_client_response_wait": [ "perf_client_response_wait", { - "_value": 7.343, + "_value": 1.066, "_timestamp": 0 } ], "perf_server_queue": [ "perf_server_queue", { - "_value": 0.133, + "_value": 0.135, "_timestamp": 0 } ], "perf_server_compute_infer": [ "perf_server_compute_infer", { - "_value": 5.741, + "_value": 0.156, "_timestamp": 0 } ], "perf_server_compute_input": [ "perf_server_compute_input", { - "_value": 0.263, + "_value": 0.099, "_timestamp": 0 } ], "perf_server_compute_output": [ "perf_server_compute_output", { - "_value": 0.05, + "_value": 0.095, "_timestamp": 0 } ] @@ -12310,7 +10931,7 @@ [ "gpu_used_memory", { - "_value": 845.0, + "_value": 870.0, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -12318,7 +10939,7 @@ [ "gpu_free_memory", { - "_value": 24924.0, + "_value": 24899.0, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" } @@ -12334,7 +10955,7 @@ [ "gpu_power_usage", { - "_value": 56.65625, + "_value": 56.12249999999999, "_timestamp": 0, "_device_uuid": null } @@ -12349,19 +10970,23 @@ }, "ModelManager.model_variant_name_manager": { "_model_config_dicts": { - "vgg19_libtorch_config_0": { - "name": "vgg19_libtorch", + "add_sub_config_0": { + "name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12370,9 +10995,16 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ @@ -12383,19 +11015,23 @@ ], "dynamic_batching": {} }, - "vgg19_libtorch_config_1": { - "name": "vgg19_libtorch", + "add_sub_config_1": { + "name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12404,9 +11040,16 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ @@ -12417,19 +11060,23 @@ ], "dynamic_batching": {} }, - "vgg19_libtorch_config_2": { - "name": "vgg19_libtorch", + "add_sub_config_2": { + "name": "add_sub", "platform": "pytorch_libtorch", - "max_batch_size": 4, + "max_batch_size": 1, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12438,32 +11085,43 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ { - "count": 1, + "count": 2, "kind": "KIND_GPU" } ], "dynamic_batching": {} }, - "vgg19_libtorch_config_3": { - "name": "vgg19_libtorch", + "add_sub_config_3": { + "name": "add_sub", "platform": "pytorch_libtorch", - "max_batch_size": 1, + "max_batch_size": 2, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12472,9 +11130,16 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ @@ -12485,19 +11150,23 @@ ], "dynamic_batching": {} }, - "vgg19_libtorch_config_4": { - "name": "vgg19_libtorch", + "add_sub_config_4": { + "name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12506,9 +11175,16 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ @@ -12519,19 +11195,23 @@ ], "dynamic_batching": {} }, - "vgg19_libtorch_config_5": { - "name": "vgg19_libtorch", + "add_sub_config_5": { + "name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12540,9 +11220,16 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ @@ -12553,19 +11240,23 @@ ], "dynamic_batching": {} }, - "vgg19_libtorch_config_6": { - "name": "vgg19_libtorch", + "add_sub_config_6": { + "name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12574,9 +11265,16 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ @@ -12587,19 +11285,23 @@ ], "dynamic_batching": {} }, - "vgg19_libtorch_config_7": { - "name": "vgg19_libtorch", + "add_sub_config_7": { + "name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12608,9 +11310,16 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ @@ -12621,19 +11330,68 @@ ], "dynamic_batching": {} }, - "vgg19_libtorch_config_8": { - "name": "vgg19_libtorch", + "add_sub_config_8": { + "name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [ { "name": "INPUT__0", "data_type": "TYPE_FP32", - "format": "FORMAT_NCHW", "dims": [ - "3", - "224", - "224" + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ], + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instance_group": [ + { + "count": 5, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "add_sub_config_9": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" ] } ], @@ -12642,9 +11400,16 @@ "name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": [ - "1000" + "16" ], - "label_filename": "vgg19_labels.txt" + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] } ], "instance_group": [ @@ -12657,7 +11422,7 @@ } }, "_model_name_index": { - "vgg19_libtorch": 8 + "add_sub": 9 } } } \ No newline at end of file diff --git a/tests/common/request-rate-ckpt/README b/tests/common/request-rate-ckpt/README new file mode 100644 index 000000000..3a2377082 --- /dev/null +++ b/tests/common/request-rate-ckpt/README @@ -0,0 +1,21 @@ +# Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Settings used to create this checkpoint: + +--profile-models add_sub +--request-rate 16,32 +--run-config-search-max-instance-count 2 +--run-config-search-max-model-batch-size 2 +--request-rate-search-enable diff --git a/tests/common/request-rate-ckpt/golden-metrics-model-gpu.csv b/tests/common/request-rate-ckpt/golden-metrics-model-gpu.csv index ff2bdcf59..fd74ba39f 100644 --- a/tests/common/request-rate-ckpt/golden-metrics-model-gpu.csv +++ b/tests/common/request-rate-ckpt/golden-metrics-model-gpu.csv @@ -1,43 +1,36 @@ Model,GPU UUID,Batch,Request Rate,Model Config Path,Instance Group,Satisfies Constraints,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2048,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,60.3,217.1 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1024,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,61.0,216.7 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4096,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,61.0,218.0 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,512,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,59.3,216.3 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,256,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,87.3,267.5 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,49.5,195.1 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,26.2,123.2 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,17.4,75.4 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,vgg19_libtorch_config_1,1:GPU,Yes,2413.8,7.6,63.9 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1024,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,61.0,217.3 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2048,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,60.7,216.9 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,4096,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,60.7,215.6 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,512,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,60.0,213.4 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,256,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,88.7,270.7 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,49.8,173.8 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,26.7,116.8 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,17.3,74.6 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,vgg19_libtorch_config_2,1:GPU,Yes,2466.3,7.6,63.6 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2048,vgg19_libtorch_config_default,1:GPU,Yes,2386.6,61.0,216.3 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,512,vgg19_libtorch_config_default,1:GPU,Yes,2386.6,59.7,215.1 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1024,vgg19_libtorch_config_default,1:GPU,Yes,2386.6,61.3,219.2 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,256,vgg19_libtorch_config_default,1:GPU,Yes,2386.6,49.3,213.0 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,vgg19_libtorch_config_default,1:GPU,Yes,2386.6,52.7,172.6 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,vgg19_libtorch_config_default,1:GPU,Yes,2386.6,30.3,94.5 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,vgg19_libtorch_config_default,1:GPU,Yes,2386.6,17.4,72.7 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,vgg19_libtorch_config_default,1:GPU,Yes,2386.6,7.6,62.3 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1024,vgg19_libtorch_config_0,1:GPU,Yes,2386.6,61.3,220.1 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,512,vgg19_libtorch_config_0,1:GPU,Yes,2386.6,61.3,218.1 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2048,vgg19_libtorch_config_0,1:GPU,Yes,2386.6,60.7,218.1 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,256,vgg19_libtorch_config_0,1:GPU,Yes,2386.6,49.7,217.0 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,128,vgg19_libtorch_config_0,1:GPU,Yes,2386.6,52.5,179.6 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,64,vgg19_libtorch_config_0,1:GPU,Yes,2386.6,26.6,120.5 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,vgg19_libtorch_config_0,1:GPU,Yes,2386.6,17.6,73.0 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,vgg19_libtorch_config_0,1:GPU,Yes,2386.6,7.6,63.2 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,vgg19_libtorch_config_4,3:GPU,Yes,3701.5,17.4,75.2 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,vgg19_libtorch_config_4,3:GPU,Yes,3701.5,7.3,62.9 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,vgg19_libtorch_config_5,3:GPU,Yes,3783.3,16.9,73.9 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,vgg19_libtorch_config_5,3:GPU,Yes,3783.3,7.3,62.1 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,vgg19_libtorch_config_6,4:GPU,Yes,4362.1,17.4,73.8 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,vgg19_libtorch_config_6,4:GPU,Yes,4362.1,7.5,62.3 -vgg19_libtorch,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,vgg19_libtorch_config_7,4:GPU,Yes,4471.1,7.5,63.1 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,32,add_sub_config_5,3:GPU,Yes,876.6,0.0,57.1 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,16,add_sub_config_5,3:GPU,Yes,876.6,0.0,57.0 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_5,3:GPU,Yes,876.6,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_5,3:GPU,Yes,876.6,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,32,add_sub_config_7,4:GPU,Yes,878.7,0.0,57.1 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,16,add_sub_config_7,4:GPU,Yes,878.7,0.0,57.0 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_7,4:GPU,Yes,878.7,0.0,57.0 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_7,4:GPU,Yes,878.7,0.0,57.0 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,32,add_sub_config_3,2:GPU,Yes,874.5,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,16,add_sub_config_3,2:GPU,Yes,874.5,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_3,2:GPU,Yes,874.5,0.0,57.0 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_3,2:GPU,Yes,874.5,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,32,add_sub_config_default,1:GPU,Yes,872.4,0.0,56.6 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,16,add_sub_config_default,1:GPU,Yes,872.4,0.0,56.5 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_default,1:GPU,Yes,872.4,0.0,56.5 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_default,1:GPU,Yes,872.4,0.0,56.5 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,32,add_sub_config_9,5:GPU,Yes,880.8,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,16,add_sub_config_9,5:GPU,Yes,880.8,0.0,57.0 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_9,5:GPU,Yes,880.8,0.0,57.0 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_9,5:GPU,Yes,880.8,0.0,57.0 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,32,add_sub_config_1,1:GPU,Yes,872.4,0.0,56.7 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,16,add_sub_config_1,1:GPU,Yes,872.4,0.0,56.7 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_1,1:GPU,Yes,872.4,0.0,56.8 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_1,1:GPU,Yes,872.4,0.0,56.7 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_4,3:GPU,Yes,876.6,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_4,3:GPU,Yes,876.6,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_6,4:GPU,Yes,878.7,0.0,57.1 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_6,4:GPU,Yes,878.7,0.0,57.1 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_0,1:GPU,Yes,872.4,0.0,56.7 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_0,1:GPU,Yes,872.4,0.0,56.7 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_2,2:GPU,Yes,874.5,0.0,56.8 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_2,2:GPU,Yes,874.5,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,32,add_sub_config_8,5:GPU,Yes,880.8,0.0,56.9 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,16,add_sub_config_8,5:GPU,Yes,880.8,0.0,57.0 diff --git a/tests/common/request-rate-ckpt/golden-metrics-model-inference.csv b/tests/common/request-rate-ckpt/golden-metrics-model-inference.csv index 826b850e8..1f7daaf25 100644 --- a/tests/common/request-rate-ckpt/golden-metrics-model-inference.csv +++ b/tests/common/request-rate-ckpt/golden-metrics-model-inference.csv @@ -1,43 +1,36 @@ Model,Batch,Request Rate,Model Config Path,Instance Group,Max Batch Size,Satisfies Constraints,Throughput (infer/sec),p99 Latency (ms) -vgg19_libtorch,1,2048,vgg19_libtorch_config_1,1:GPU,2,Yes,280.8,14.5 -vgg19_libtorch,1,1024,vgg19_libtorch_config_1,1:GPU,2,Yes,280.4,14.5 -vgg19_libtorch,1,4096,vgg19_libtorch_config_1,1:GPU,2,Yes,279.5,14.7 -vgg19_libtorch,1,512,vgg19_libtorch_config_1,1:GPU,2,Yes,279.5,14.5 -vgg19_libtorch,1,256,vgg19_libtorch_config_1,1:GPU,2,Yes,242.8,15.8 -vgg19_libtorch,1,128,vgg19_libtorch_config_1,1:GPU,2,Yes,126.1,16.4 -vgg19_libtorch,1,64,vgg19_libtorch_config_1,1:GPU,2,Yes,60.6,15.4 -vgg19_libtorch,1,32,vgg19_libtorch_config_1,1:GPU,2,Yes,36.0,16.7 -vgg19_libtorch,1,16,vgg19_libtorch_config_1,1:GPU,2,Yes,14.7,12.6 -vgg19_libtorch,1,1024,vgg19_libtorch_config_2,1:GPU,4,Yes,264.5,15.3 -vgg19_libtorch,1,2048,vgg19_libtorch_config_2,1:GPU,4,Yes,264.2,15.4 -vgg19_libtorch,1,4096,vgg19_libtorch_config_2,1:GPU,4,Yes,263.8,15.4 -vgg19_libtorch,1,512,vgg19_libtorch_config_2,1:GPU,4,Yes,262.5,15.4 -vgg19_libtorch,1,256,vgg19_libtorch_config_2,1:GPU,4,Yes,242.5,15.4 -vgg19_libtorch,1,128,vgg19_libtorch_config_2,1:GPU,4,Yes,125.8,15.7 -vgg19_libtorch,1,64,vgg19_libtorch_config_2,1:GPU,4,Yes,60.6,15.7 -vgg19_libtorch,1,32,vgg19_libtorch_config_2,1:GPU,4,Yes,36.0,17.0 -vgg19_libtorch,1,16,vgg19_libtorch_config_2,1:GPU,4,Yes,14.7,12.7 -vgg19_libtorch,1,2048,vgg19_libtorch_config_default,1:GPU,128,Yes,226.2,18.0 -vgg19_libtorch,1,512,vgg19_libtorch_config_default,1:GPU,128,Yes,226.2,17.9 -vgg19_libtorch,1,1024,vgg19_libtorch_config_default,1:GPU,128,Yes,225.5,18.0 -vgg19_libtorch,1,256,vgg19_libtorch_config_default,1:GPU,128,Yes,223.2,17.8 -vgg19_libtorch,1,128,vgg19_libtorch_config_default,1:GPU,128,Yes,126.0,18.2 -vgg19_libtorch,1,64,vgg19_libtorch_config_default,1:GPU,128,Yes,64.5,18.6 -vgg19_libtorch,1,32,vgg19_libtorch_config_default,1:GPU,128,Yes,36.0,16.1 -vgg19_libtorch,1,16,vgg19_libtorch_config_default,1:GPU,128,Yes,14.7,12.6 -vgg19_libtorch,1,1024,vgg19_libtorch_config_0,1:GPU,1,Yes,225.5,18.2 -vgg19_libtorch,1,512,vgg19_libtorch_config_0,1:GPU,1,Yes,225.5,18.0 -vgg19_libtorch,1,2048,vgg19_libtorch_config_0,1:GPU,1,Yes,225.2,18.3 -vgg19_libtorch,1,256,vgg19_libtorch_config_0,1:GPU,1,Yes,221.9,18.0 -vgg19_libtorch,1,128,vgg19_libtorch_config_0,1:GPU,1,Yes,125.9,18.4 -vgg19_libtorch,1,64,vgg19_libtorch_config_0,1:GPU,1,Yes,60.6,18.4 -vgg19_libtorch,1,32,vgg19_libtorch_config_0,1:GPU,1,Yes,36.0,16.3 -vgg19_libtorch,1,16,vgg19_libtorch_config_0,1:GPU,1,Yes,14.7,12.7 -vgg19_libtorch,1,32,vgg19_libtorch_config_4,3:GPU,1,Yes,36.0,13.9 -vgg19_libtorch,1,16,vgg19_libtorch_config_4,3:GPU,1,Yes,14.7,11.0 -vgg19_libtorch,1,32,vgg19_libtorch_config_5,3:GPU,2,Yes,36.0,14.8 -vgg19_libtorch,1,16,vgg19_libtorch_config_5,3:GPU,2,Yes,14.7,11.2 -vgg19_libtorch,1,32,vgg19_libtorch_config_6,4:GPU,1,Yes,36.0,14.4 -vgg19_libtorch,1,16,vgg19_libtorch_config_6,4:GPU,1,Yes,14.7,11.0 -vgg19_libtorch,1,16,vgg19_libtorch_config_7,4:GPU,2,Yes,14.7,11.0 +add_sub,2,32,add_sub_config_5,3:GPU,2,Yes,64.0,1.4 +add_sub,2,16,add_sub_config_5,3:GPU,2,Yes,32.2,1.5 +add_sub,1,32,add_sub_config_5,3:GPU,2,Yes,32.0,1.5 +add_sub,1,16,add_sub_config_5,3:GPU,2,Yes,16.1,1.4 +add_sub,2,32,add_sub_config_7,4:GPU,2,Yes,64.0,1.4 +add_sub,2,16,add_sub_config_7,4:GPU,2,Yes,32.2,1.5 +add_sub,1,32,add_sub_config_7,4:GPU,2,Yes,32.0,1.5 +add_sub,1,16,add_sub_config_7,4:GPU,2,Yes,16.1,1.5 +add_sub,2,32,add_sub_config_3,2:GPU,2,Yes,64.0,1.5 +add_sub,2,16,add_sub_config_3,2:GPU,2,Yes,32.2,1.4 +add_sub,1,32,add_sub_config_3,2:GPU,2,Yes,32.0,1.4 +add_sub,1,16,add_sub_config_3,2:GPU,2,Yes,16.1,1.4 +add_sub,2,32,add_sub_config_default,1:GPU,8,Yes,64.0,1.4 +add_sub,2,16,add_sub_config_default,1:GPU,8,Yes,32.2,1.5 +add_sub,1,32,add_sub_config_default,1:GPU,8,Yes,32.0,1.4 +add_sub,1,16,add_sub_config_default,1:GPU,8,Yes,16.1,1.4 +add_sub,2,32,add_sub_config_9,5:GPU,2,Yes,64.0,1.5 +add_sub,2,16,add_sub_config_9,5:GPU,2,Yes,32.2,1.5 +add_sub,1,32,add_sub_config_9,5:GPU,2,Yes,32.0,1.5 +add_sub,1,16,add_sub_config_9,5:GPU,2,Yes,16.1,1.5 +add_sub,2,32,add_sub_config_1,1:GPU,2,Yes,64.0,1.5 +add_sub,2,16,add_sub_config_1,1:GPU,2,Yes,32.2,1.6 +add_sub,1,32,add_sub_config_1,1:GPU,2,Yes,32.0,1.5 +add_sub,1,16,add_sub_config_1,1:GPU,2,Yes,16.1,1.5 +add_sub,1,32,add_sub_config_4,3:GPU,1,Yes,32.0,1.5 +add_sub,1,16,add_sub_config_4,3:GPU,1,Yes,16.1,1.5 +add_sub,1,32,add_sub_config_6,4:GPU,1,Yes,32.0,1.5 +add_sub,1,16,add_sub_config_6,4:GPU,1,Yes,16.1,1.5 +add_sub,1,32,add_sub_config_0,1:GPU,1,Yes,32.0,1.5 +add_sub,1,16,add_sub_config_0,1:GPU,1,Yes,16.1,1.5 +add_sub,1,32,add_sub_config_2,2:GPU,1,Yes,32.0,1.5 +add_sub,1,16,add_sub_config_2,2:GPU,1,Yes,16.1,1.5 +add_sub,1,32,add_sub_config_8,5:GPU,1,Yes,32.0,1.4 +add_sub,1,16,add_sub_config_8,5:GPU,1,Yes,16.1,1.5 diff --git a/tests/common/request-rate-ckpt/golden-metrics-server-only.csv b/tests/common/request-rate-ckpt/golden-metrics-server-only.csv index 1fc199396..4daa8fc3d 100644 --- a/tests/common/request-rate-ckpt/golden-metrics-server-only.csv +++ b/tests/common/request-rate-ckpt/golden-metrics-server-only.csv @@ -1,3 +1,3 @@ Model,GPU UUID,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,845.0,0.0,56.7 +triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,870.0,0.0,56.1 diff --git a/tests/common/single-model-ckpt/0.ckpt b/tests/common/single-model-ckpt/0.ckpt index 29c83c94a..a3d9e0489 100644 --- a/tests/common/single-model-ckpt/0.ckpt +++ b/tests/common/single-model-ckpt/0.ckpt @@ -1 +1,5350 @@ -{"ResultManager.results": {"_results": {"add_sub": {"add_sub_config_default": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "add_sub", "_model_config": {"name": "add_sub_config_default", "platform": "pytorch_libtorch", "maxBatchSize": 8, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["16"], "labelFilename": "output0_labels.txt"}, {"name": "OUTPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "instanceGroup": [{"kind": "KIND_GPU"}], "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 1, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "add_sub_config_default", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "add_sub_config_default-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m add_sub_config_default -b 1 -i grpc -f add_sub_config_default-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_default", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 3.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.8185, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 3.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.8185, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 3.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 55.8185, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.38, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.437, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.46, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.511, "_timestamp": 0}], ["perf_throughput", {"_value": 2626.05, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.367, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.017, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.066, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.031, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.042, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.38, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.437, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.46, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.511, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 2626.05, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.367, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.017, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.066, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.031, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.042, "_timestamp": 0}]}}]}, "-m add_sub_config_default -b 1 -i grpc -f add_sub_config_default-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_default", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 7.66667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.0633, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 7.66667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.0633, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 7.66667, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.0633, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_default", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.307, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.337, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.395, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.54, "_timestamp": 0}], ["perf_throughput", {"_value": 6494.65, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.295, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.037, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.033, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.018, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.307, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.337, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.395, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.54, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 6494.65, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.295, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.037, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.033, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.018, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]}}]}, "-m add_sub_config_default -b 2 -i grpc -f add_sub_config_default-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_default", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 2.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.9175, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 2.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.9175, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 2.25, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 55.9175, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_default", "_model_specific_pa_params": {"batch-size": 2, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.394, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.471, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.535, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.637, "_timestamp": 0}], ["perf_throughput", {"_value": 5066.42, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.38, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.018, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.073, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.033, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.043, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.394, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.471, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.535, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.637, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 5066.42, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.38, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.018, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.073, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.033, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.043, "_timestamp": 0}]}}]}, "-m add_sub_config_default -b 2 -i grpc -f add_sub_config_default-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_default", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 5.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.9815, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 5.0, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.9815, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 5.0, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 55.9815, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_default", "_model_specific_pa_params": {"batch-size": 2, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.314, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.354, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.408, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.577, "_timestamp": 0}], ["perf_throughput", {"_value": 12693.7, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.301, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.031, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.034, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.018, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.314, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.354, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.408, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.577, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 12693.7, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.301, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.031, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.034, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.018, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.022, "_timestamp": 0}]}}]}}], "add_sub_config_0": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "add_sub", "_model_config": {"name": "add_sub_config_0", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["16"], "labelFilename": "output0_labels.txt"}, {"name": "OUTPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 1, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "add_sub_config_0", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "add_sub_config_0-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m add_sub_config_0 -b 1 -i grpc -f add_sub_config_0-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 3.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.9675, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 3.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.9675, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 3.25, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 55.9675, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.437, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.51, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.539, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.621, "_timestamp": 0}], ["perf_throughput", {"_value": 2282.1, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.004, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.422, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.038, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.074, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.035, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.047, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.437, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.51, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.539, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.621, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 2282.1, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.004, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.422, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.038, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.074, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.035, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.047, "_timestamp": 0}]}}]}, "-m add_sub_config_0 -b 1 -i grpc -f add_sub_config_0-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_0", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 4.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.0195, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 4.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.0195, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 4.25, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.0195, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_0", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.334, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.398, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.498, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.632, "_timestamp": 0}], ["perf_throughput", {"_value": 5966.26, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.321, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.045, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.035, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.019, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.334, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.398, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.498, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.632, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 5966.26, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.321, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.045, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.035, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.019, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.024, "_timestamp": 0}]}}]}}], "add_sub_config_1": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "add_sub", "_model_config": {"name": "add_sub_config_1", "platform": "pytorch_libtorch", "maxBatchSize": 2, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["16"], "labelFilename": "output0_labels.txt"}, {"name": "OUTPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "instanceGroup": [{"count": 1, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 1, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "add_sub_config_1", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "add_sub_config_1-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m add_sub_config_1 -b 1 -i grpc -f add_sub_config_1-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_1", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 2.75, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.961, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 2.75, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.961, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 2.75, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 55.961, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_1", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.412, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.474, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.495, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.568, "_timestamp": 0}], ["perf_throughput", {"_value": 2421.98, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.399, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.035, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.069, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.034, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.044, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.412, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.474, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.495, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.568, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 2421.98, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.399, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.035, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.069, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.034, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.044, "_timestamp": 0}]}}]}, "-m add_sub_config_1 -b 1 -i grpc -f add_sub_config_1-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_1", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 4.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.1902, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 4.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.1902, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 4.25, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.1902, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_1", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.321, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.357, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.42, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.579, "_timestamp": 0}], ["perf_throughput", {"_value": 6209.88, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.308, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.044, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.033, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.018, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.321, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.357, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.42, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.579, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 6209.88, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.308, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.044, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.033, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.018, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]}}]}, "-m add_sub_config_1 -b 2 -i grpc -f add_sub_config_1-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_1", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 2.5, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.2845, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 2.5, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.2845, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 2.5, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.2845, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_1", "_model_specific_pa_params": {"batch-size": 2, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.4, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.486, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.528, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.573, "_timestamp": 0}], ["perf_throughput", {"_value": 4989.17, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.387, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.033, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.071, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.032, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.043, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.4, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.486, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.528, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.573, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 4989.17, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.387, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.033, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.071, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.032, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.043, "_timestamp": 0}]}}]}, "-m add_sub_config_1 -b 2 -i grpc -f add_sub_config_1-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_1", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 6.666669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.4687, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 6.666669999999999, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.4687, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 847.249408, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24548.212736, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 6.666669999999999, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.4687, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_1", "_model_specific_pa_params": {"batch-size": 2, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.331, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.367, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.411, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.592, "_timestamp": 0}], ["perf_throughput", {"_value": 12046.2, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.318, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.041, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.035, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.018, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.331, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.367, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.411, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.592, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 12046.2, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.318, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.041, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.035, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.018, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.023, "_timestamp": 0}]}}]}}], "add_sub_config_2": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "add_sub", "_model_config": {"name": "add_sub_config_2", "platform": "pytorch_libtorch", "maxBatchSize": 1, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["16"], "labelFilename": "output0_labels.txt"}, {"name": "OUTPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "instanceGroup": [{"count": 2, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 1, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": 100, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "add_sub_config_2", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "add_sub_config_2-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m add_sub_config_2 -b 1 -i grpc -f add_sub_config_2-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_2", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 2.33333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.4313, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 2.33333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.4313, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 2.33333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.4313, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_2", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.557, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.712, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.746, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.818, "_timestamp": 0}], ["perf_throughput", {"_value": 1791.43, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.544, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.05, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.105, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.055, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.557, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.712, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.746, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.818, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 1791.43, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.003, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.544, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.05, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.105, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.055, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]}}]}, "-m add_sub_config_2 -b 1 -i grpc -f add_sub_config_2-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_2", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 2.5, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.472, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 2.5, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.472, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 2.5, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.472, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_2", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.602, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.697, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.742, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.846, "_timestamp": 0}], ["perf_throughput", {"_value": 3314.69, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.005, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.584, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.043, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.111, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.054, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.602, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.697, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.742, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.846, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 3314.69, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.005, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.584, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.043, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.111, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.054, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]}}]}}], "add_sub_config_3": [{"_triton_env": {}, "_model_run_configs": [{"_model_name": "add_sub", "_model_config": {"name": "add_sub_config_3", "platform": "pytorch_libtorch", "maxBatchSize": 2, "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "dataType": "TYPE_FP32", "dims": ["16"], "labelFilename": "output0_labels.txt"}, {"name": "OUTPUT__1", "dataType": "TYPE_FP32", "dims": ["16"]}], "instanceGroup": [{"count": 2, "kind": "KIND_GPU"}], "dynamicBatching": {}, "cpu_only": false}, "_perf_config": {"_args": {"service-kind": null, "model-signature-name": null, "async": null, "sync": null, "measurement-interval": null, "concurrency-range": 1, "request-rate-range": null, "request-distribution": null, "request-intervals": null, "binary-search": null, "num-of-sequences": null, "latency-threshold": null, "max-threads": null, "stability-percentage": null, "max-trials": null, "percentile": null, "input-data": null, "shared-memory": null, "output-shared-memory-size": null, "shape": null, "sequence-length": null, "sequence-id-range": null, "string-length": null, "string-data": null, "measurement-mode": "count_windows", "measurement-request-count": null, "streaming": null, "grpc-compression-algorithm": null, "triton-server-directory": null, "model-repository": null, "ssl-grpc-use-ssl": null, "ssl-grpc-root-certifications-file": null, "ssl-grpc-private-key-file": null, "ssl-grpc-certificate-chain-file": null, "ssl-https-verify-peer": null, "ssl-https-verify-host": null, "ssl-https-ca-certificates-file": null, "ssl-https-client-certificate-type": null, "ssl-https-client-certificate-file": null, "ssl-https-private-key-type": null, "ssl-https-private-key-file": null, "collect-metrics": "True", "metrics-url": "http://localhost:8002/metrics", "metrics-interval": 1000.0}, "_options": {"-m": "add_sub_config_3", "-x": null, "-b": 1, "-u": "localhost:8001", "-i": "grpc", "-f": "add_sub_config_3-results.csv", "-H": null}, "_verbose": {"-v": null, "-v -v": null, "--verbose-csv": "--verbose-csv"}, "_input_to_options": {"model-name": "-m", "model-version": "-x", "batch-size": "-b", "url": "-u", "protocol": "-i", "latency-report-file": "-f", "http-header": "-H"}, "_input_to_verbose": {"verbose": "-v", "extra-verbose": "-v -v", "verbose-csv": "--verbose-csv"}, "_additive_args": {"input-data": null, "shape": null}}, "_ensemble_composing_configs": []}]}, {"-m add_sub_config_3 -b 1 -i grpc -f add_sub_config_3-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_3", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 2.66667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.4323, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 2.66667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.4323, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 2.66667, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.4323, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_3", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.473, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.583, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.626, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.763, "_timestamp": 0}], ["perf_throughput", {"_value": 2108.93, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.004, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.458, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.04, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.081, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.041, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.052, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.473, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.583, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.626, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.763, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 2108.93, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.004, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.458, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.04, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.081, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.041, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.052, "_timestamp": 0}]}}]}, "-m add_sub_config_3 -b 1 -i grpc -f add_sub_config_3-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_3", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 4.33333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.561, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 4.33333, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.561, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 4.33333, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.561, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_3", "_model_specific_pa_params": {"batch-size": 1, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.586, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.712, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.758, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.871, "_timestamp": 0}], ["perf_throughput", {"_value": 3402.77, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.005, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.568, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.041, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.105, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.054, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.586, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.712, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.758, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.871, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 3402.77, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.005, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.568, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.041, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.105, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.054, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]}}]}, "-m add_sub_config_3 -b 2 -i grpc -f add_sub_config_3-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_3", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 2.66667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.5343, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 2.66667, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.5343, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 2.66667, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.5343, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_3", "_model_specific_pa_params": {"batch-size": 2, "concurrency-range": 1}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.558, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.695, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.757, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.845, "_timestamp": 0}], ["perf_throughput", {"_value": 3575.86, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.004, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.543, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.049, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.111, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.056, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.558, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.695, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.757, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.845, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 3575.86, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.004, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.543, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.049, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.111, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.056, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]}}]}, "-m add_sub_config_3 -b 2 -i grpc -f add_sub_config_3-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0": {"_model_variants_name": "add_sub_config_3", "_gpu_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 2.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.5485, "_timestamp": 0, "_device_uuid": null}]]}, "_avg_gpu_data": [["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], ["gpu_utilization", {"_value": 2.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 56.5485, "_timestamp": 0, "_device_uuid": null}]], "_avg_gpu_data_from_tag": {"gpu_used_memory": ["gpu_used_memory", {"_value": 849.34656, "_timestamp": 0, "_device_uuid": null}], "gpu_free_memory": ["gpu_free_memory", {"_value": 24546.115584, "_timestamp": 0, "_device_uuid": null}], "gpu_utilization": ["gpu_utilization", {"_value": 2.25, "_timestamp": 0, "_device_uuid": null}], "gpu_power_usage": ["gpu_power_usage", {"_value": 56.5485, "_timestamp": 0, "_device_uuid": null}]}, "_model_config_measurements": [{"_model_config_name": "add_sub_config_3", "_model_specific_pa_params": {"batch-size": 2, "concurrency-range": 2}, "_non_gpu_data": [["perf_latency_avg", {"_value": 0.601, "_timestamp": 0}], ["perf_latency_p90", {"_value": 0.708, "_timestamp": 0}], ["perf_latency_p95", {"_value": 0.748, "_timestamp": 0}], ["perf_latency_p99", {"_value": 0.839, "_timestamp": 0}], ["perf_throughput", {"_value": 6643.47, "_timestamp": 0}], ["perf_client_send_recv", {"_value": 0.005, "_timestamp": 0}], ["perf_client_response_wait", {"_value": 0.582, "_timestamp": 0}], ["perf_server_queue", {"_value": 0.042, "_timestamp": 0}], ["perf_server_compute_infer", {"_value": 0.114, "_timestamp": 0}], ["perf_server_compute_input", {"_value": 0.054, "_timestamp": 0}], ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]], "_non_gpu_data_from_tag": {"perf_latency_avg": ["perf_latency_avg", {"_value": 0.601, "_timestamp": 0}], "perf_latency_p90": ["perf_latency_p90", {"_value": 0.708, "_timestamp": 0}], "perf_latency_p95": ["perf_latency_p95", {"_value": 0.748, "_timestamp": 0}], "perf_latency_p99": ["perf_latency_p99", {"_value": 0.839, "_timestamp": 0}], "perf_throughput": ["perf_throughput", {"_value": 6643.47, "_timestamp": 0}], "perf_client_send_recv": ["perf_client_send_recv", {"_value": 0.005, "_timestamp": 0}], "perf_client_response_wait": ["perf_client_response_wait", {"_value": 0.582, "_timestamp": 0}], "perf_server_queue": ["perf_server_queue", {"_value": 0.042, "_timestamp": 0}], "perf_server_compute_infer": ["perf_server_compute_infer", {"_value": 0.114, "_timestamp": 0}], "perf_server_compute_input": ["perf_server_compute_input", {"_value": 0.054, "_timestamp": 0}], "perf_server_compute_output": ["perf_server_compute_output", {"_value": 0.067, "_timestamp": 0}]}}]}}]}}}, "ResultManager.server_only_data": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [["gpu_used_memory", {"_value": 457.0, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_free_memory", {"_value": 24938.0, "_timestamp": 0, "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0"}], ["gpu_utilization", {"_value": 0.25, "_timestamp": 0, "_device_uuid": null}], ["gpu_power_usage", {"_value": 55.6305, "_timestamp": 0, "_device_uuid": null}]]}, "MetricsManager.gpus": {"GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": {"name": "NVIDIA TITAN RTX", "total_memory": 25395462144}}, "ModelManager.model_variant_name_manager": {"_model_config_dicts": {"add_sub_config_0": {"name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "data_type": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["16"], "label_filename": "output0_labels.txt"}, {"name": "OUTPUT__1", "data_type": "TYPE_FP32", "dims": ["16"]}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "add_sub_config_1": {"name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "data_type": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["16"], "label_filename": "output0_labels.txt"}, {"name": "OUTPUT__1", "data_type": "TYPE_FP32", "dims": ["16"]}], "instance_group": [{"count": 1, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "add_sub_config_2": {"name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 1, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "data_type": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["16"], "label_filename": "output0_labels.txt"}, {"name": "OUTPUT__1", "data_type": "TYPE_FP32", "dims": ["16"]}], "instance_group": [{"count": 2, "kind": "KIND_GPU"}], "dynamic_batching": {}}, "add_sub_config_3": {"name": "add_sub", "platform": "pytorch_libtorch", "max_batch_size": 2, "input": [{"name": "INPUT__0", "data_type": "TYPE_FP32", "dims": ["16"]}, {"name": "INPUT__1", "data_type": "TYPE_FP32", "dims": ["16"]}], "output": [{"name": "OUTPUT__0", "data_type": "TYPE_FP32", "dims": ["16"], "label_filename": "output0_labels.txt"}, {"name": "OUTPUT__1", "data_type": "TYPE_FP32", "dims": ["16"]}], "instance_group": [{"count": 2, "kind": "KIND_GPU"}], "dynamic_batching": {}}}, "_model_name_index": {"add_sub": 3}}} \ No newline at end of file +{ + "ResultManager.results": { + "_results": { + "add_sub": { + "add_sub_config_default": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 8, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "kind": "KIND_GPU" + } + ] + }, + "variant_name": "add_sub_config_default", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 1, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_default -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.1825, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.1825, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.1825, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.397, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.463, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.478, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.539, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2508.04, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.382, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.019, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.064, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.035, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.042, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.397, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.463, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.478, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.539, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2508.04, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.382, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.019, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.064, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.035, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.042, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_default -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 4.25, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.3008, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 4.25, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.3008, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 4.25, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.3008, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.322, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.401, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.465, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.581, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 6176.0, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.309, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.03, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.032, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.02, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.322, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.401, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.465, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.581, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 6176.0, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.309, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.03, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.032, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.02, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_default -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.181, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.181, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.181, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 2, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.409, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.497, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.56, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.658, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 4876.12, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.393, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.019, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.07, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.037, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.043, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.409, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.497, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.56, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.658, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 4876.12, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.393, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.019, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.07, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.037, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.043, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_default -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_default", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 5.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.3928, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 5.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.3928, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 5.5, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.3928, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 2, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.301, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.344, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.384, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.524, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 13205.0, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.288, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.029, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.03, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.018, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.301, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.344, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.384, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.524, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 13205.0, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.288, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.029, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.03, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.018, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.02, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "add_sub_config_0": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_0", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 1, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_0 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.391, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.391, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.391, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.427, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.481, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.505, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.564, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2334.71, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.412, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.039, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.069, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.037, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.044, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.427, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.481, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.505, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.564, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2334.71, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.412, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.039, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.069, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.037, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.044, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_0 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_0", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 4.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.5265, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 4.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.5265, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 4.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.5265, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.326, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.383, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.455, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.599, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 6114.93, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.312, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.044, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.019, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.326, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.383, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.455, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.599, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 6114.93, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.312, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.044, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.031, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.019, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "add_sub_config_1": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_1", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 1, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_1 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 1.7500000000000002, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.417, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 1.7500000000000002, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.417, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 1.7500000000000002, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.417, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.456, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.53, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.546, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.599, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 2185.97, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.44, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.041, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.071, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.04, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.046, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.456, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.53, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.546, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.599, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 2185.97, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.44, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.041, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.071, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.04, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.046, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_1 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 4.75, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6202, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 4.75, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6202, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 4.75, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.6202, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.326, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.389, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.474, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.607, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 6109.95, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.313, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.048, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.033, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.02, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.326, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.389, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.474, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.607, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 6109.95, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.313, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.048, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.033, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.02, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.022, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_1 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 3.5000000000000004, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.479, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 3.5000000000000004, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.479, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 3.5000000000000004, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.479, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 2, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.415, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.477, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.511, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.587, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 4804.46, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.4, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.038, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.069, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.035, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.041, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.415, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.477, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.511, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.587, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 4804.46, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.4, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.038, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.069, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.035, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.041, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_1 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_1", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 6.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.5925, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 6.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.5925, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 872.415232, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24897.388544, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 6.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.5925, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 2, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.318, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.36, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.421, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.596, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 12528.9, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.304, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.042, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.032, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.018, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.318, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.36, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.421, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.596, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 12528.9, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.003, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.304, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.042, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.032, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.018, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.021, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "add_sub_config_2": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 1, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_2", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 1, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": null, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_2 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_2", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.5757, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.5757, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.5757, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.52, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.655, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.718, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.789, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 1915.57, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.503, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.047, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.089, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.052, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.056, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.52, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.655, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.718, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.789, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 1915.57, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.503, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.047, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.089, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.052, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.056, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_2 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_2", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6993, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6993, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 2.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.6993, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.593, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.721, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.764, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.864, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 3363.11, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.005, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.573, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.044, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.098, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.055, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.061, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.593, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.721, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.764, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.864, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 3363.11, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.005, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.573, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.044, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.098, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.055, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.061, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ], + "add_sub_config_3": [ + { + "_triton_env": {}, + "_model_run_configs": [ + { + "_model_name": "add_sub", + "_model_config_variant": { + "model_config": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "maxBatchSize": 2, + "input": [ + { + "name": "INPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ], + "labelFilename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "dataType": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instanceGroup": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamicBatching": {} + }, + "variant_name": "add_sub_config_3", + "cpu_only": false + }, + "_perf_config": { + "_args": { + "service-kind": null, + "model-signature-name": null, + "async": null, + "sync": null, + "measurement-interval": null, + "concurrency-range": 1, + "request-rate-range": null, + "request-distribution": null, + "request-intervals": null, + "binary-search": null, + "num-of-sequences": null, + "latency-threshold": null, + "max-threads": null, + "stability-percentage": null, + "max-trials": null, + "percentile": null, + "input-data": null, + "shared-memory": null, + "output-shared-memory-size": null, + "shape": null, + "sequence-length": null, + "sequence-id-range": null, + "string-length": null, + "string-data": null, + "measurement-mode": "count_windows", + "measurement-request-count": 100, + "streaming": null, + "grpc-compression-algorithm": null, + "triton-server-directory": null, + "model-repository": null, + "ssl-grpc-use-ssl": null, + "ssl-grpc-root-certifications-file": null, + "ssl-grpc-private-key-file": null, + "ssl-grpc-certificate-chain-file": null, + "ssl-https-verify-peer": null, + "ssl-https-verify-host": null, + "ssl-https-ca-certificates-file": null, + "ssl-https-client-certificate-type": null, + "ssl-https-client-certificate-file": null, + "ssl-https-private-key-type": null, + "ssl-https-private-key-file": null, + "collect-metrics": "True", + "metrics-url": "http://localhost:8002/metrics", + "metrics-interval": 1000, + "bls-composing-models": null + }, + "_options": { + "-m": "add_sub", + "-x": null, + "-b": 1, + "-u": "localhost:8001", + "-i": "grpc", + "-f": "add_sub-results.csv", + "-H": null + }, + "_verbose": { + "-v": null, + "-v -v": null, + "--verbose-csv": "--verbose-csv" + }, + "_input_to_options": { + "model-name": "-m", + "model-version": "-x", + "batch-size": "-b", + "url": "-u", + "protocol": "-i", + "latency-report-file": "-f", + "http-header": "-H" + }, + "_input_to_verbose": { + "verbose": "-v", + "extra-verbose": "-v -v", + "verbose-csv": "--verbose-csv" + }, + "_additive_args": { + "input-data": null, + "shape": null + } + }, + "_composing_config_variants": [] + } + ] + }, + { + "add_sub_config_3 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_3", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 1.6666699999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6563, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 1.6666699999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.6563, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 1.6666699999999999, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.6563, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.569, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.74, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.767, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.839, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 1752.08, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.552, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.056, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.099, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.061, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.064, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.569, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.74, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.767, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.839, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 1752.08, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.552, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.056, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.099, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.061, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.064, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_3 -m add_sub -b 1 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_3", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.621, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.621, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.621, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 1, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.609, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.712, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.754, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.838, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 3270.69, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.005, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.589, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.045, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.101, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.057, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.063, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.609, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.712, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.754, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.838, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 3270.69, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.005, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.589, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.045, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.101, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.057, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.063, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_3 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=1 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_3", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.7673, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.7673, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 3.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.7673, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 2, + "concurrency-range": 1, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.5, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.658, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.709, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.803, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 3988.65, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.483, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.046, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.09, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.048, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.054, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.5, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.658, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.709, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.803, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 3988.65, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.483, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.046, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.09, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.048, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.054, + "_timestamp": 0 + } + ] + } + } + ] + }, + "add_sub_config_3 -m add_sub -b 2 -i grpc -f add_sub-results.csv --verbose-csv --concurrency-range=2 --measurement-mode=count_windows --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000": { + "_model_variants_name": "add_sub_config_3", + "_gpu_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 2.75, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.815, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "_avg_gpu_data": [ + [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_utilization", + { + "_value": 2.75, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 56.815, + "_timestamp": 0, + "_device_uuid": null + } + ] + ], + "_avg_gpu_data_from_tag": { + "gpu_used_memory": [ + "gpu_used_memory", + { + "_value": 874.512384, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_free_memory": [ + "gpu_free_memory", + { + "_value": 24895.291392, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_utilization": [ + "gpu_utilization", + { + "_value": 2.75, + "_timestamp": 0, + "_device_uuid": null + } + ], + "gpu_power_usage": [ + "gpu_power_usage", + { + "_value": 56.815, + "_timestamp": 0, + "_device_uuid": null + } + ] + }, + "_model_config_measurements": [ + { + "_model_config_name": "add_sub", + "_model_specific_pa_params": { + "batch-size": 2, + "concurrency-range": 2, + "request-rate-range": null + }, + "_non_gpu_data": [ + [ + "perf_latency_avg", + { + "_value": 0.572, + "_timestamp": 0 + } + ], + [ + "perf_latency_p90", + { + "_value": 0.677, + "_timestamp": 0 + } + ], + [ + "perf_latency_p95", + { + "_value": 0.715, + "_timestamp": 0 + } + ], + [ + "perf_latency_p99", + { + "_value": 0.81, + "_timestamp": 0 + } + ], + [ + "perf_throughput", + { + "_value": 6967.92, + "_timestamp": 0 + } + ], + [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + [ + "perf_client_response_wait", + { + "_value": 0.554, + "_timestamp": 0 + } + ], + [ + "perf_server_queue", + { + "_value": 0.043, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_infer", + { + "_value": 0.102, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_input", + { + "_value": 0.053, + "_timestamp": 0 + } + ], + [ + "perf_server_compute_output", + { + "_value": 0.059, + "_timestamp": 0 + } + ] + ], + "_non_gpu_data_from_tag": { + "perf_latency_avg": [ + "perf_latency_avg", + { + "_value": 0.572, + "_timestamp": 0 + } + ], + "perf_latency_p90": [ + "perf_latency_p90", + { + "_value": 0.677, + "_timestamp": 0 + } + ], + "perf_latency_p95": [ + "perf_latency_p95", + { + "_value": 0.715, + "_timestamp": 0 + } + ], + "perf_latency_p99": [ + "perf_latency_p99", + { + "_value": 0.81, + "_timestamp": 0 + } + ], + "perf_throughput": [ + "perf_throughput", + { + "_value": 6967.92, + "_timestamp": 0 + } + ], + "perf_client_send_recv": [ + "perf_client_send_recv", + { + "_value": 0.004, + "_timestamp": 0 + } + ], + "perf_client_response_wait": [ + "perf_client_response_wait", + { + "_value": 0.554, + "_timestamp": 0 + } + ], + "perf_server_queue": [ + "perf_server_queue", + { + "_value": 0.043, + "_timestamp": 0 + } + ], + "perf_server_compute_infer": [ + "perf_server_compute_infer", + { + "_value": 0.102, + "_timestamp": 0 + } + ], + "perf_server_compute_input": [ + "perf_server_compute_input", + { + "_value": 0.053, + "_timestamp": 0 + } + ], + "perf_server_compute_output": [ + "perf_server_compute_output", + { + "_value": 0.059, + "_timestamp": 0 + } + ] + } + } + ] + } + } + ] + } + } + }, + "ResultManager.server_only_data": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": [ + [ + "gpu_used_memory", + { + "_value": 870.0, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_free_memory", + { + "_value": 24899.0, + "_timestamp": 0, + "_device_uuid": "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0" + } + ], + [ + "gpu_utilization", + { + "_value": 0.0, + "_timestamp": 0, + "_device_uuid": null + } + ], + [ + "gpu_power_usage", + { + "_value": 55.95925, + "_timestamp": 0, + "_device_uuid": null + } + ] + ] + }, + "MetricsManager.gpus": { + "GPU-8557549f-9c89-4384-8bd6-1fd823c342e0": { + "name": "NVIDIA TITAN RTX", + "total_memory": 25387401216 + } + }, + "ModelManager.model_variant_name_manager": { + "_model_config_dicts": { + "add_sub_config_0": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ], + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "add_sub_config_1": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ], + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instance_group": [ + { + "count": 1, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "add_sub_config_2": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "max_batch_size": 1, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ], + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + }, + "add_sub_config_3": { + "name": "add_sub", + "platform": "pytorch_libtorch", + "max_batch_size": 2, + "input": [ + { + "name": "INPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + }, + { + "name": "INPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "output": [ + { + "name": "OUTPUT__0", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ], + "label_filename": "output0_labels.txt" + }, + { + "name": "OUTPUT__1", + "data_type": "TYPE_FP32", + "dims": [ + "16" + ] + } + ], + "instance_group": [ + { + "count": 2, + "kind": "KIND_GPU" + } + ], + "dynamic_batching": {} + } + }, + "_model_name_index": { + "add_sub": 3 + } + } +} \ No newline at end of file diff --git a/tests/common/single-model-ckpt/golden-metrics-model-gpu.csv b/tests/common/single-model-ckpt/golden-metrics-model-gpu.csv index 0b0bddc06..ce20b6e30 100644 --- a/tests/common/single-model-ckpt/golden-metrics-model-gpu.csv +++ b/tests/common/single-model-ckpt/golden-metrics-model-gpu.csv @@ -1,18 +1,18 @@ Model,GPU UUID,Batch,Concurrency,Model Config Path,Instance Group,Satisfies Constraints,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,2,add_sub_config_default,1:GPU,Yes,847.2,5.0,56.0 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_default,1:GPU,Yes,847.2,7.7,56.1 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,1,add_sub_config_default,1:GPU,Yes,847.2,2.2,55.9 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_default,1:GPU,Yes,847.2,3.0,55.8 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,2,add_sub_config_1,1:GPU,Yes,847.2,6.7,56.5 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_1,1:GPU,Yes,847.2,4.2,56.2 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,1,add_sub_config_1,1:GPU,Yes,847.2,2.5,56.3 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_1,1:GPU,Yes,847.2,2.8,56.0 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,2,add_sub_config_3,2:GPU,Yes,849.3,2.2,56.5 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,1,add_sub_config_3,2:GPU,Yes,849.3,2.7,56.5 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_3,2:GPU,Yes,849.3,4.3,56.6 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_3,2:GPU,Yes,849.3,2.7,56.4 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_0,1:GPU,Yes,847.2,4.2,56.0 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_0,1:GPU,Yes,847.2,3.2,56.0 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_2,2:GPU,Yes,849.3,2.5,56.5 -add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_2,2:GPU,Yes,849.3,2.3,56.4 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,2,add_sub_config_default,1:GPU,Yes,872.4,5.5,56.4 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_default,1:GPU,Yes,872.4,4.2,56.3 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,1,add_sub_config_default,1:GPU,Yes,872.4,3.0,56.2 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_default,1:GPU,Yes,872.4,2.0,56.2 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,2,add_sub_config_1,1:GPU,Yes,872.4,6.0,56.6 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_1,1:GPU,Yes,872.4,4.8,56.6 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,1,add_sub_config_1,1:GPU,Yes,872.4,3.5,56.5 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_1,1:GPU,Yes,872.4,1.8,56.4 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,2,add_sub_config_3,2:GPU,Yes,874.5,2.8,56.8 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,2,1,add_sub_config_3,2:GPU,Yes,874.5,3.0,56.8 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_3,2:GPU,Yes,874.5,3.0,56.6 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_3,2:GPU,Yes,874.5,1.7,56.7 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_0,1:GPU,Yes,872.4,4.0,56.5 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_0,1:GPU,Yes,872.4,2.0,56.4 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,2,add_sub_config_2,2:GPU,Yes,874.5,2.0,56.7 +add_sub,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,1,1,add_sub_config_2,2:GPU,Yes,874.5,3.0,56.6 diff --git a/tests/common/single-model-ckpt/golden-metrics-model-inference.csv b/tests/common/single-model-ckpt/golden-metrics-model-inference.csv index f0923ecb1..64d216513 100644 --- a/tests/common/single-model-ckpt/golden-metrics-model-inference.csv +++ b/tests/common/single-model-ckpt/golden-metrics-model-inference.csv @@ -1,18 +1,18 @@ Model,Batch,Concurrency,Model Config Path,Instance Group,Max Batch Size,Satisfies Constraints,Throughput (infer/sec),p99 Latency (ms) -add_sub,2,2,add_sub_config_default,1:GPU,8,Yes,12693.7,0.6 -add_sub,1,2,add_sub_config_default,1:GPU,8,Yes,6494.6,0.5 -add_sub,2,1,add_sub_config_default,1:GPU,8,Yes,5066.4,0.6 -add_sub,1,1,add_sub_config_default,1:GPU,8,Yes,2626.1,0.5 -add_sub,2,2,add_sub_config_1,1:GPU,2,Yes,12046.2,0.6 -add_sub,1,2,add_sub_config_1,1:GPU,2,Yes,6209.9,0.6 -add_sub,2,1,add_sub_config_1,1:GPU,2,Yes,4989.2,0.6 -add_sub,1,1,add_sub_config_1,1:GPU,2,Yes,2422.0,0.6 -add_sub,2,2,add_sub_config_3,2:GPU,2,Yes,6643.5,0.8 -add_sub,2,1,add_sub_config_3,2:GPU,2,Yes,3575.9,0.8 -add_sub,1,2,add_sub_config_3,2:GPU,2,Yes,3402.8,0.9 -add_sub,1,1,add_sub_config_3,2:GPU,2,Yes,2108.9,0.8 -add_sub,1,2,add_sub_config_0,1:GPU,1,Yes,5966.3,0.6 -add_sub,1,1,add_sub_config_0,1:GPU,1,Yes,2282.1,0.6 -add_sub,1,2,add_sub_config_2,2:GPU,1,Yes,3314.7,0.8 -add_sub,1,1,add_sub_config_2,2:GPU,1,Yes,1791.4,0.8 +add_sub,2,2,add_sub_config_default,1:GPU,8,Yes,13205.0,0.5 +add_sub,1,2,add_sub_config_default,1:GPU,8,Yes,6176.0,0.6 +add_sub,2,1,add_sub_config_default,1:GPU,8,Yes,4876.1,0.7 +add_sub,1,1,add_sub_config_default,1:GPU,8,Yes,2508.0,0.5 +add_sub,2,2,add_sub_config_1,1:GPU,2,Yes,12528.9,0.6 +add_sub,1,2,add_sub_config_1,1:GPU,2,Yes,6109.9,0.6 +add_sub,2,1,add_sub_config_1,1:GPU,2,Yes,4804.5,0.6 +add_sub,1,1,add_sub_config_1,1:GPU,2,Yes,2186.0,0.6 +add_sub,2,2,add_sub_config_3,2:GPU,2,Yes,6967.9,0.8 +add_sub,2,1,add_sub_config_3,2:GPU,2,Yes,3988.7,0.8 +add_sub,1,2,add_sub_config_3,2:GPU,2,Yes,3270.7,0.8 +add_sub,1,1,add_sub_config_3,2:GPU,2,Yes,1752.1,0.8 +add_sub,1,2,add_sub_config_0,1:GPU,1,Yes,6114.9,0.6 +add_sub,1,1,add_sub_config_0,1:GPU,1,Yes,2334.7,0.6 +add_sub,1,2,add_sub_config_2,2:GPU,1,Yes,3363.1,0.9 +add_sub,1,1,add_sub_config_2,2:GPU,1,Yes,1915.6,0.8 diff --git a/tests/common/single-model-ckpt/golden-metrics-server-only.csv b/tests/common/single-model-ckpt/golden-metrics-server-only.csv index 72676ca1b..f25e1df5c 100644 --- a/tests/common/single-model-ckpt/golden-metrics-server-only.csv +++ b/tests/common/single-model-ckpt/golden-metrics-server-only.csv @@ -1,3 +1,3 @@ Model,GPU UUID,GPU Memory Usage (MB),GPU Utilization (%),GPU Power Usage (W) -triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,457.0,0.2,55.6 +triton-server,GPU-8557549f-9c89-4384-8bd6-1fd823c342e0,870.0,0.0,56.0 diff --git a/tests/common/single-model-ckpt/plot_manager.json b/tests/common/single-model-ckpt/plot_manager.json index 6bef83031..cfd9c1a8b 100644 --- a/tests/common/single-model-ckpt/plot_manager.json +++ b/tests/common/single-model-ckpt/plot_manager.json @@ -1 +1 @@ -{"_simple_plots": {"add_sub": {"throughput_v_latency": {"_name": "throughput_v_latency", "_title": "Throughput vs. Latency", "_x_axis": "perf_latency_p99", "_y_axis": "perf_throughput", "_monotonic": true, "_data": {"add_sub_config_default": {"x_data": [0.577, 0.54, 0.637, 0.511], "y_data": [12693.7, 6494.65, 5066.42, 2626.05]}, "add_sub_config_1": {"x_data": [0.592, 0.579, 0.573, 0.568], "y_data": [12046.2, 6209.88, 4989.17, 2421.98]}, "add_sub_config_3": {"x_data": [0.839, 0.845, 0.871, 0.763], "y_data": [6643.47, 3575.86, 3402.77, 2108.93]}}}, "gpu_mem_v_latency": {"_name": "gpu_mem_v_latency", "_title": "GPU Memory vs. Latency", "_x_axis": "perf_latency_p99", "_y_axis": "gpu_used_memory", "_monotonic": false, "_data": {"add_sub_config_default": {"x_data": [0.577, 0.54, 0.637, 0.511], "y_data": [847.249408, 847.249408, 847.249408, 847.249408]}, "add_sub_config_1": {"x_data": [0.592, 0.579, 0.573, 0.568], "y_data": [847.249408, 847.249408, 847.249408, 847.249408]}, "add_sub_config_3": {"x_data": [0.839, 0.845, 0.871, 0.763], "y_data": [849.34656, 849.34656, 849.34656, 849.34656]}}}}}} \ No newline at end of file +{"_simple_plots": {"add_sub": {"throughput_v_latency": {"_name": "throughput_v_latency", "_title": "Throughput vs. Latency", "_x_axis": "perf_latency_p99", "_y_axis": "perf_throughput", "_monotonic": true, "_data": {"add_sub_config_default": {"x_data": [0.524, 0.581, 0.658, 0.539], "y_data": [13205.0, 6176.0, 4876.12, 2508.04]}, "add_sub_config_1": {"x_data": [0.596, 0.607, 0.587, 0.599], "y_data": [12528.9, 6109.95, 4804.46, 2185.97]}, "add_sub_config_3": {"x_data": [0.81, 0.803, 0.838, 0.839], "y_data": [6967.92, 3988.65, 3270.69, 1752.08]}}}, "gpu_mem_v_latency": {"_name": "gpu_mem_v_latency", "_title": "GPU Memory vs. Latency", "_x_axis": "perf_latency_p99", "_y_axis": "gpu_used_memory", "_monotonic": false, "_data": {"add_sub_config_default": {"x_data": [0.524, 0.581, 0.658, 0.539], "y_data": [872.415232, 872.415232, 872.415232, 872.415232]}, "add_sub_config_1": {"x_data": [0.596, 0.607, 0.587, 0.599], "y_data": [872.415232, 872.415232, 872.415232, 872.415232]}, "add_sub_config_3": {"x_data": [0.81, 0.803, 0.838, 0.839], "y_data": [874.512384, 874.512384, 874.512384, 874.512384]}}}}}} \ No newline at end of file diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py index ada58e418..9d418027f 100755 --- a/tests/common/test_utils.py +++ b/tests/common/test_utils.py @@ -98,7 +98,7 @@ def load_ensemble_result_manager() -> Tuple[ResultManager, ConfigCommandProfile] as the ConfigCommandProfile used to fake the profile step """ dir_path = f"{ROOT_DIR}/ensemble-ckpt/" - yaml_str = "profile_models: ensemble_python_resnet50" + yaml_str = "profile_models: ensemble_add_sub" return _load_result_manager_helper(dir_path, yaml_str) @@ -108,7 +108,7 @@ def load_bls_result_manager() -> Tuple[ResultManager, ConfigCommandProfile]: as the ConfigCommandProfile used to fake the profile step """ dir_path = f"{ROOT_DIR}/bls-ckpt/" - yaml_str = "profile_models: FaceDetectionBLS" + yaml_str = "profile_models: bls" return _load_result_manager_helper(dir_path, yaml_str) @@ -120,7 +120,7 @@ def load_request_rate_result_manager() -> Tuple[ResultManager, ConfigCommandProf dir_path = f"{ROOT_DIR}/request-rate-ckpt/" yaml_str = f""" request_rate_search_enable: true - profile_models: vgg19_libtorch + profile_models: add_sub """ return _load_result_manager_helper(dir_path, yaml_str) diff --git a/tests/test_bls_report_manager.py b/tests/test_bls_report_manager.py index 37773418b..911369081 100755 --- a/tests/test_bls_report_manager.py +++ b/tests/test_bls_report_manager.py @@ -67,24 +67,23 @@ def test_bls_summary(self): """ Ensures the summary report sentence and table are accurate for a basic bls model (loaded from a checkpoint) """ - self._init_managers(models="FaceDetectionBLS", subcommand="profile") + self._init_managers(models="bls", subcommand="profile") self.report_manager.create_summaries() expected_summary_sentence = ( - "In 26 measurements across 10 configurations, FaceDetectionBLS_config_7 " - "is 12% better than the default configuration at maximizing throughput, " - "under the given constraints, on GPU(s) TITAN RTX.