diff --git a/model_analyzer/perf_analyzer/perf_analyzer.py b/model_analyzer/perf_analyzer/perf_analyzer.py
index 51ad64151..49f15f5a2 100755
--- a/model_analyzer/perf_analyzer/perf_analyzer.py
+++ b/model_analyzer/perf_analyzer/perf_analyzer.py
@@ -16,6 +16,7 @@
 
 import csv
 import glob
+import json
 import logging
 import os
 import re
@@ -25,6 +26,7 @@
 from typing import Dict, List
 
 import psutil
+from numpy import mean
 
 from model_analyzer.constants import (
     INTERVAL_SLEEP_TIME,
@@ -118,6 +120,14 @@ def get_gpu_metrics():
         ]
         return gpu_metrics
 
+    @staticmethod
+    def get_llm_metrics():
+        llm_metrics = [
+            llm_metric[PerfAnalyzer.RECORD_CLASS]
+            for llm_metric in PerfAnalyzer.llm_metric_table
+        ]
+        return llm_metrics
+
     def __init__(self, path, config, max_retries, timeout, max_cpu_util):
         """
         Parameters
@@ -143,6 +153,7 @@ def __init__(self, path, config, max_retries, timeout, max_cpu_util):
         self._output = ""
         self._perf_records = {}
         self._gpu_records = []
+        self._llm_records = {}
         self._max_cpu_util = max_cpu_util
 
     def run(self, metrics, env=None):
@@ -216,6 +227,19 @@ def get_gpu_records(self):
 
         return self._gpu_records
 
+    def get_llm_records(self):
+        """
+        Returns
+        -------
+        The LLM records from the last perf_analyzer run
+        """
+
+        if self._llm_records:
+            return self._llm_records
+        raise TritonModelAnalyzerException(
+            "Attempted to get perf_analyzer results without calling run first."
+        )
+
     def output(self):
         """
         Returns
@@ -457,21 +481,82 @@ def _parse_outputs(self, metrics):
             logger.debug(
                 f"Reading PA results from {perf_config['latency-report-file']}"
             )
-            with open(perf_config["latency-report-file"], mode="r") as f:
-                csv_reader = csv.DictReader(f, delimiter=",")
-
-                for row in csv_reader:
-                    self._perf_records[
-                        perf_config["model-name"]
-                    ] = self._extract_perf_records_from_row(metrics, row)
-                    self._gpu_records = self._extract_gpu_records_from_row(metrics, row)
+            self._extract_gpu_records(perf_config, metrics)
+            self._extract_llm_records(perf_config, metrics)
 
         for perf_config in [
             mrc.perf_config() for mrc in self._config.model_run_configs()
         ]:
-            # Remove the latency file and all associated composing model latency files
+            # Remove the latency/profile export files and all associated composing model latency files
            for f in glob.glob(f"*{perf_config['latency-report-file']}"):
                 os.remove(f)
+            for f in glob.glob(f"*{perf_config['profile-export-file']}"):
+                os.remove(f)
+
+    def _extract_gpu_records(self, perf_config, metrics):
+        if perf_config["profile-export-file"]:
+            return
+
+        with open(perf_config["latency-report-file"], mode="r") as f:
+            csv_reader = csv.DictReader(f, delimiter=",")
+
+            for row in csv_reader:
+                self._perf_records[
+                    perf_config["model-name"]
+                ] = self._extract_perf_records_from_row(metrics, row)
+                self._gpu_records = self._extract_gpu_records_from_row(metrics, row)
+
+    def _extract_llm_records(self, perf_config, metrics):
+        if not perf_config["profile-export-file"]:
+            return
+
+        self._llm_records[perf_config["model-name"]] = []
+
+        with open(perf_config["profile-export-file"], mode="r") as f:
+            llm_output = json.load(f)
+
+            avg_first_token_latency = self._calculate_avg_first_token_latency(
+                llm_output
+            )
+            record = PerfAnalyzer.llm_metric_table[0][PerfAnalyzer.RECORD_CLASS](
+                value=avg_first_token_latency
+            )  # type: ignore
+
+            self._llm_records[perf_config["model-name"]].append(record)
+
+            avg_token_to_token_latency = self._calculate_avg_token_to_token_latency(
+                llm_output
+            )
+            record = PerfAnalyzer.llm_metric_table[1][PerfAnalyzer.RECORD_CLASS](
+                value=avg_token_to_token_latency
+            )  # type: ignore
+            self._llm_records[perf_config["model-name"]].append(record)
+
+    def _calculate_avg_first_token_latency(self, llm_output: Dict) -> float:
+        total_first_token_latencies = []
+        for request in llm_output["experiments"][0]["requests"]:
+            total_first_token_latencies.append(
+                request["response_timestamps"][0] - request["timestamp"]
+            )
+
+        avg_first_token_latency = mean(total_first_token_latencies)
+
+        return avg_first_token_latency
+
+    def _calculate_avg_token_to_token_latency(self, llm_output: Dict) -> float:
+        token_to_token_latencies = []
+        for request in llm_output["experiments"][0]["requests"]:
+            response_to_response_latencies = []
+            prev_response = request["response_timestamps"][0]
+            for response in request["response_timestamps"][1:]:
+                response_to_response_latencies.append(response - prev_response)
+                prev_response = response
+
+            token_to_token_latencies.append(mean(response_to_response_latencies))
+
+        avg_token_to_token_latency = mean(token_to_token_latencies)
+
+        return avg_token_to_token_latency
 
     def _extract_perf_records_from_row(
         self, requested_metrics: List[Record], row_metrics: Dict[str, str]
diff --git a/model_analyzer/perf_analyzer/perf_config.py b/model_analyzer/perf_analyzer/perf_config.py
index 7cab2dd3c..a72cdc3b1 100755
--- a/model_analyzer/perf_analyzer/perf_config.py
+++ b/model_analyzer/perf_analyzer/perf_config.py
@@ -73,6 +73,7 @@ class PerfAnalyzerConfig:
         "metrics-interval",
         "bls-composing-models",
         "request-parameter",
+        "request-period",
     ]
 
     input_to_options = [
@@ -82,6 +83,7 @@ class PerfAnalyzerConfig:
         "url",
         "protocol",
         "latency-report-file",
+        "profile-export-file",
         "http-header",
     ]
 
@@ -112,6 +114,7 @@ def __init__(self):
             "-u": None,
             "-i": None,
             "-f": None,
+            "--profile-export-file": None,
             "-H": None,
         }
         self._verbose = {"-v": None, "-v -v": None, "--verbose-csv": None}
@@ -123,6 +126,7 @@ def __init__(self):
             "url": "-u",
             "protocol": "-i",
             "latency-report-file": "-f",
+            "profile-export-file": "--profile-export-file",
             "http-header": "-H",
         }
 
@@ -193,6 +197,9 @@ def update_config_from_profile_config(self, model_name, profile_config):
             "verbose-csv": "--verbose-csv",
         }
 
+        if profile_config.is_llm_model():
+            params.update({"profile-export-file": model_name + "-results.json"})
+
         if profile_config.triton_launch_mode == "c_api":
             params.update(
                 {
@@ -307,7 +314,7 @@ def remove_url_from_cli_string(cls, cli_string):
     @classmethod
     def remove_mrc_from_cli_string(cls, cli_string):
         """
-        utility function strips the measruement request count
+        utility function strips the measurement request count
         from a cli string representation
 
         Parameters
diff --git a/model_analyzer/record/metrics_manager.py b/model_analyzer/record/metrics_manager.py
index 176b632df..fe77f6eb8 100755
--- a/model_analyzer/record/metrics_manager.py
+++ b/model_analyzer/record/metrics_manager.py
@@ -69,6 +69,8 @@ class MetricsManager:
         "gpu_power_usage",
         "cpu_available_ram",
         "cpu_used_ram",
+        "avg_first_token_latency",
+        "avg_token_to_token_latency",
     ]
 
     def __init__(self, config, client, server, gpus, result_manager, state_manager):
@@ -116,6 +118,7 @@ def __init__(self, config, client, server, gpus, result_manager, state_manager):
             self._gpu_metrics,
             self._perf_metrics,
             self._cpu_metrics,
+            self._llm_metrics,
         ) = self._categorize_metrics(self.metrics, self._config.collect_cpu_metrics)
         self._gpus = gpus
         self._init_state()
@@ -160,21 +163,23 @@ def _categorize_metrics(metric_tags, collect_cpu_metrics=False):
 
         Returns
         -------
-        (list,list,list)
-            tuple of three lists (DCGM, PerfAnalyzer, CPU) metrics
+        (list,list,list,list)
+            tuple of four lists (DCGM, PerfAnalyzer, CPU, LLM) metrics
         """
 
-        gpu_metrics, perf_metrics, cpu_metrics = [], [], []
+        gpu_metrics, perf_metrics, cpu_metrics, llm_metrics = [], [], [], []
         # Separates metrics and objectives into related lists
         for metric in MetricsManager.get_metric_types(metric_tags):
             if metric in PerfAnalyzer.get_gpu_metrics():
                 gpu_metrics.append(metric)
             elif metric in PerfAnalyzer.get_perf_metrics():
                 perf_metrics.append(metric)
+            elif metric in PerfAnalyzer.get_llm_metrics():
+                llm_metrics.append(metric)
             elif collect_cpu_metrics and (metric in CPUMonitor.cpu_metrics):
                 cpu_metrics.append(metric)
 
-        return gpu_metrics, perf_metrics, cpu_metrics
+        return gpu_metrics, perf_metrics, cpu_metrics, llm_metrics
 
     def profile_server(self):
         """
@@ -556,6 +561,9 @@ def _run_perf_analyzer(
         )
 
         metrics_to_gather = self._perf_metrics + self._gpu_metrics
+        if self._config.is_llm_model():
+            metrics_to_gather += self._llm_metrics
+
         status = perf_analyzer.run(metrics_to_gather, env=perf_analyzer_env)
 
         self._write_perf_analyzer_output(perf_output_writer, perf_analyzer)
@@ -564,7 +572,9 @@ def _run_perf_analyzer(
             self._handle_unsuccessful_perf_analyzer_run(perf_analyzer)
             return (None, None)
 
-        perf_records = perf_analyzer.get_perf_records()
+        perf_records = (
+            perf_analyzer.get_perf_records() + perf_analyzer.get_llm_records()
+        )
         gpu_records = perf_analyzer.get_gpu_records()
 
         aggregated_perf_records = self._aggregate_perf_records(perf_records)
diff --git a/model_analyzer/record/types/avg_first_token_latency.py b/model_analyzer/record/types/avg_first_token_latency.py
index 15badd92a..72d539633 100755
--- a/model_analyzer/record/types/avg_first_token_latency.py
+++ b/model_analyzer/record/types/avg_first_token_latency.py
@@ -22,7 +22,7 @@
 @total_ordering
 class AvgFirstTokenLatency(DecreasingRecord):
     """
-    A record for perf_analyzer avg first token to token latency metric
+    A record for perf_analyzer average first token latency metric
     """
 
     tag = "avg_first_token_latency"
diff --git a/model_analyzer/record/types/avg_token_to_token_latency.py b/model_analyzer/record/types/avg_token_to_token_latency.py
index 2941da39b..66c93b6fc 100755
--- a/model_analyzer/record/types/avg_token_to_token_latency.py
+++ b/model_analyzer/record/types/avg_token_to_token_latency.py
@@ -22,7 +22,7 @@
 @total_ordering
 class AvgTokenToTokenLatency(DecreasingRecord):
     """
-    A record for perf_analyzer avg token-to-token latency metric
+    A record for perf_analyzer average token-to-token latency metric
     """
 
     tag = "avg_token_to_token_latency"
diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py
index d6e42fadc..380a5d404 100755
--- a/tests/common/test_utils.py
+++ b/tests/common/test_utils.py
@@ -238,6 +238,7 @@ def convert_avg_gpu_metrics_to_data(avg_gpu_metric_values):
 def construct_perf_analyzer_config(
     model_name="my-model",
     output_file_name="my-model-results.csv",
+    export_file_name="my-model-results.json",
     batch_size=DEFAULT_BATCH_SIZES,
     concurrency=DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
     periodic_concurrency=DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
@@ -257,6 +258,8 @@ def construct_perf_analyzer_config(
         The name of the model
     output_file_name: str
         The name of the output file
+    export_file_name: str
+        The name of the export file
     batch_size: int
         The batch size for this PA configuration
     concurrency: int
@@ -285,6 +288,9 @@ def construct_perf_analyzer_config(
     pa_config._options["-f"] = output_file_name
     pa_config._options["-b"] = batch_size
 
+    if llm_search_mode:
+        pa_config._options["--profile-export-file"] = export_file_name
+
     if request_rate:
         pa_config._args["request-rate-range"] = request_rate
     elif llm_search_mode:
diff --git a/tests/test_perf_analyzer.py b/tests/test_perf_analyzer.py
index e95f0d4a1..a984279bd 100755
--- a/tests/test_perf_analyzer.py
+++ b/tests/test_perf_analyzer.py
@@ -49,6 +49,7 @@
 from model_analyzer.triton.client.client_factory import TritonClientFactory
 from model_analyzer.triton.server.server_config import TritonServerConfig
 from model_analyzer.triton.server.server_factory import TritonServerFactory
+from tests.common.test_utils import construct_perf_analyzer_config
 
 from .common import test_result_collector as trc
 from .mocks.mock_client import MockTritonClientMethods
@@ -67,7 +68,56 @@
 TEST_GRPC_URL = "test_hostname:test_port"
 
 
-class TestPerfAnalyzerMethods(trc.TestResultCollector):
+def mock_open_method(*args, **kwargs):
+    pa_csv_mock = """Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,"""
+    pa_csv_mock += """Client Recv,p50 latency,p90 latency,p95 latency,p99 latency,Avg latency,request/response,response wait,"""
+    pa_csv_mock += """Avg GPU Utilization,Avg GPU Power Usage,Max GPU Memory Usage,Total GPU Memory\n"""
+    pa_csv_mock += """1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314,"""
+    pa_csv_mock += """GPU-aaf4fea0:0.809;GPU-aaf4fea1:0.901;GPU-aaf4fea2:0.745;,GPU-aaf4fea0:91.2;GPU-aaf4fea1:100;,GPU-aaf4fea0:1000000000;GPU-aaf4fea1:2000000000,GPU-aaf4fea0:1500000000;GPU-aaf4fea2:3000000000"""
+
+    # yapf: disable
+    pa_json_mock = """
+    {
+        "experiments": [
+            {
+                "experiment": {
+                    "mode": "concurrency",
+                    "value": 4
+                },
+                "requests": [
+                    {
+                        "timestamp": 1,
+                        "sequence_id": 1,
+                        "response_timestamps": [2,3,4]
+                    },
+                    {
+                        "timestamp": 4,
+                        "sequence_id": 2,
+                        "response_timestamps": [5,6]
+                    },
+                    {
+                        "timestamp": 6,
+                        "sequence_id": 3,
+                        "response_timestamps": [7,8,9]
+                    }
+                ],
+                "window_boundaries": [1,5,6]
+            }
+        ],
+        "version": "1.2.3"
+    }
+    """
+    # yapf: enable
+
+    if args[0] == "my-model-results.csv":
+        return mock_open(read_data=pa_csv_mock)(*args, **kwargs)
+    elif args[0] == "my-model-llm-results.csv":
+        return mock_open(read_data=pa_json_mock)(*args, **kwargs)
+    else:
+        return mock_open(read_data=None)(*args, **kwargs)
+
+
+class TestPerfAnalyzer(trc.TestResultCollector):
     def setUp(self):
         # Mocks
         self.server_local_mock = MockServerLocalMethods()
@@ -80,7 +130,7 @@ def setUp(self):
         self.client_mock.start()
 
         # PerfAnalyzer config for all tests
-        self.config = PerfAnalyzerConfig()
+        self.config = construct_perf_analyzer_config()
         self.config["model-name"] = TEST_MODEL_NAME
         self.config["measurement-interval"] = 1000
         self.config["measurement-request-count"] = 50
@@ -90,6 +140,16 @@ def setUp(self):
             ModelRunConfig("fake_name", MagicMock(), self.config)
         )
 
+        self.llm_config = construct_perf_analyzer_config(llm_search_mode=True)
+        self.llm_config["model-name"] = TEST_MODEL_NAME
+        self.llm_config["measurement-interval"] = 1000
+        self.llm_config["measurement-request-count"] = 50
+
+        self.llm_run_config = RunConfig({})
+        self.llm_run_config.add_model_run_config(
+            ModelRunConfig("fake_name", MagicMock(), self.llm_config)
+        )
+
         self.gpus = [GPUDevice("TEST_DEVICE_NAME", 0, "TEST_PCI_BUS_ID", "TEST_UUID")]
 
         # Triton Server
@@ -132,7 +192,7 @@ def test_perf_analyzer_config(self):
 
     def test_perf_analyzer_boolean_args(self):
         """Test that only positive boolean args get added"""
-        expected_cli_str = "-m test_model --measurement-interval=1000 --binary-search --measurement-request-count=50"
+        expected_cli_str = "-m test_model -b 1 -u localhost:8001 -i grpc -f my-model-results.csv --measurement-interval=1000 --concurrency-range=1 --binary-search --measurement-mode=count_windows --measurement-request-count=50 --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0"
 
         self.config["async"] = "False"
         self.config["binary-search"] = "True"
@@ -141,7 +201,7 @@ def test_perf_analyzer_additive_args(self):
         shape = ["name1:1,2,3", "name2:4,5,6"]
-        expected_cli_str = "-m test_model --measurement-interval=1000 --shape=name1:1,2,3 --shape=name2:4,5,6 --measurement-request-count=50"
+        expected_cli_str = "-m test_model -b 1 -u localhost:8001 -i grpc -f my-model-results.csv --measurement-interval=1000 --concurrency-range=1 --shape=name1:1,2,3 --shape=name2:4,5,6 --measurement-mode=count_windows --measurement-request-count=50 --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0"
 
         self.config["shape"] = shape[:]
 
@@ -149,7 +209,7 @@ def test_perf_analyzer_additive_args(self):
         self.assertEqual(self.config.to_cli_string(), expected_cli_str)
 
         shape = "name1:1,2,3"
-        expected_cli_str = "-m test_model --measurement-interval=1000 --shape=name1:1,2,3 --measurement-request-count=50"
+        expected_cli_str = "-m test_model -b 1 -u localhost:8001 -i grpc -f my-model-results.csv --measurement-interval=1000 --concurrency-range=1 --shape=name1:1,2,3 --measurement-mode=count_windows --measurement-request-count=50 --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0"
         self.config["shape"] = shape
 
         self.assertEqual(self.config.to_cli_string(), expected_cli_str)
@@ -177,10 +237,13 @@ def test_perf_analyzer_ssl_args(self):
         ssl_https_private_key_file = "h"
 
         expected_cli_str = (
-            f"-m test_model --measurement-interval=1000 --measurement-request-count=50 --ssl-grpc-use-ssl "
+            f"-m test_model -b 1 -u localhost:8001 -i grpc -f my-model-results.csv --measurement-interval=1000 "
+            f"--concurrency-range=1 --measurement-mode=count_windows --measurement-request-count=50 --ssl-grpc-use-ssl "
             f"--ssl-grpc-root-certifications-file=a --ssl-grpc-private-key-file=b --ssl-grpc-certificate-chain-file=c "
-            f"--ssl-https-verify-peer=1 --ssl-https-verify-host=2 --ssl-https-ca-certificates-file=d --ssl-https-client-certificate-type=e "
-            f"--ssl-https-client-certificate-file=f --ssl-https-private-key-type=g --ssl-https-private-key-file=h"
+            f"--ssl-https-verify-peer=1 --ssl-https-verify-host=2 --ssl-https-ca-certificates-file=d "
+            f"--ssl-https-client-certificate-type=e --ssl-https-client-certificate-file=f --ssl-https-private-key-type=g "
+            f"--ssl-https-private-key-file=h --collect-metrics --metrics-url=http://localhost:8002/metrics "
+            f"--metrics-interval=1000.0"
         )
 
         self.config["ssl-grpc-use-ssl"] = ssl_grpc_use_ssl
@@ -241,11 +304,15 @@ def test_perf_analyzer_ssl_args(self):
         self.config["ssl-grpc-use-ssl"] = ssl_grpc_use_ssl
         self.assertEqual(self.config["ssl-grpc-use-ssl"], ssl_grpc_use_ssl)
         expected_cli_str = (
-            f"-m test_model --measurement-interval=1000 --measurement-request-count=50 "
+            f"-m test_model -b 1 -u localhost:8001 -i grpc -f my-model-results.csv --measurement-interval=1000 "
+            f"--concurrency-range=1 --measurement-mode=count_windows --measurement-request-count=50 "
             f"--ssl-grpc-root-certifications-file=a --ssl-grpc-private-key-file=b --ssl-grpc-certificate-chain-file=c "
-            f"--ssl-https-verify-peer=1 --ssl-https-verify-host=2 --ssl-https-ca-certificates-file=d --ssl-https-client-certificate-type=e "
-            f"--ssl-https-client-certificate-file=f --ssl-https-private-key-type=g --ssl-https-private-key-file=h"
+            f"--ssl-https-verify-peer=1 --ssl-https-verify-host=2 --ssl-https-ca-certificates-file=d "
+            f"--ssl-https-client-certificate-type=e --ssl-https-client-certificate-file=f "
+            f"--ssl-https-private-key-type=g --ssl-https-private-key-file=h --collect-metrics "
+            f"--metrics-url=http://localhost:8002/metrics --metrics-interval=1000.0"
         )
+
         self.assertEqual(self.config.to_cli_string(), expected_cli_str)
 
     def test_run(self):
@@ -268,18 +335,12 @@ def test_run(self):
         self.server.start()
         self.client.wait_for_server_ready(num_retries=1)
 
-        pa_csv_mock = """Concurrency,Inferences/Second,Client Send,Network+Server Send/Recv,Server Queue,Server Compute Input,Server Compute Infer,Server Compute Output,"""
-        pa_csv_mock += """Client Recv,p50 latency,p90 latency,p95 latency,p99 latency,Avg latency,request/response,response wait,"""
-        pa_csv_mock += """Avg GPU Utilization,Avg GPU Power Usage,Max GPU Memory Usage,Total GPU Memory\n"""
-        pa_csv_mock += """1,46.8,2,187,18,34,65,16,1,4600,4700,4800,4900,5000,3,314,"""
-        pa_csv_mock += """GPU-aaf4fea0:0.809;GPU-aaf4fea1:0.901;GPU-aaf4fea2:0.745;,GPU-aaf4fea0:91.2;GPU-aaf4fea1:100;,GPU-aaf4fea0:1000000000;GPU-aaf4fea1:2000000000,GPU-aaf4fea0:1500000000;GPU-aaf4fea2:3000000000"""
-
         # Test avg latency parsing. GPU metric is ignored for get_perf_records()
         perf_metrics = [PerfLatencyAvg, GPUUtilization]
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -292,7 +353,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -305,7 +366,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -318,7 +379,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -331,7 +392,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -344,7 +405,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -357,7 +418,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -370,7 +431,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -383,7 +444,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
        ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -396,7 +457,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -409,7 +470,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(gpu_metrics)
 
@@ -427,7 +488,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(gpu_metrics)
 
@@ -443,7 +504,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(gpu_metrics)
 
@@ -462,7 +523,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(gpu_metrics)
 
@@ -487,7 +548,7 @@ def test_run(self):
 
         with patch(
             "model_analyzer.perf_analyzer.perf_analyzer.open",
-            mock_open(read_data=pa_csv_mock),
+            side_effect=mock_open_method,
         ), patch("model_analyzer.perf_analyzer.perf_analyzer.os.remove"):
             perf_analyzer.run(perf_metrics)
 
@@ -651,10 +712,27 @@ def test_get_cmd_single_model(self):
             "perf_analyzer",
             "-m",
             "test_model",
+            "-b",
+            "1",
+            "-u",
+            "localhost:8001",
+            "-i",
+            "grpc",
+            "-f",
+            "my-model-results.csv",
             "--measurement-interval",
             "1000",
+            "--concurrency-range",
+            "1",
+            "--measurement-mode",
+            "count_windows",
             "--measurement-request-count",
             "50",
+            "--collect-metrics",
+            "--metrics-url",
+            "http://localhost:8002/metrics",
+            "--metrics-interval",
+            "1000.0",
         ]
 
         self.assertEqual(pa._get_cmd(), expected_cmd)
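A minimal standalone sketch (outside the patch) of the math behind the two new records, assuming the profile-export JSON layout shown in pa_json_mock above; llm_latencies is a hypothetical helper name, not an API introduced by this change, and unlike the patch it skips single-response requests rather than averaging an empty list of gaps:

import json
from statistics import mean  # the patch itself uses numpy.mean


def llm_latencies(profile_export_path):
    # The export holds one experiment with a list of requests.
    with open(profile_export_path) as f:
        requests = json.load(f)["experiments"][0]["requests"]

    # Avg first-token latency: first response timestamp minus the request timestamp.
    avg_first_token = mean(
        r["response_timestamps"][0] - r["timestamp"] for r in requests
    )

    # Avg token-to-token latency: mean gap between consecutive responses,
    # averaged across requests.
    per_request = []
    for r in requests:
        ts = r["response_timestamps"]
        if len(ts) > 1:
            per_request.append(mean(b - a for a, b in zip(ts, ts[1:])))
    avg_token_to_token = mean(per_request)

    return avg_first_token, avg_token_to_token


# With the mocked export above (requests at 1, 4, 6 with responses
# [2,3,4], [5,6], [7,8,9]) both averages come out to 1.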