From e1fdb1b8e3dc31a0aa489536252edcda05330e60 Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:54:38 -0700 Subject: [PATCH] New config option: --always-report-gpu-metrics (#734) * Add config option * Summary report support * Changing cpu_only to report_gpu_metrics * changing name of config option * adding back in always * Adding option to report * Fixing formatting * Updated logic to use capture_gpu_metrics * Adding cpu_only to detailed report unit test * Changing comment --- docs/config.md | 6 ++ .../config/input/config_command_profile.py | 13 ++- .../config/input/config_command_report.py | 11 +++ .../config/input/config_defaults.py | 1 + model_analyzer/record/metrics_manager.py | 34 +++---- model_analyzer/reports/report_manager.py | 89 +++++++++++-------- tests/test_bls_report_manager.py | 4 +- tests/test_cli.py | 1 + tests/test_ensemble_report_manager.py | 4 +- tests/test_report_manager.py | 45 ++++++---- 10 files changed, 133 insertions(+), 75 deletions(-) diff --git a/docs/config.md b/docs/config.md index d3c36bbf5..9c7a9b8f4 100644 --- a/docs/config.md +++ b/docs/config.md @@ -227,6 +227,9 @@ cpu_only_composing_models: # Enables the searching of request rate (instead of concurrency) [ request_rate_search_enable: | default: false] +# Always report GPU metrics, even if the model(s) is cpu_only +[ always_report_gpu_metrics: | default: false] + # Skips the generation of summary reports and tables [ skip_summary_reports: | default: false] @@ -335,6 +338,9 @@ report_model_configs: # Specify path to config YAML file [ config_file: ] + +# Always report GPU metrics +[ always_report_gpu_metrics: | default: false] ``` ## YAML only options diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py index ec59d71f4..02d6def28 100755 --- a/model_analyzer/config/input/config_command_profile.py +++ b/model_analyzer/config/input/config_command_profile.py @@ -18,7 +18,7 @@ import logging import os -import numba +import numba.cuda import psutil from google.protobuf.descriptor import FieldDescriptor from tritonclient.grpc.model_config_pb2 import ModelConfig @@ -37,6 +37,7 @@ from .config_command import ConfigCommand from .config_defaults import ( + DEFAULT_ALWAYS_REPORT_GPU_METRICS, DEFAULT_BATCH_SIZES, DEFAULT_CHECKPOINT_DIRECTORY, DEFAULT_CLIENT_PROTOCOL, @@ -266,6 +267,16 @@ def _fill_config(self): "Use 'all' to profile all the GPUs visible by CUDA.", ) ) + self._add_config( + ConfigField( + "always_report_gpu_metrics", + flags=["--always-report-gpu-metrics"], + field_type=ConfigPrimitive(bool), + parser_args={"action": "store_true"}, + default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS, + description="Report GPU metrics, even when the model is `cpu_only`.", + ) + ) self._add_config( ConfigField( "skip_summary_reports", diff --git a/model_analyzer/config/input/config_command_report.py b/model_analyzer/config/input/config_command_report.py index 10bada5f3..7d1eee7fb 100755 --- a/model_analyzer/config/input/config_command_report.py +++ b/model_analyzer/config/input/config_command_report.py @@ -26,6 +26,7 @@ from .config_command import ConfigCommand from .config_defaults import ( + DEFAULT_ALWAYS_REPORT_GPU_METRICS, DEFAULT_CHECKPOINT_DIRECTORY, DEFAULT_EXPORT_PATH, DEFAULT_OFFLINE_REPORT_PLOTS, @@ -172,6 +173,16 @@ def _fill_config(self): description="Output file format for detailed report.", ) ) + self._add_config( + ConfigField( + "always_report_gpu_metrics", + 
flags=["--always_report-gpu-metrics"], + field_type=ConfigPrimitive(bool), + parser_args={"action": "store_true"}, + default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS, + description="Report GPU metrics, even when the model is `cpu_only`.", + ) + ) def set_config_values(self, args): """ diff --git a/model_analyzer/config/input/config_defaults.py b/model_analyzer/config/input/config_defaults.py index f7401ad75..785dec205 100755 --- a/model_analyzer/config/input/config_defaults.py +++ b/model_analyzer/config/input/config_defaults.py @@ -34,6 +34,7 @@ DEFAULT_COLLECT_CPU_METRICS = False DEFAULT_LOG_LEVEL = "INFO" DEFAULT_GPUS = "all" +DEFAULT_ALWAYS_REPORT_GPU_METRICS = False DEFAULT_SKIP_SUMMARY_REPORTS = False DEFAULT_SKIP_DETAILED_REPORTS = False DEFAULT_OUTPUT_MODEL_REPOSITORY = os.path.join(os.getcwd(), "output_model_repository") diff --git a/model_analyzer/record/metrics_manager.py b/model_analyzer/record/metrics_manager.py index 2a9987f1a..393f37e53 100755 --- a/model_analyzer/record/metrics_manager.py +++ b/model_analyzer/record/metrics_manager.py @@ -183,13 +183,13 @@ def profile_server(self): TritonModelAnalyzerException """ - cpu_only = not numba.cuda.is_available() - self._start_monitors(cpu_only=cpu_only) + capture_gpu_metrics = numba.cuda.is_available() + self._start_monitors(capture_gpu_metrics=capture_gpu_metrics) time.sleep(self._config.duration_seconds) - if not cpu_only: + if capture_gpu_metrics or self._config.always_report_gpu_metrics: server_gpu_metrics = self._get_gpu_inference_metrics() self._result_manager.add_server_data(data=server_gpu_metrics) - self._destroy_monitors(cpu_only=cpu_only) + self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics) def execute_run_config( self, run_config: RunConfig @@ -244,27 +244,29 @@ def profile_models(self, run_config: RunConfig) -> Optional[RunConfigMeasurement if not self._config.perf_output else FileWriter(self._config.perf_output_path) ) - cpu_only = run_config.cpu_only() + capture_gpu_metrics = ( + run_config.cpu_only() and not self._config.always_report_gpu_metrics + ) self._print_run_config_info(run_config) - self._start_monitors(cpu_only=cpu_only) + self._start_monitors(capture_gpu_metrics=capture_gpu_metrics) perf_analyzer_metrics, model_gpu_metrics = self._run_perf_analyzer( run_config, perf_output_writer ) if not perf_analyzer_metrics: - self._stop_monitors(cpu_only=cpu_only) - self._destroy_monitors(cpu_only=cpu_only) + self._stop_monitors(capture_gpu_metrics=capture_gpu_metrics) + self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics) return None # Get metrics for model inference and combine metrics that do not have GPU UUID - if not cpu_only and not model_gpu_metrics: + if capture_gpu_metrics and not model_gpu_metrics: model_gpu_metrics = self._get_gpu_inference_metrics() model_cpu_metrics = self._get_cpu_inference_metrics() - self._destroy_monitors(cpu_only=cpu_only) + self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics) run_config_measurement = None if model_gpu_metrics is not None and perf_analyzer_metrics is not None: @@ -450,13 +452,13 @@ def _get_measurement_if_config_duplicate(self, run_config): return measurements.get(key, None) - def _start_monitors(self, cpu_only=False): + def _start_monitors(self, capture_gpu_metrics=True): """ Start any metrics monitors """ self._gpu_monitor = None - if not cpu_only: + if capture_gpu_metrics: try: self._gpu_monitor = RemoteMonitor( self._config.triton_metrics_url, @@ -483,23 +485,23 @@ def _start_monitors(self, cpu_only=False): ) 
self._cpu_monitor.start_recording_metrics() - def _stop_monitors(self, cpu_only=False): + def _stop_monitors(self, capture_gpu_metrics=True): """ Stop any metrics monitors, when we don't need to collect the result """ # Stop DCGM Monitor only if there are GPUs available - if not cpu_only: + if capture_gpu_metrics: self._gpu_monitor.stop_recording_metrics() self._cpu_monitor.stop_recording_metrics() - def _destroy_monitors(self, cpu_only=False): + def _destroy_monitors(self, capture_gpu_metrics=True): """ Destroy the monitors created by start """ - if not cpu_only: + if capture_gpu_metrics: if self._gpu_monitor: self._gpu_monitor.destroy() if self._cpu_monitor: diff --git a/model_analyzer/reports/report_manager.py b/model_analyzer/reports/report_manager.py index 7753ad291..deca073b3 100755 --- a/model_analyzer/reports/report_manager.py +++ b/model_analyzer/reports/report_manager.py @@ -357,35 +357,32 @@ def _build_summary_report(self, report_key, num_configs, statistics): # Get GPU names and memory run_config = self._summary_data[report_key][0][0] - cpu_only = run_config.cpu_only() - - (gpu_names, max_memories) = self._get_gpu_stats( - measurements=[v for _, v in self._summary_data[report_key]] + report_gpu_metrics = ( + self._config.always_report_gpu_metrics or not run_config.cpu_only() ) + (gpu_names, max_memories) = (None, None) + if report_gpu_metrics: + (gpu_names, max_memories) = self._get_gpu_stats( + measurements=[v for _, v in self._summary_data[report_key]] + ) + # Get constraints constraint_str = self._create_constraint_string(report_key) # Build summary table and info sentence - if not cpu_only: - table, summary_sentence = self._build_summary_table( - report_key=report_key, - num_configurations=total_configurations, - num_measurements=total_measurements, - gpu_name=gpu_names, - ) - else: - table, summary_sentence = self._build_summary_table( - report_key=report_key, - num_configurations=total_configurations, - num_measurements=total_measurements, - cpu_only=True, - ) + table, summary_sentence = self._build_summary_table( + report_key=report_key, + num_configurations=total_configurations, + num_measurements=total_measurements, + gpu_name=gpu_names, + report_gpu_metrics=report_gpu_metrics, + ) # Add summary sections summary.add_title(title=f"{self._mode.title()} Result Summary") summary.add_subheading(f"Model: {' and '.join(report_key.split(','))}") - if not cpu_only: + if report_gpu_metrics: summary.add_paragraph(f"GPU(s): {gpu_names}") summary.add_paragraph(f"Total Available GPU Memory: {max_memories}") summary.add_paragraph(f"Constraint targets: {constraint_str}") @@ -407,7 +404,7 @@ def _build_summary_report(self, report_key, num_configs, statistics): caption_throughput = f"{throughput_plot_config.title()} curves for {num_best_configs} best configurations." 
- if not cpu_only: + if report_gpu_metrics: summary.add_images([throughput_plot], [caption_throughput], image_width=66) if self._mode == "online": memory_latency_plot = os.path.join( @@ -482,7 +479,7 @@ def _build_summary_table( num_configurations, num_measurements, gpu_name=None, - cpu_only=False, + report_gpu_metrics=True, ): """ Creates a result table corresponding @@ -508,20 +505,14 @@ def _build_summary_table( best_run_config, best_run_config_measurement, gpu_name, - cpu_only, + report_gpu_metrics, multi_model, is_ensemble, is_bls, ) - summary_table = ( - self._construct_summary_result_table_cpu_only( - sorted_measurements, multi_model, has_composing_models - ) - if cpu_only - else self._construct_summary_result_table( - sorted_measurements, multi_model, has_composing_models - ) + summary_table = self._construct_summary_result_table( + sorted_measurements, multi_model, has_composing_models, report_gpu_metrics ) return summary_table, summary_sentence @@ -581,7 +572,7 @@ def _create_summary_sentence( best_run_config, best_run_config_measurement, gpu_name, - cpu_only, + report_gpu_metrics, multi_model, is_ensemble, is_bls, @@ -593,7 +584,9 @@ def _create_summary_sentence( objective_phrase = self._create_summary_objective_phrase( report_key, best_run_config_measurement ) - gpu_name_phrase = self._create_summary_gpu_name_phrase(gpu_name, cpu_only) + gpu_name_phrase = self._create_summary_gpu_name_phrase( + gpu_name, report_gpu_metrics + ) summary_sentence = ( f"In {measurement_phrase} across {config_phrase} " @@ -778,8 +771,20 @@ def _create_instance_group_phrase(self, model_config): ret_str += "s" return ret_str - def _create_summary_gpu_name_phrase(self, gpu_name, cpu_only): - return f", on GPU(s) {gpu_name}" if not cpu_only else "" + def _create_summary_gpu_name_phrase(self, gpu_name, report_gpu_metrics): + return f", on GPU(s) {gpu_name}" if report_gpu_metrics else "" + + def _construct_summary_result_table( + self, sorted_measurements, multi_model, has_composing_models, report_gpu_metrics + ): + if report_gpu_metrics: + return self._construct_summary_result_table_with_gpu( + sorted_measurements, multi_model, has_composing_models + ) + else: + return self._construct_summary_result_table_cpu_only( + sorted_measurements, multi_model, has_composing_models + ) def _construct_summary_result_table_cpu_only( self, sorted_measurements, multi_model, has_composing_models @@ -794,7 +799,7 @@ def _construct_summary_result_table_cpu_only( return summary_table - def _construct_summary_result_table( + def _construct_summary_result_table_with_gpu( self, sorted_measurements, multi_model, has_composing_models ): summary_table = self._create_summary_result_table_header(multi_model) @@ -1108,7 +1113,9 @@ def _build_detailed_table(self, model_config_name): key=lambda x: x.get_non_gpu_metric_value(sort_by_tag), reverse=True, ) - cpu_only = model_config.cpu_only() + report_gpu_metrics = ( + self._config.always_report_gpu_metrics or not model_config.cpu_only() + ) if self._was_measured_with_request_rate(measurements[0]): first_column_header = ( @@ -1125,7 +1132,7 @@ def _build_detailed_table(self, model_config_name): "concurrency-range" if self._mode == "online" else "batch-size" ) - if not cpu_only: + if report_gpu_metrics: headers = [ first_column_header, "p99 Latency (ms)", @@ -1156,7 +1163,7 @@ def _build_detailed_table(self, model_config_name): detailed_table = ResultTable(headers, title="Detailed Table") # Construct table - if not cpu_only: + if report_gpu_metrics: for measurement in measurements: row = 
[ # TODO-TMA-568: This needs to be updated because there will be multiple model configs @@ -1219,7 +1226,11 @@ def _build_detailed_info(self, model_config_name): gpu_cpu_string = "CPU" - if not run_config.cpu_only(): + report_gpu_metrics = ( + self._config.always_report_gpu_metrics or not run_config.cpu_only() + ) + + if report_gpu_metrics: gpu_names, max_memories = self._get_gpu_stats(measurements) gpu_cpu_string = f"GPU(s) {gpu_names} with total memory {max_memories}" diff --git a/tests/test_bls_report_manager.py b/tests/test_bls_report_manager.py index 255ac1a6f..19053863c 100755 --- a/tests/test_bls_report_manager.py +++ b/tests/test_bls_report_manager.py @@ -86,7 +86,7 @@ def test_bls_summary(self): num_measurements=26, num_configurations=10, gpu_name="TITAN RTX", - cpu_only=False, + report_gpu_metrics=True, ) self.assertEqual(summary_sentence, expected_summary_sentence) @@ -132,7 +132,7 @@ def test_bls_summary_cpu_only(self): num_measurements=26, num_configurations=10, gpu_name="TITAN RTX", - cpu_only=True, + report_gpu_metrics=False, ) self.assertEqual(summary_sentence, expected_summary_sentence) diff --git a/tests/test_cli.py b/tests/test_cli.py index a2bbd9004..98ec60237 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -64,6 +64,7 @@ def get_test_options(): OptionStruct("bool", "profile","--early-exit-enable"), OptionStruct("bool", "profile","--skip-summary-reports"), OptionStruct("bool", "profile","--skip-detailed-reports"), + OptionStruct("bool", "profile","--always-report-gpu-metrics"), #Int/Float options # Options format: # (int/float, MA step, long_option, short_option, test_value, expected_default_value) diff --git a/tests/test_ensemble_report_manager.py b/tests/test_ensemble_report_manager.py index 06165ee96..afa3e96f8 100755 --- a/tests/test_ensemble_report_manager.py +++ b/tests/test_ensemble_report_manager.py @@ -85,7 +85,7 @@ def test_ensemble_summary(self): num_measurements=68, num_configurations=37, gpu_name="TITAN RTX", - cpu_only=False, + report_gpu_metrics=True, ) self.assertEqual(summary_sentence, expected_summary_sentence) @@ -128,7 +128,7 @@ def test_ensemble_summary_cpu_only(self): num_measurements=68, num_configurations=37, gpu_name="TITAN RTX", - cpu_only=True, + report_gpu_metrics=False, ) self.assertEqual(summary_sentence, expected_summary_sentence) diff --git a/tests/test_report_manager.py b/tests/test_report_manager.py index 8873aec2a..8db02ad18 100755 --- a/tests/test_report_manager.py +++ b/tests/test_report_manager.py @@ -45,7 +45,8 @@ def _init_managers( models="test_model", num_configs_per_model=10, mode="online", - subcommand="analyze", + subcommand="profile", + report_gpu_metrics=False, ): args = ["model-analyzer", subcommand, "-f", "path-to-config-file"] if subcommand == "profile": @@ -54,6 +55,9 @@ def _init_managers( else: args.extend(["--report-model-configs", models]) + if report_gpu_metrics: + args.extend(["--always-report-gpu-metrics"]) + yaml_str = ( """ num_configs_per_model: """ @@ -223,16 +227,24 @@ def test_add_results(self, *args): def test_build_summary_table(self, *args): for mode in ["offline", "online"]: for cpu_only in [True, False]: - self.subtest_build_summary_table(mode, cpu_only) + for report_gpu_metrics in [True, False]: + self.subtest_build_summary_table(mode, cpu_only, report_gpu_metrics) - def subtest_build_summary_table(self, mode, cpu_only): - self._init_managers(models="test_model", mode=mode, subcommand="profile") + def subtest_build_summary_table(self, mode, cpu_only, report_gpu_metrics): + 
self._init_managers( + models="test_model", + mode=mode, + subcommand="profile", + report_gpu_metrics=report_gpu_metrics, + ) result_comparator = RunConfigResultComparator( metric_objectives_list=[{"perf_throughput": 10}], model_weights=[1] ) avg_gpu_metrics = {0: {"gpu_used_memory": 6000, "gpu_utilization": 60}} + gpu_metrics = report_gpu_metrics or not cpu_only + for i in range(10, 0, -1): avg_non_gpu_metrics = { "perf_throughput": 100 + 10 * i, @@ -245,7 +257,7 @@ def subtest_build_summary_table(self, mode, cpu_only): avg_gpu_metrics, avg_non_gpu_metrics, result_comparator, - cpu_only, + cpu_only=not gpu_metrics, ) self.report_manager.create_summaries() @@ -255,7 +267,7 @@ def subtest_build_summary_table(self, mode, cpu_only): num_measurements=10, num_configurations=3, gpu_name="TITAN RTX", - cpu_only=cpu_only, + report_gpu_metrics=gpu_metrics, ) if mode == "online": @@ -263,11 +275,11 @@ def subtest_build_summary_table(self, mode, cpu_only): else: objective = "minimizing latency" - if cpu_only: + if gpu_metrics: expected_summary_sentence = ( "In 10 measurements across 3 configurations, " "test_model_config_10 is 100% better than the default configuration " - f"at {objective}, under the given constraints.
  • " + f"at {objective}, under the given constraints, on GPU(s) TITAN RTX.
    • " "test_model_config_10: 1 GPU instance with a max batch size of 8 on platform tensorflow_graphdef " "
    " ) @@ -275,7 +287,7 @@ def subtest_build_summary_table(self, mode, cpu_only): expected_summary_sentence = ( "In 10 measurements across 3 configurations, " "test_model_config_10 is 100% better than the default configuration " - f"at {objective}, under the given constraints, on GPU(s) TITAN RTX.
    • " + f"at {objective}, under the given constraints.
      • " "test_model_config_10: 1 GPU instance with a max batch size of 8 on platform tensorflow_graphdef " "
      " ) @@ -296,15 +308,18 @@ def subtest_build_summary_table(self, mode, cpu_only): def test_build_detailed_info(self): for cpu_only in [True, False]: - self._subtest_build_detailed_info(cpu_only) + for report_gpu_metrics in [True, False]: + self._subtest_build_detailed_info(cpu_only, report_gpu_metrics) - def _subtest_build_detailed_info(self, cpu_only): + def _subtest_build_detailed_info(self, cpu_only, report_gpu_metrics): self._init_managers(models="test_model_config_10", subcommand="report") result_comparator = RunConfigResultComparator( metric_objectives_list=[{"perf_throughput": 10}], model_weights=[1] ) + gpu_metrics = report_gpu_metrics or not cpu_only + avg_gpu_metrics = {"gpu_uuid": {"gpu_used_memory": 6000, "gpu_utilization": 60}} for i in range(10, 0, -1): @@ -319,7 +334,7 @@ def _subtest_build_detailed_info(self, cpu_only): avg_gpu_metrics, avg_non_gpu_metrics, result_comparator, - cpu_only=cpu_only, + cpu_only=not gpu_metrics, add_to_results_only=True, ) @@ -327,18 +342,18 @@ def _subtest_build_detailed_info(self, cpu_only): self.report_manager._build_detailed_table("test_model_config_10") sentence = self.report_manager._build_detailed_info("test_model_config_10") - if cpu_only: + if gpu_metrics: expected_sentence = ( f"The model config test_model_config_10 uses 1 GPU instance with " f"a max batch size of 8 and has dynamic batching enabled. 1 measurement(s) " - f"were obtained for the model config on CPU. " + f"were obtained for the model config on GPU(s) 1 x fake_gpu_name with total memory 1.0 GB. " f"This model uses the platform tensorflow_graphdef." ) else: expected_sentence = ( f"The model config test_model_config_10 uses 1 GPU instance with " f"a max batch size of 8 and has dynamic batching enabled. 1 measurement(s) " - f"were obtained for the model config on GPU(s) 1 x fake_gpu_name with total memory 1.0 GB. " + f"were obtained for the model config on CPU. " f"This model uses the platform tensorflow_graphdef." )