Skip to content

Commit

Permalink
New config option: --always-report-gpu-metrics (#734)
Browse files Browse the repository at this point in the history
* Add config option

* Summary report support

* Changing cpu_only to report_gpu_metrics

* changing name of config option

* adding back in always

* Adding option to report

* Fixing formatting

* Updated logic to use capture_gpu_metrics

* Adding cpu_only to detailed report unit test

* Changing comment
  • Loading branch information
nv-braf authored Jul 26, 2023
1 parent b4a6633 commit e1fdb1b
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 75 deletions.
6 changes: 6 additions & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ cpu_only_composing_models: <comma-delimited-string-list>
# Enables the searching of request rate (instead of concurrency)
[ request_rate_search_enable: <bool> | default: false]
# Always report GPU metrics, even if the model(s) are cpu_only
[ always_report_gpu_metrics: <bool> | default: false]
# Skips the generation of summary reports and tables
[ skip_summary_reports: <bool> | default: false]
Expand Down Expand Up @@ -335,6 +338,9 @@ report_model_configs: <comma-delimited-string-list>
# Specify path to config YAML file
[ config_file: <string> ]
# Always report GPU metrics
[ always_report_gpu_metrics: <bool> | default: false]
```

## YAML only options
Expand Down
13 changes: 12 additions & 1 deletion model_analyzer/config/input/config_command_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import logging
import os

import numba
import numba.cuda
import psutil
from google.protobuf.descriptor import FieldDescriptor
from tritonclient.grpc.model_config_pb2 import ModelConfig
Expand All @@ -37,6 +37,7 @@

from .config_command import ConfigCommand
from .config_defaults import (
DEFAULT_ALWAYS_REPORT_GPU_METRICS,
DEFAULT_BATCH_SIZES,
DEFAULT_CHECKPOINT_DIRECTORY,
DEFAULT_CLIENT_PROTOCOL,
Expand Down Expand Up @@ -266,6 +267,16 @@ def _fill_config(self):
"Use 'all' to profile all the GPUs visible by CUDA.",
)
)
self._add_config(
ConfigField(
"always_report_gpu_metrics",
flags=["--always-report-gpu-metrics"],
field_type=ConfigPrimitive(bool),
parser_args={"action": "store_true"},
default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS,
description="Report GPU metrics, even when the model is `cpu_only`.",
)
)
self._add_config(
ConfigField(
"skip_summary_reports",
Expand Down
11 changes: 11 additions & 0 deletions model_analyzer/config/input/config_command_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from .config_command import ConfigCommand
from .config_defaults import (
DEFAULT_ALWAYS_REPORT_GPU_METRICS,
DEFAULT_CHECKPOINT_DIRECTORY,
DEFAULT_EXPORT_PATH,
DEFAULT_OFFLINE_REPORT_PLOTS,
Expand Down Expand Up @@ -172,6 +173,16 @@ def _fill_config(self):
description="Output file format for detailed report.",
)
)
self._add_config(
ConfigField(
"always_report_gpu_metrics",
flags=["--always-report-gpu-metrics"],
field_type=ConfigPrimitive(bool),
parser_args={"action": "store_true"},
default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS,
description="Report GPU metrics, even when the model is `cpu_only`.",
)
)

def set_config_values(self, args):
"""
Expand Down
1 change: 1 addition & 0 deletions model_analyzer/config/input/config_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
DEFAULT_COLLECT_CPU_METRICS = False
DEFAULT_LOG_LEVEL = "INFO"
DEFAULT_GPUS = "all"
DEFAULT_ALWAYS_REPORT_GPU_METRICS = False
DEFAULT_SKIP_SUMMARY_REPORTS = False
DEFAULT_SKIP_DETAILED_REPORTS = False
DEFAULT_OUTPUT_MODEL_REPOSITORY = os.path.join(os.getcwd(), "output_model_repository")
Expand Down
34 changes: 18 additions & 16 deletions model_analyzer/record/metrics_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,13 @@ def profile_server(self):
TritonModelAnalyzerException
"""

cpu_only = not numba.cuda.is_available()
self._start_monitors(cpu_only=cpu_only)
capture_gpu_metrics = numba.cuda.is_available()
self._start_monitors(capture_gpu_metrics=capture_gpu_metrics)
time.sleep(self._config.duration_seconds)
if not cpu_only:
if capture_gpu_metrics or self._config.always_report_gpu_metrics:
server_gpu_metrics = self._get_gpu_inference_metrics()
self._result_manager.add_server_data(data=server_gpu_metrics)
self._destroy_monitors(cpu_only=cpu_only)
self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics)

def execute_run_config(
self, run_config: RunConfig
Expand Down Expand Up @@ -244,27 +244,29 @@ def profile_models(self, run_config: RunConfig) -> Optional[RunConfigMeasurement
if not self._config.perf_output
else FileWriter(self._config.perf_output_path)
)
cpu_only = run_config.cpu_only()
capture_gpu_metrics = (
    not run_config.cpu_only() or self._config.always_report_gpu_metrics
)

self._print_run_config_info(run_config)

self._start_monitors(cpu_only=cpu_only)
self._start_monitors(capture_gpu_metrics=capture_gpu_metrics)

perf_analyzer_metrics, model_gpu_metrics = self._run_perf_analyzer(
run_config, perf_output_writer
)

if not perf_analyzer_metrics:
self._stop_monitors(cpu_only=cpu_only)
self._destroy_monitors(cpu_only=cpu_only)
self._stop_monitors(capture_gpu_metrics=capture_gpu_metrics)
self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics)
return None

# Get metrics for model inference and combine metrics that do not have GPU UUID
if not cpu_only and not model_gpu_metrics:
if capture_gpu_metrics and not model_gpu_metrics:
model_gpu_metrics = self._get_gpu_inference_metrics()
model_cpu_metrics = self._get_cpu_inference_metrics()

self._destroy_monitors(cpu_only=cpu_only)
self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics)

run_config_measurement = None
if model_gpu_metrics is not None and perf_analyzer_metrics is not None:
Expand Down Expand Up @@ -450,13 +452,13 @@ def _get_measurement_if_config_duplicate(self, run_config):

return measurements.get(key, None)

def _start_monitors(self, cpu_only=False):
def _start_monitors(self, capture_gpu_metrics=True):
"""
Start any metrics monitors
"""

self._gpu_monitor = None
if not cpu_only:
if capture_gpu_metrics:
try:
self._gpu_monitor = RemoteMonitor(
self._config.triton_metrics_url,
Expand All @@ -483,23 +485,23 @@ def _start_monitors(self, cpu_only=False):
)
self._cpu_monitor.start_recording_metrics()

def _stop_monitors(self, cpu_only=False):
def _stop_monitors(self, capture_gpu_metrics=True):
"""
Stop any metrics monitors, when we don't need
to collect the result
"""

# Stop DCGM Monitor only if there are GPUs available
if not cpu_only:
if capture_gpu_metrics:
self._gpu_monitor.stop_recording_metrics()
self._cpu_monitor.stop_recording_metrics()

def _destroy_monitors(self, cpu_only=False):
def _destroy_monitors(self, capture_gpu_metrics=True):
"""
Destroy the monitors created by start
"""

if not cpu_only:
if capture_gpu_metrics:
if self._gpu_monitor:
self._gpu_monitor.destroy()
if self._cpu_monitor:
Expand Down
89 changes: 50 additions & 39 deletions model_analyzer/reports/report_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,35 +357,32 @@ def _build_summary_report(self, report_key, num_configs, statistics):

# Get GPU names and memory
run_config = self._summary_data[report_key][0][0]
cpu_only = run_config.cpu_only()

(gpu_names, max_memories) = self._get_gpu_stats(
measurements=[v for _, v in self._summary_data[report_key]]
report_gpu_metrics = (
self._config.always_report_gpu_metrics or not run_config.cpu_only()
)

(gpu_names, max_memories) = (None, None)
if report_gpu_metrics:
(gpu_names, max_memories) = self._get_gpu_stats(
measurements=[v for _, v in self._summary_data[report_key]]
)

# Get constraints
constraint_str = self._create_constraint_string(report_key)

# Build summary table and info sentence
if not cpu_only:
table, summary_sentence = self._build_summary_table(
report_key=report_key,
num_configurations=total_configurations,
num_measurements=total_measurements,
gpu_name=gpu_names,
)
else:
table, summary_sentence = self._build_summary_table(
report_key=report_key,
num_configurations=total_configurations,
num_measurements=total_measurements,
cpu_only=True,
)
table, summary_sentence = self._build_summary_table(
report_key=report_key,
num_configurations=total_configurations,
num_measurements=total_measurements,
gpu_name=gpu_names,
report_gpu_metrics=report_gpu_metrics,
)

# Add summary sections
summary.add_title(title=f"{self._mode.title()} Result Summary")
summary.add_subheading(f"Model: {' and '.join(report_key.split(','))}")
if not cpu_only:
if report_gpu_metrics:
summary.add_paragraph(f"GPU(s): {gpu_names}")
summary.add_paragraph(f"Total Available GPU Memory: {max_memories}")
summary.add_paragraph(f"Constraint targets: {constraint_str}")
Expand All @@ -407,7 +404,7 @@ def _build_summary_report(self, report_key, num_configs, statistics):

caption_throughput = f"{throughput_plot_config.title()} curves for {num_best_configs} best configurations."

if not cpu_only:
if report_gpu_metrics:
summary.add_images([throughput_plot], [caption_throughput], image_width=66)
if self._mode == "online":
memory_latency_plot = os.path.join(
Expand Down Expand Up @@ -482,7 +479,7 @@ def _build_summary_table(
num_configurations,
num_measurements,
gpu_name=None,
cpu_only=False,
report_gpu_metrics=True,
):
"""
Creates a result table corresponding
Expand All @@ -508,20 +505,14 @@ def _build_summary_table(
best_run_config,
best_run_config_measurement,
gpu_name,
cpu_only,
report_gpu_metrics,
multi_model,
is_ensemble,
is_bls,
)

summary_table = (
self._construct_summary_result_table_cpu_only(
sorted_measurements, multi_model, has_composing_models
)
if cpu_only
else self._construct_summary_result_table(
sorted_measurements, multi_model, has_composing_models
)
summary_table = self._construct_summary_result_table(
sorted_measurements, multi_model, has_composing_models, report_gpu_metrics
)

return summary_table, summary_sentence
Expand Down Expand Up @@ -581,7 +572,7 @@ def _create_summary_sentence(
best_run_config,
best_run_config_measurement,
gpu_name,
cpu_only,
report_gpu_metrics,
multi_model,
is_ensemble,
is_bls,
Expand All @@ -593,7 +584,9 @@ def _create_summary_sentence(
objective_phrase = self._create_summary_objective_phrase(
report_key, best_run_config_measurement
)
gpu_name_phrase = self._create_summary_gpu_name_phrase(gpu_name, cpu_only)
gpu_name_phrase = self._create_summary_gpu_name_phrase(
gpu_name, report_gpu_metrics
)

summary_sentence = (
f"In {measurement_phrase} across {config_phrase} "
Expand Down Expand Up @@ -778,8 +771,20 @@ def _create_instance_group_phrase(self, model_config):
ret_str += "s"
return ret_str

def _create_summary_gpu_name_phrase(self, gpu_name, cpu_only):
return f", on GPU(s) {gpu_name}" if not cpu_only else ""
def _create_summary_gpu_name_phrase(self, gpu_name, report_gpu_metrics):
return f", on GPU(s) {gpu_name}" if report_gpu_metrics else ""

def _construct_summary_result_table(
    self, sorted_measurements, multi_model, has_composing_models, report_gpu_metrics
):
    """
    Build the summary result table, dispatching to the GPU variant or the
    CPU-only variant based on report_gpu_metrics.
    """
    build = (
        self._construct_summary_result_table_with_gpu
        if report_gpu_metrics
        else self._construct_summary_result_table_cpu_only
    )
    return build(sorted_measurements, multi_model, has_composing_models)

def _construct_summary_result_table_cpu_only(
self, sorted_measurements, multi_model, has_composing_models
Expand All @@ -794,7 +799,7 @@ def _construct_summary_result_table_cpu_only(

return summary_table

def _construct_summary_result_table(
def _construct_summary_result_table_with_gpu(
self, sorted_measurements, multi_model, has_composing_models
):
summary_table = self._create_summary_result_table_header(multi_model)
Expand Down Expand Up @@ -1108,7 +1113,9 @@ def _build_detailed_table(self, model_config_name):
key=lambda x: x.get_non_gpu_metric_value(sort_by_tag),
reverse=True,
)
cpu_only = model_config.cpu_only()
report_gpu_metrics = (
self._config.always_report_gpu_metrics or not model_config.cpu_only()
)

if self._was_measured_with_request_rate(measurements[0]):
first_column_header = (
Expand All @@ -1125,7 +1132,7 @@ def _build_detailed_table(self, model_config_name):
"concurrency-range" if self._mode == "online" else "batch-size"
)

if not cpu_only:
if report_gpu_metrics:
headers = [
first_column_header,
"p99 Latency (ms)",
Expand Down Expand Up @@ -1156,7 +1163,7 @@ def _build_detailed_table(self, model_config_name):
detailed_table = ResultTable(headers, title="Detailed Table")

# Construct table
if not cpu_only:
if report_gpu_metrics:
for measurement in measurements:
row = [
# TODO-TMA-568: This needs to be updated because there will be multiple model configs
Expand Down Expand Up @@ -1219,7 +1226,11 @@ def _build_detailed_info(self, model_config_name):

gpu_cpu_string = "CPU"

if not run_config.cpu_only():
report_gpu_metrics = (
self._config.always_report_gpu_metrics or not run_config.cpu_only()
)

if report_gpu_metrics:
gpu_names, max_memories = self._get_gpu_stats(measurements)
gpu_cpu_string = f"GPU(s) {gpu_names} with total memory {max_memories}"

Expand Down
4 changes: 2 additions & 2 deletions tests/test_bls_report_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_bls_summary(self):
num_measurements=26,
num_configurations=10,
gpu_name="TITAN RTX",
cpu_only=False,
report_gpu_metrics=True,
)

self.assertEqual(summary_sentence, expected_summary_sentence)
Expand Down Expand Up @@ -132,7 +132,7 @@ def test_bls_summary_cpu_only(self):
num_measurements=26,
num_configurations=10,
gpu_name="TITAN RTX",
cpu_only=True,
report_gpu_metrics=False,
)

self.assertEqual(summary_sentence, expected_summary_sentence)
Expand Down
1 change: 1 addition & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def get_test_options():
OptionStruct("bool", "profile","--early-exit-enable"),
OptionStruct("bool", "profile","--skip-summary-reports"),
OptionStruct("bool", "profile","--skip-detailed-reports"),
OptionStruct("bool", "profile","--always-report-gpu-metrics"),
#Int/Float options
# Options format:
# (int/float, MA step, long_option, short_option, test_value, expected_default_value)
Expand Down
Loading

0 comments on commit e1fdb1b

Please sign in to comment.