From e1fdb1b8e3dc31a0aa489536252edcda05330e60 Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:54:38 -0700 Subject: [PATCH] New config option: --always-report-gpu-metrics (#734) * Add config option * Summary report support * Changing cpu_only to report_gpu_metrics * changing name of config option * adding back in always * Adding option to report * Fixing formatting * Updated logic to use capture_gpu_metrics * Adding cpu_only to detailed report unit test * Changing comment --- docs/config.md | 6 ++ .../config/input/config_command_profile.py | 13 ++- .../config/input/config_command_report.py | 11 +++ .../config/input/config_defaults.py | 1 + model_analyzer/record/metrics_manager.py | 34 +++---- model_analyzer/reports/report_manager.py | 89 +++++++++++-------- tests/test_bls_report_manager.py | 4 +- tests/test_cli.py | 1 + tests/test_ensemble_report_manager.py | 4 +- tests/test_report_manager.py | 45 ++++++---- 10 files changed, 133 insertions(+), 75 deletions(-) diff --git a/docs/config.md b/docs/config.md index d3c36bbf5..9c7a9b8f4 100644 --- a/docs/config.md +++ b/docs/config.md @@ -227,6 +227,9 @@ cpu_only_composing_models: # Enables the searching of request rate (instead of concurrency) [ request_rate_search_enable: | default: false] +# Always report GPU metrics, even if the model(s) is cpu_only +[ always_report_gpu_metrics: | default: false] + # Skips the generation of summary reports and tables [ skip_summary_reports: | default: false] @@ -335,6 +338,9 @@ report_model_configs: # Specify path to config YAML file [ config_file: ] + +# Always report GPU metrics +[ always_report_gpu_metrics: | default: false] ``` ## YAML only options diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py index ec59d71f4..02d6def28 100755 --- a/model_analyzer/config/input/config_command_profile.py +++ b/model_analyzer/config/input/config_command_profile.py @@ -18,7 +18,7 @@ import logging import os -import numba +import numba.cuda import psutil from google.protobuf.descriptor import FieldDescriptor from tritonclient.grpc.model_config_pb2 import ModelConfig @@ -37,6 +37,7 @@ from .config_command import ConfigCommand from .config_defaults import ( + DEFAULT_ALWAYS_REPORT_GPU_METRICS, DEFAULT_BATCH_SIZES, DEFAULT_CHECKPOINT_DIRECTORY, DEFAULT_CLIENT_PROTOCOL, @@ -266,6 +267,16 @@ def _fill_config(self): "Use 'all' to profile all the GPUs visible by CUDA.", ) ) + self._add_config( + ConfigField( + "always_report_gpu_metrics", + flags=["--always-report-gpu-metrics"], + field_type=ConfigPrimitive(bool), + parser_args={"action": "store_true"}, + default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS, + description="Report GPU metrics, even when the model is `cpu_only`.", + ) + ) self._add_config( ConfigField( "skip_summary_reports", diff --git a/model_analyzer/config/input/config_command_report.py b/model_analyzer/config/input/config_command_report.py index 10bada5f3..7d1eee7fb 100755 --- a/model_analyzer/config/input/config_command_report.py +++ b/model_analyzer/config/input/config_command_report.py @@ -26,6 +26,7 @@ from .config_command import ConfigCommand from .config_defaults import ( + DEFAULT_ALWAYS_REPORT_GPU_METRICS, DEFAULT_CHECKPOINT_DIRECTORY, DEFAULT_EXPORT_PATH, DEFAULT_OFFLINE_REPORT_PLOTS, @@ -172,6 +173,16 @@ def _fill_config(self): description="Output file format for detailed report.", ) ) + self._add_config( + ConfigField( + "always_report_gpu_metrics", + 
flags=["--always_report-gpu-metrics"], + field_type=ConfigPrimitive(bool), + parser_args={"action": "store_true"}, + default_value=DEFAULT_ALWAYS_REPORT_GPU_METRICS, + description="Report GPU metrics, even when the model is `cpu_only`.", + ) + ) def set_config_values(self, args): """ diff --git a/model_analyzer/config/input/config_defaults.py b/model_analyzer/config/input/config_defaults.py index f7401ad75..785dec205 100755 --- a/model_analyzer/config/input/config_defaults.py +++ b/model_analyzer/config/input/config_defaults.py @@ -34,6 +34,7 @@ DEFAULT_COLLECT_CPU_METRICS = False DEFAULT_LOG_LEVEL = "INFO" DEFAULT_GPUS = "all" +DEFAULT_ALWAYS_REPORT_GPU_METRICS = False DEFAULT_SKIP_SUMMARY_REPORTS = False DEFAULT_SKIP_DETAILED_REPORTS = False DEFAULT_OUTPUT_MODEL_REPOSITORY = os.path.join(os.getcwd(), "output_model_repository") diff --git a/model_analyzer/record/metrics_manager.py b/model_analyzer/record/metrics_manager.py index 2a9987f1a..393f37e53 100755 --- a/model_analyzer/record/metrics_manager.py +++ b/model_analyzer/record/metrics_manager.py @@ -183,13 +183,13 @@ def profile_server(self): TritonModelAnalyzerException """ - cpu_only = not numba.cuda.is_available() - self._start_monitors(cpu_only=cpu_only) + capture_gpu_metrics = numba.cuda.is_available() + self._start_monitors(capture_gpu_metrics=capture_gpu_metrics) time.sleep(self._config.duration_seconds) - if not cpu_only: + if capture_gpu_metrics or self._config.always_report_gpu_metrics: server_gpu_metrics = self._get_gpu_inference_metrics() self._result_manager.add_server_data(data=server_gpu_metrics) - self._destroy_monitors(cpu_only=cpu_only) + self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics) def execute_run_config( self, run_config: RunConfig @@ -244,27 +244,29 @@ def profile_models(self, run_config: RunConfig) -> Optional[RunConfigMeasurement if not self._config.perf_output else FileWriter(self._config.perf_output_path) ) - cpu_only = run_config.cpu_only() + capture_gpu_metrics = ( + run_config.cpu_only() and not self._config.always_report_gpu_metrics + ) self._print_run_config_info(run_config) - self._start_monitors(cpu_only=cpu_only) + self._start_monitors(capture_gpu_metrics=capture_gpu_metrics) perf_analyzer_metrics, model_gpu_metrics = self._run_perf_analyzer( run_config, perf_output_writer ) if not perf_analyzer_metrics: - self._stop_monitors(cpu_only=cpu_only) - self._destroy_monitors(cpu_only=cpu_only) + self._stop_monitors(capture_gpu_metrics=capture_gpu_metrics) + self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics) return None # Get metrics for model inference and combine metrics that do not have GPU UUID - if not cpu_only and not model_gpu_metrics: + if capture_gpu_metrics and not model_gpu_metrics: model_gpu_metrics = self._get_gpu_inference_metrics() model_cpu_metrics = self._get_cpu_inference_metrics() - self._destroy_monitors(cpu_only=cpu_only) + self._destroy_monitors(capture_gpu_metrics=capture_gpu_metrics) run_config_measurement = None if model_gpu_metrics is not None and perf_analyzer_metrics is not None: @@ -450,13 +452,13 @@ def _get_measurement_if_config_duplicate(self, run_config): return measurements.get(key, None) - def _start_monitors(self, cpu_only=False): + def _start_monitors(self, capture_gpu_metrics=True): """ Start any metrics monitors """ self._gpu_monitor = None - if not cpu_only: + if capture_gpu_metrics: try: self._gpu_monitor = RemoteMonitor( self._config.triton_metrics_url, @@ -483,23 +485,23 @@ def _start_monitors(self, cpu_only=False): ) 
self._cpu_monitor.start_recording_metrics() - def _stop_monitors(self, cpu_only=False): + def _stop_monitors(self, capture_gpu_metrics=True): """ Stop any metrics monitors, when we don't need to collect the result """ # Stop DCGM Monitor only if there are GPUs available - if not cpu_only: + if capture_gpu_metrics: self._gpu_monitor.stop_recording_metrics() self._cpu_monitor.stop_recording_metrics() - def _destroy_monitors(self, cpu_only=False): + def _destroy_monitors(self, capture_gpu_metrics=True): """ Destroy the monitors created by start """ - if not cpu_only: + if capture_gpu_metrics: if self._gpu_monitor: self._gpu_monitor.destroy() if self._cpu_monitor: diff --git a/model_analyzer/reports/report_manager.py b/model_analyzer/reports/report_manager.py index 7753ad291..deca073b3 100755 --- a/model_analyzer/reports/report_manager.py +++ b/model_analyzer/reports/report_manager.py @@ -357,35 +357,32 @@ def _build_summary_report(self, report_key, num_configs, statistics): # Get GPU names and memory run_config = self._summary_data[report_key][0][0] - cpu_only = run_config.cpu_only() - - (gpu_names, max_memories) = self._get_gpu_stats( - measurements=[v for _, v in self._summary_data[report_key]] + report_gpu_metrics = ( + self._config.always_report_gpu_metrics or not run_config.cpu_only() ) + (gpu_names, max_memories) = (None, None) + if report_gpu_metrics: + (gpu_names, max_memories) = self._get_gpu_stats( + measurements=[v for _, v in self._summary_data[report_key]] + ) + # Get constraints constraint_str = self._create_constraint_string(report_key) # Build summary table and info sentence - if not cpu_only: - table, summary_sentence = self._build_summary_table( - report_key=report_key, - num_configurations=total_configurations, - num_measurements=total_measurements, - gpu_name=gpu_names, - ) - else: - table, summary_sentence = self._build_summary_table( - report_key=report_key, - num_configurations=total_configurations, - num_measurements=total_measurements, - cpu_only=True, - ) + table, summary_sentence = self._build_summary_table( + report_key=report_key, + num_configurations=total_configurations, + num_measurements=total_measurements, + gpu_name=gpu_names, + report_gpu_metrics=report_gpu_metrics, + ) # Add summary sections summary.add_title(title=f"{self._mode.title()} Result Summary") summary.add_subheading(f"Model: {' and '.join(report_key.split(','))}") - if not cpu_only: + if report_gpu_metrics: summary.add_paragraph(f"GPU(s): {gpu_names}") summary.add_paragraph(f"Total Available GPU Memory: {max_memories}") summary.add_paragraph(f"Constraint targets: {constraint_str}") @@ -407,7 +404,7 @@ def _build_summary_report(self, report_key, num_configs, statistics): caption_throughput = f"{throughput_plot_config.title()} curves for {num_best_configs} best configurations." 
- if not cpu_only: + if report_gpu_metrics: summary.add_images([throughput_plot], [caption_throughput], image_width=66) if self._mode == "online": memory_latency_plot = os.path.join( @@ -482,7 +479,7 @@ def _build_summary_table( num_configurations, num_measurements, gpu_name=None, - cpu_only=False, + report_gpu_metrics=True, ): """ Creates a result table corresponding @@ -508,20 +505,14 @@ def _build_summary_table( best_run_config, best_run_config_measurement, gpu_name, - cpu_only, + report_gpu_metrics, multi_model, is_ensemble, is_bls, ) - summary_table = ( - self._construct_summary_result_table_cpu_only( - sorted_measurements, multi_model, has_composing_models - ) - if cpu_only - else self._construct_summary_result_table( - sorted_measurements, multi_model, has_composing_models - ) + summary_table = self._construct_summary_result_table( + sorted_measurements, multi_model, has_composing_models, report_gpu_metrics ) return summary_table, summary_sentence @@ -581,7 +572,7 @@ def _create_summary_sentence( best_run_config, best_run_config_measurement, gpu_name, - cpu_only, + report_gpu_metrics, multi_model, is_ensemble, is_bls, @@ -593,7 +584,9 @@ def _create_summary_sentence( objective_phrase = self._create_summary_objective_phrase( report_key, best_run_config_measurement ) - gpu_name_phrase = self._create_summary_gpu_name_phrase(gpu_name, cpu_only) + gpu_name_phrase = self._create_summary_gpu_name_phrase( + gpu_name, report_gpu_metrics + ) summary_sentence = ( f"In {measurement_phrase} across {config_phrase} " @@ -778,8 +771,20 @@ def _create_instance_group_phrase(self, model_config): ret_str += "s" return ret_str - def _create_summary_gpu_name_phrase(self, gpu_name, cpu_only): - return f", on GPU(s) {gpu_name}" if not cpu_only else "" + def _create_summary_gpu_name_phrase(self, gpu_name, report_gpu_metrics): + return f", on GPU(s) {gpu_name}" if report_gpu_metrics else "" + + def _construct_summary_result_table( + self, sorted_measurements, multi_model, has_composing_models, report_gpu_metrics + ): + if report_gpu_metrics: + return self._construct_summary_result_table_with_gpu( + sorted_measurements, multi_model, has_composing_models + ) + else: + return self._construct_summary_result_table_cpu_only( + sorted_measurements, multi_model, has_composing_models + ) def _construct_summary_result_table_cpu_only( self, sorted_measurements, multi_model, has_composing_models @@ -794,7 +799,7 @@ def _construct_summary_result_table_cpu_only( return summary_table - def _construct_summary_result_table( + def _construct_summary_result_table_with_gpu( self, sorted_measurements, multi_model, has_composing_models ): summary_table = self._create_summary_result_table_header(multi_model) @@ -1108,7 +1113,9 @@ def _build_detailed_table(self, model_config_name): key=lambda x: x.get_non_gpu_metric_value(sort_by_tag), reverse=True, ) - cpu_only = model_config.cpu_only() + report_gpu_metrics = ( + self._config.always_report_gpu_metrics or not model_config.cpu_only() + ) if self._was_measured_with_request_rate(measurements[0]): first_column_header = ( @@ -1125,7 +1132,7 @@ def _build_detailed_table(self, model_config_name): "concurrency-range" if self._mode == "online" else "batch-size" ) - if not cpu_only: + if report_gpu_metrics: headers = [ first_column_header, "p99 Latency (ms)", @@ -1156,7 +1163,7 @@ def _build_detailed_table(self, model_config_name): detailed_table = ResultTable(headers, title="Detailed Table") # Construct table - if not cpu_only: + if report_gpu_metrics: for measurement in measurements: row = 
[ # TODO-TMA-568: This needs to be updated because there will be multiple model configs @@ -1219,7 +1226,11 @@ def _build_detailed_info(self, model_config_name): gpu_cpu_string = "CPU" - if not run_config.cpu_only(): + report_gpu_metrics = ( + self._config.always_report_gpu_metrics or not run_config.cpu_only() + ) + + if report_gpu_metrics: gpu_names, max_memories = self._get_gpu_stats(measurements) gpu_cpu_string = f"GPU(s) {gpu_names} with total memory {max_memories}" diff --git a/tests/test_bls_report_manager.py b/tests/test_bls_report_manager.py index 255ac1a6f..19053863c 100755 --- a/tests/test_bls_report_manager.py +++ b/tests/test_bls_report_manager.py @@ -86,7 +86,7 @@ def test_bls_summary(self): num_measurements=26, num_configurations=10, gpu_name="TITAN RTX", - cpu_only=False, + report_gpu_metrics=True, ) self.assertEqual(summary_sentence, expected_summary_sentence) @@ -132,7 +132,7 @@ def test_bls_summary_cpu_only(self): num_measurements=26, num_configurations=10, gpu_name="TITAN RTX", - cpu_only=True, + report_gpu_metrics=False, ) self.assertEqual(summary_sentence, expected_summary_sentence) diff --git a/tests/test_cli.py b/tests/test_cli.py index a2bbd9004..98ec60237 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -64,6 +64,7 @@ def get_test_options(): OptionStruct("bool", "profile","--early-exit-enable"), OptionStruct("bool", "profile","--skip-summary-reports"), OptionStruct("bool", "profile","--skip-detailed-reports"), + OptionStruct("bool", "profile","--always-report-gpu-metrics"), #Int/Float options # Options format: # (int/float, MA step, long_option, short_option, test_value, expected_default_value) diff --git a/tests/test_ensemble_report_manager.py b/tests/test_ensemble_report_manager.py index 06165ee96..afa3e96f8 100755 --- a/tests/test_ensemble_report_manager.py +++ b/tests/test_ensemble_report_manager.py @@ -85,7 +85,7 @@ def test_ensemble_summary(self): num_measurements=68, num_configurations=37, gpu_name="TITAN RTX", - cpu_only=False, + report_gpu_metrics=True, ) self.assertEqual(summary_sentence, expected_summary_sentence) @@ -128,7 +128,7 @@ def test_ensemble_summary_cpu_only(self): num_measurements=68, num_configurations=37, gpu_name="TITAN RTX", - cpu_only=True, + report_gpu_metrics=False, ) self.assertEqual(summary_sentence, expected_summary_sentence) diff --git a/tests/test_report_manager.py b/tests/test_report_manager.py index 8873aec2a..8db02ad18 100755 --- a/tests/test_report_manager.py +++ b/tests/test_report_manager.py @@ -45,7 +45,8 @@ def _init_managers( models="test_model", num_configs_per_model=10, mode="online", - subcommand="analyze", + subcommand="profile", + report_gpu_metrics=False, ): args = ["model-analyzer", subcommand, "-f", "path-to-config-file"] if subcommand == "profile": @@ -54,6 +55,9 @@ def _init_managers( else: args.extend(["--report-model-configs", models]) + if report_gpu_metrics: + args.extend(["--always-report-gpu-metrics"]) + yaml_str = ( """ num_configs_per_model: """ @@ -223,16 +227,24 @@ def test_add_results(self, *args): def test_build_summary_table(self, *args): for mode in ["offline", "online"]: for cpu_only in [True, False]: - self.subtest_build_summary_table(mode, cpu_only) + for report_gpu_metrics in [True, False]: + self.subtest_build_summary_table(mode, cpu_only, report_gpu_metrics) - def subtest_build_summary_table(self, mode, cpu_only): - self._init_managers(models="test_model", mode=mode, subcommand="profile") + def subtest_build_summary_table(self, mode, cpu_only, report_gpu_metrics): + 
self._init_managers( + models="test_model", + mode=mode, + subcommand="profile", + report_gpu_metrics=report_gpu_metrics, + ) result_comparator = RunConfigResultComparator( metric_objectives_list=[{"perf_throughput": 10}], model_weights=[1] ) avg_gpu_metrics = {0: {"gpu_used_memory": 6000, "gpu_utilization": 60}} + gpu_metrics = report_gpu_metrics or not cpu_only + for i in range(10, 0, -1): avg_non_gpu_metrics = { "perf_throughput": 100 + 10 * i, @@ -245,7 +257,7 @@ def subtest_build_summary_table(self, mode, cpu_only): avg_gpu_metrics, avg_non_gpu_metrics, result_comparator, - cpu_only, + cpu_only=not gpu_metrics, ) self.report_manager.create_summaries() @@ -255,7 +267,7 @@ def subtest_build_summary_table(self, mode, cpu_only): num_measurements=10, num_configurations=3, gpu_name="TITAN RTX", - cpu_only=cpu_only, + report_gpu_metrics=gpu_metrics, ) if mode == "online": @@ -263,11 +275,11 @@ def subtest_build_summary_table(self, mode, cpu_only): else: objective = "minimizing latency" - if cpu_only: + if gpu_metrics: expected_summary_sentence = ( "In 10 measurements across 3 configurations, " "test_model_config_10 is 100% better than the default configuration " - f"at {objective}, under the given constraints.
  • " + f"at {objective}, under the given constraints, on GPU(s) TITAN RTX.
    • " "test_model_config_10: 1 GPU instance with a max batch size of 8 on platform tensorflow_graphdef " "
    " ) @@ -275,7 +287,7 @@ def subtest_build_summary_table(self, mode, cpu_only): expected_summary_sentence = ( "In 10 measurements across 3 configurations, " "test_model_config_10 is 100% better than the default configuration " - f"at {objective}, under the given constraints, on GPU(s) TITAN RTX.
    • " + f"at {objective}, under the given constraints.
      • " "test_model_config_10: 1 GPU instance with a max batch size of 8 on platform tensorflow_graphdef " "
      " ) @@ -296,15 +308,18 @@ def subtest_build_summary_table(self, mode, cpu_only): def test_build_detailed_info(self): for cpu_only in [True, False]: - self._subtest_build_detailed_info(cpu_only) + for report_gpu_metrics in [True, False]: + self._subtest_build_detailed_info(cpu_only, report_gpu_metrics) - def _subtest_build_detailed_info(self, cpu_only): + def _subtest_build_detailed_info(self, cpu_only, report_gpu_metrics): self._init_managers(models="test_model_config_10", subcommand="report") result_comparator = RunConfigResultComparator( metric_objectives_list=[{"perf_throughput": 10}], model_weights=[1] ) + gpu_metrics = report_gpu_metrics or not cpu_only + avg_gpu_metrics = {"gpu_uuid": {"gpu_used_memory": 6000, "gpu_utilization": 60}} for i in range(10, 0, -1): @@ -319,7 +334,7 @@ def _subtest_build_detailed_info(self, cpu_only): avg_gpu_metrics, avg_non_gpu_metrics, result_comparator, - cpu_only=cpu_only, + cpu_only=not gpu_metrics, add_to_results_only=True, ) @@ -327,18 +342,18 @@ def _subtest_build_detailed_info(self, cpu_only): self.report_manager._build_detailed_table("test_model_config_10") sentence = self.report_manager._build_detailed_info("test_model_config_10") - if cpu_only: + if gpu_metrics: expected_sentence = ( f"The model config test_model_config_10 uses 1 GPU instance with " f"a max batch size of 8 and has dynamic batching enabled. 1 measurement(s) " - f"were obtained for the model config on CPU. " + f"were obtained for the model config on GPU(s) 1 x fake_gpu_name with total memory 1.0 GB. " f"This model uses the platform tensorflow_graphdef." ) else: expected_sentence = ( f"The model config test_model_config_10 uses 1 GPU instance with " f"a max batch size of 8 and has dynamic batching enabled. 1 measurement(s) " - f"were obtained for the model config on GPU(s) 1 x fake_gpu_name with total memory 1.0 GB. " + f"were obtained for the model config on CPU. " f"This model uses the platform tensorflow_graphdef." )