Changes needed post-merge from other branches
nv-braf committed Oct 12, 2023
1 parent 5aeb538 commit cedc7a1
Showing 7 changed files with 9 additions and 109 deletions.
4 changes: 0 additions & 4 deletions model_analyzer/config/input/config_command_profile.py
@@ -66,10 +66,6 @@ def __init__(self):
         super().__init__()
         self._fill_config()
 
-    # FIXME: placeholder until branch is merged
-    def is_llm_model(self):
-        return False
-
     def _resolve_protobuf_field(self, field: FieldDescriptor) -> ConfigSweep:
         """
         Recursively resolve protobuf fields.
4 changes: 2 additions & 2 deletions model_analyzer/record/metrics_manager.py
@@ -69,8 +69,8 @@ class MetricsManager:
         "gpu_power_usage",
         "cpu_available_ram",
         "cpu_used_ram",
-        "avg_first_latency",
-        "avg_token_latency",
+        "avg_first_token_latency",
+        "avg_token_to_token_latency",
     ]
 
     def __init__(self, config, client, server, gpus, result_manager, state_manager):
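
This commit renames the metric strings here and the record tags in the files below together, which implies they must stay in sync. A minimal sanity-check sketch of that relationship; the import paths are inferred from the file paths in this commit and are assumptions, not verified against the installed package:

from model_analyzer.record.types.avg_first_token_latency import AvgFirstTokenLatency
from model_analyzer.record.types.avg_token_to_token_latency import AvgTokenToTokenLatency

# Each entry in MetricsManager's metric list should match a record class tag,
# which is why the strings above and the tags below are renamed in one commit.
assert AvgFirstTokenLatency.tag == "avg_first_token_latency"
assert AvgTokenToTokenLatency.tag == "avg_token_to_token_latency"
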
4 changes: 2 additions & 2 deletions model_analyzer/record/types/avg_first_token_latency.py
@@ -22,10 +22,10 @@
 @total_ordering
 class AvgFirstTokenLatency(DecreasingRecord):
     """
-    A record for perf_analyzer avg first token to token latency metric
+    A record for perf_analyzer average first token latency metric
     """
 
-    tag = "avg_first_latency"
+    tag = "avg_first_token_latency"
 
     def __init__(self, value, timestamp=0):
         """
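
For reference, a minimal usage sketch of the renamed record, based only on the constructor signature and tag visible in this hunk; the import path is inferred from the file path above and the value 42.0 is an arbitrary placeholder:

from model_analyzer.record.types.avg_first_token_latency import AvgFirstTokenLatency

# Build a record for one average first-token latency measurement reported by
# perf_analyzer; timestamp defaults to 0 per the signature shown above.
record = AvgFirstTokenLatency(value=42.0, timestamp=0)
print(record.tag)  # prints "avg_first_token_latency"
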
96 changes: 0 additions & 96 deletions model_analyzer/record/types/avg_token_latency.py

This file was deleted.

2 changes: 1 addition & 1 deletion model_analyzer/record/types/avg_token_to_token_latency.py
@@ -22,7 +22,7 @@
 @total_ordering
 class AvgTokenToTokenLatency(DecreasingRecord):
     """
-    A record for perf_analyzer avg token-to-token latency metric
+    A record for perf_analyzer average token-to-token latency metric
     """
 
     tag = "avg_token_to_token_latency"
4 changes: 2 additions & 2 deletions tests/common/test_utils.py
@@ -287,12 +287,12 @@ def construct_perf_analyzer_config(
 
     if request_rate:
         pa_config._args["request-rate-range"] = request_rate
-    elif llm_search_mode:
+    elif is_llm_model:
         pa_config._args["periodic-concurrency-range"] = concurrency
     else:
         pa_config._args["concurrency-range"] = concurrency
 
-    if llm_search_mode:
+    if is_llm_model:
         pa_config._args["request-parameter"] = (
             "max_token:" + str(max_token_count) + ":int"
         )
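
The rename above implies the following behavior for the test helper. A hedged sketch of how it might be exercised: only the keyword arguments visible in this commit are used, and usable defaults for the other parameters (including a falsy request_rate) are assumptions:

# With is_llm_model=True and no request rate, the helper should switch to
# periodic-concurrency-range and add the max_token request parameter.
pa_config = construct_perf_analyzer_config(is_llm_model=True, max_token_count=16)
assert "periodic-concurrency-range" in pa_config._args
assert pa_config._args["request-parameter"] == "max_token:16:int"
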
4 changes: 2 additions & 2 deletions tests/test_perf_analyzer_config_generator.py
@@ -578,7 +578,7 @@ def test_llm_search_max_token_count(self):
 
         max_token_counts = utils.generate_doubled_list(1, 256)
         expected_configs = [
-            construct_perf_analyzer_config(max_token_count=mtc, llm_search_mode=True)
+            construct_perf_analyzer_config(max_token_count=mtc, is_llm_model=True)
             for mtc in max_token_counts
         ]
 
@@ -612,7 +612,7 @@ def test_llm_search_text_input_length(self):
 
         text_input_lengths = utils.generate_doubled_list(1, 1024)
         expected_configs = [
-            construct_perf_analyzer_config(llm_search_mode=True)
+            construct_perf_analyzer_config(is_llm_model=True)
             for pl in text_input_lengths
         ]
 
