Changes needed post-merge from other branches
nv-braf committed Oct 12, 2023
1 parent 5aeb538 commit cedc7a1
Showing 7 changed files with 9 additions and 109 deletions.
4 changes: 0 additions & 4 deletions model_analyzer/config/input/config_command_profile.py
@@ -66,10 +66,6 @@ def __init__(self):
         super().__init__()
         self._fill_config()
 
-    # FIXME: placeholder until branch is merged
-    def is_llm_model(self):
-        return False
-
     def _resolve_protobuf_field(self, field: FieldDescriptor) -> ConfigSweep:
         """
         Recursively resolve protobuf fields.
4 changes: 2 additions & 2 deletions model_analyzer/record/metrics_manager.py
@@ -69,8 +69,8 @@ class MetricsManager:
         "gpu_power_usage",
         "cpu_available_ram",
         "cpu_used_ram",
-        "avg_first_latency",
-        "avg_token_latency",
+        "avg_first_token_latency",
+        "avg_token_to_token_latency",
     ]
 
     def __init__(self, config, client, server, gpus, result_manager, state_manager):
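
This commit renames the metric strings here and the record tags in the files below together, which implies they must stay in sync. A minimal sanity-check sketch of that relationship; the import paths are inferred from the file paths in this commit and are assumptions, not verified against the installed package:

from model_analyzer.record.types.avg_first_token_latency import AvgFirstTokenLatency
from model_analyzer.record.types.avg_token_to_token_latency import AvgTokenToTokenLatency

# Each entry in MetricsManager's metric list should match a record class tag,
# which is why the strings above and the tags below are renamed in one commit.
assert AvgFirstTokenLatency.tag == "avg_first_token_latency"
assert AvgTokenToTokenLatency.tag == "avg_token_to_token_latency"
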
4 changes: 2 additions & 2 deletions model_analyzer/record/types/avg_first_token_latency.py
@@ -22,10 +22,10 @@
 @total_ordering
 class AvgFirstTokenLatency(DecreasingRecord):
     """
-    A record for perf_analyzer avg first token to token latency metric
+    A record for perf_analyzer average first token latency metric
     """
 
-    tag = "avg_first_latency"
+    tag = "avg_first_token_latency"
 
     def __init__(self, value, timestamp=0):
         """
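
For reference, a minimal usage sketch of the renamed record, based only on the constructor signature and tag visible in this hunk; the import path is inferred from the file path above and the value 42.0 is an arbitrary placeholder:

from model_analyzer.record.types.avg_first_token_latency import AvgFirstTokenLatency

# Build a record for one average first-token latency measurement reported by
# perf_analyzer; timestamp defaults to 0 per the signature shown above.
record = AvgFirstTokenLatency(value=42.0, timestamp=0)
print(record.tag)  # prints "avg_first_token_latency"
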
96 changes: 0 additions & 96 deletions model_analyzer/record/types/avg_token_latency.py

This file was deleted.

2 changes: 1 addition & 1 deletion model_analyzer/record/types/avg_token_to_token_latency.py
@@ -22,7 +22,7 @@
 @total_ordering
 class AvgTokenToTokenLatency(DecreasingRecord):
     """
-    A record for perf_analyzer avg token-to-token latency metric
+    A record for perf_analyzer average token-to-token latency metric
     """
 
     tag = "avg_token_to_token_latency"
4 changes: 2 additions & 2 deletions tests/common/test_utils.py
@@ -287,12 +287,12 @@ def construct_perf_analyzer_config(
 
     if request_rate:
         pa_config._args["request-rate-range"] = request_rate
-    elif llm_search_mode:
+    elif is_llm_model:
         pa_config._args["periodic-concurrency-range"] = concurrency
     else:
         pa_config._args["concurrency-range"] = concurrency
 
-    if llm_search_mode:
+    if is_llm_model:
         pa_config._args["request-parameter"] = (
             "max_token:" + str(max_token_count) + ":int"
         )
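
The rename above implies the following behavior for the test helper. A hedged sketch of how it might be exercised: only the keyword arguments visible in this commit are used, and usable defaults for the other parameters (including a falsy request_rate) are assumptions:

# With is_llm_model=True and no request rate, the helper should switch to
# periodic-concurrency-range and add the max_token request parameter.
pa_config = construct_perf_analyzer_config(is_llm_model=True, max_token_count=16)
assert "periodic-concurrency-range" in pa_config._args
assert pa_config._args["request-parameter"] == "max_token:16:int"
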
4 changes: 2 additions & 2 deletions tests/test_perf_analyzer_config_generator.py
@@ -578,7 +578,7 @@ def test_llm_search_max_token_count(self):
 
         max_token_counts = utils.generate_doubled_list(1, 256)
         expected_configs = [
-            construct_perf_analyzer_config(max_token_count=mtc, llm_search_mode=True)
+            construct_perf_analyzer_config(max_token_count=mtc, is_llm_model=True)
             for mtc in max_token_counts
         ]
 
@@ -612,7 +612,7 @@ def test_llm_search_text_input_length(self):
 
         text_input_lengths = utils.generate_doubled_list(1, 1024)
         expected_configs = [
-            construct_perf_analyzer_config(llm_search_mode=True)
+            construct_perf_analyzer_config(is_llm_model=True)
             for pl in text_input_lengths
         ]
 
