Skip to content

Commit

Permalink
Excluded null response from token-to-token (T2T) latency calculation
Browse files: browse the repository at this point in the history
  • Loading branch information
nv-hwoo committed Oct 4, 2023
1 parent 6f92b65 commit c2dd174
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions src/c++/perf_analyzer/docs/examples/profile.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,9 +45,10 @@ def collect_latencies(requests):
token_to_token_latencies = []
requests = requests["experiments"][0]["requests"]
for request in requests:
prev_response = request["response_timestamps"][0]
first_token_latencies.append(prev_response - request["timestamp"])
for response in request["response_timestamps"][1:]:
first_response, *remaining_responses, _ = request["response_timestamps"]
first_token_latencies.append(first_response - request["timestamp"])
prev_response = first_response
for response in remaining_responses:
token_to_token_latencies.append(response - prev_response)
prev_response = response
return first_token_latencies, token_to_token_latencies
Expand All @@ -59,7 +60,10 @@ def calculate_avg_latencies():

# Compute mean and convert from nanosec to sec
avg_first_token_latency = mean(first_token_latencies) / 1_000_000_000
avg_token_to_token_latency = mean(token_to_token_latencies) / 1_000_000_000
if token_to_token_latencies:
avg_token_to_token_latency = mean(token_to_token_latencies) / 1_000_000_000
else:
avg_token_to_token_latency = None
return avg_first_token_latency, avg_token_to_token_latency


Expand Down Expand Up @@ -155,8 +159,13 @@ def generate_input_data(args, filename):

print("\n[ Benchmark Summary ]")
for prompt_size, avg_first_token_latency, avg_token_to_token_latency in results:
print(
line = (
f" Prompt size: {prompt_size}, "
f"Average first-token latency: {avg_first_token_latency:.4f} sec, "
f"Average token-token latency: {avg_token_to_token_latency:.4f} sec"
f"Average first-token latency: {avg_first_token_latency:.4f} sec"
)
line += (
f", Average token-token latency: {avg_token_to_token_latency:.4f} sec"
if avg_token_to_token_latency
else ""
)
print(line)

0 comments on commit c2dd174

Please sign in to comment.