From c2dd174ddace296f6226ecfd16827de0d891f1c5 Mon Sep 17 00:00:00 2001
From: Hyunjae Woo <hwoo@nvidia.com>
Date: Wed, 4 Oct 2023 12:17:49 -0700
Subject: [PATCH] Excluded null response from T2T latency calculation

---
 .../perf_analyzer/docs/examples/profile.py    | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py
index 5849384cc..15e5671e3 100644
--- a/src/c++/perf_analyzer/docs/examples/profile.py
+++ b/src/c++/perf_analyzer/docs/examples/profile.py
@@ -45,9 +45,10 @@ def collect_latencies(requests):
     token_to_token_latencies = []
     requests = requests["experiments"][0]["requests"]
     for request in requests:
-        prev_response = request["response_timestamps"][0]
-        first_token_latencies.append(prev_response - request["timestamp"])
-        for response in request["response_timestamps"][1:]:
+        first_response, *remaining_responses, _ = request["response_timestamps"]
+        first_token_latencies.append(first_response - request["timestamp"])
+        prev_response = first_response
+        for response in remaining_responses:
             token_to_token_latencies.append(response - prev_response)
             prev_response = response
     return first_token_latencies, token_to_token_latencies
@@ -59,7 +60,10 @@ def calculate_avg_latencies():
 
     # Compute mean and convert from nanosec to sec
     avg_first_token_latency = mean(first_token_latencies) / 1_000_000_000
-    avg_token_to_token_latency = mean(token_to_token_latencies) / 1_000_000_000
+    if token_to_token_latencies:
+        avg_token_to_token_latency = mean(token_to_token_latencies) / 1_000_000_000
+    else:
+        avg_token_to_token_latency = None
     return avg_first_token_latency, avg_token_to_token_latency
 
 
@@ -155,8 +159,13 @@ def generate_input_data(args, filename):
 
     print("\n[ Benchmark Summary ]")
     for prompt_size, avg_first_token_latency, avg_token_to_token_latency in results:
-        print(
+        line = (
             f"  Prompt size: {prompt_size}, "
-            f"Average first-token latency: {avg_first_token_latency:.4f} sec, "
-            f"Average token-token latency: {avg_token_to_token_latency:.4f} sec"
+            f"Average first-token latency: {avg_first_token_latency:.4f} sec"
         )
+        line += (
+            f", Average token-token latency: {avg_token_to_token_latency:.4f} sec"
+            if avg_token_to_token_latency is not None  # None = no T2T samples; 0.0 is valid
+            else ""
+        )
+        print(line)