From 6f92b65681886417598c48f116fdc2360a43d20b Mon Sep 17 00:00:00 2001
From: Hyunjae Woo
Date: Wed, 4 Oct 2023 09:55:32 -0700
Subject: [PATCH] Address feedback

---
 src/c++/perf_analyzer/docs/examples/profile.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py
index 7031b2ec7..5849384cc 100644
--- a/src/c++/perf_analyzer/docs/examples/profile.py
+++ b/src/c++/perf_analyzer/docs/examples/profile.py
@@ -38,20 +38,24 @@ def load_profile_data():
     return json.load(f)
 
 
-def calculate_avg_latencies():
+def collect_latencies(requests):
     # Example json demonstrating format:
     # see client/src/c++/perf_analyzer/docs/examples/decoupled_output_file.json
     first_token_latencies = []
     token_to_token_latencies = []
-
-    requests = load_profile_data()["experiments"][0]["requests"]
-
+    requests = requests["experiments"][0]["requests"]
     for request in requests:
         prev_response = request["response_timestamps"][0]
         first_token_latencies.append(prev_response - request["timestamp"])
         for response in request["response_timestamps"][1:]:
             token_to_token_latencies.append(response - prev_response)
             prev_response = response
+    return first_token_latencies, token_to_token_latencies
+
+
+def calculate_avg_latencies():
+    requests = load_profile_data()
+    first_token_latencies, token_to_token_latencies = collect_latencies(requests)
 
     # Compute mean and convert from nanosec to sec
     avg_first_token_latency = mean(first_token_latencies) / 1_000_000_000