From 703e18fc476f96869b5267aa7a98f8224368748a Mon Sep 17 00:00:00 2001
From: Matthew Kotila <matthew.r.kotila@gmail.com>
Date: Tue, 25 Jul 2023 17:52:06 +0000
Subject: [PATCH 1/4] Store sequence ID in timestamps tuple object

---
 src/c++/perf_analyzer/CMakeLists.txt          |  2 +
 .../client_backend/mock_client_backend.h      | 43 +++++++----
 src/c++/perf_analyzer/infer_context.cc        | 27 +++----
 src/c++/perf_analyzer/infer_context.h         | 29 +++-----
 src/c++/perf_analyzer/inference_profiler.cc   | 10 +--
 src/c++/perf_analyzer/inference_profiler.h    |  1 +
 src/c++/perf_analyzer/load_manager.h          |  1 +
 src/c++/perf_analyzer/mock_infer_context.h    | 28 +++++++-
 src/c++/perf_analyzer/perf_utils.h            |  4 --
 src/c++/perf_analyzer/request_properties.h    | 71 +++++++++++++++++++
 src/c++/perf_analyzer/test_infer_context.cc   | 40 ++++++++++-
 .../perf_analyzer/test_inference_profiler.cc  | 41 ++++++-----
 src/c++/perf_analyzer/test_load_manager.cc    | 36 +++++-----
 src/c++/perf_analyzer/timestamp_vector.h      | 36 ++++++++++
 14 files changed, 271 insertions(+), 98 deletions(-)
 create mode 100644 src/c++/perf_analyzer/request_properties.h
 create mode 100644 src/c++/perf_analyzer/timestamp_vector.h
diff --git a/src/c++/perf_analyzer/CMakeLists.txt b/src/c++/perf_analyzer/CMakeLists.txt
index 517f5bc9d..596a01a02 100644
--- a/src/c++/perf_analyzer/CMakeLists.txt
+++ b/src/c++/perf_analyzer/CMakeLists.txt
@@ -104,6 +104,8 @@ set(
   concurrency_ctx_id_tracker.h
   fifo_ctx_id_tracker.h
   rand_ctx_id_tracker.h
+  request_properties.h
+  timestamp_vector.h
 )
 
 add_executable(
diff --git a/src/c++/perf_analyzer/client_backend/mock_client_backend.h b/src/c++/perf_analyzer/client_backend/mock_client_backend.h
index ddc14f663..c81122e7b 100644
--- a/src/c++/perf_analyzer/client_backend/mock_client_backend.h
+++ b/src/c++/perf_analyzer/client_backend/mock_client_backend.h
@@ -470,12 +470,40 @@ class MockClientStats {
 ///
 class MockClientBackend : public ClientBackend {
  public:
-  MockClientBackend(std::shared_ptr<MockClientStats> stats) { stats_ = stats; }
+  MockClientBackend() { SetupMocks(); };
+  MockClientBackend(std::shared_ptr<MockClientStats> stats)
+  {
+    SetupMocks();
+    stats_ = stats;
+  }
+
+  void SetupMocks()
+  {
+    ON_CALL(*this, AsyncInfer(testing::_, testing::_, testing::_, testing::_))
+        .WillByDefault(
+            [this](
+                OnCompleteFn callback, const InferOptions& options,
+                const std::vector<InferInput*>& inputs,
+                const std::vector<const InferRequestedOutput*>& outputs)
+                -> Error {
+              stats_->CaptureRequest(
+                  MockClientStats::ReqType::ASYNC, options, inputs, outputs);
+
+              LaunchAsyncMockRequest(options, callback);
+
+              return stats_->GetNextReturnStatus();
+            });
+  }
 
   MOCK_METHOD(
       Error, ModelConfig,
       (rapidjson::Document*, const std::string&, const std::string&),
       (override));
+  MOCK_METHOD(
+      Error, AsyncInfer,
+      (OnCompleteFn, const InferOptions&, const std::vector<InferInput*>&,
+       const std::vector<const InferRequestedOutput*>&),
+      (override));
 
   Error Infer(
       InferResult** result, const InferOptions& options,
@@ -493,19 +521,6 @@ class MockClientBackend : public ClientBackend {
     return stats_->GetNextReturnStatus();
   }
 
-  Error AsyncInfer(
-      OnCompleteFn callback, const InferOptions& options,
-      const std::vector<InferInput*>& inputs,
-      const std::vector<const InferRequestedOutput*>& outputs) override
-  {
-    stats_->CaptureRequest(
-        MockClientStats::ReqType::ASYNC, options, inputs, outputs);
-
-    LaunchAsyncMockRequest(options, callback);
-
-    return stats_->GetNextReturnStatus();
-  }
-
   Error AsyncStreamInfer(
       const InferOptions& options, const std::vector<InferInput*>& inputs,
       const std::vector<const InferRequestedOutput*>& outputs)
diff --git a/src/c++/perf_analyzer/infer_context.cc b/src/c++/perf_analyzer/infer_context.cc
index dc65c2adc..01a2febd0 100644
--- a/src/c++/perf_analyzer/infer_context.cc
+++ b/src/c++/perf_analyzer/infer_context.cc
@@ -53,7 +53,7 @@ InferContext::SendInferRequest(bool delayed)
   if (using_json_data_) {
     UpdateJsonData();
   }
-  SendRequest(request_id_++, delayed);
+  SendRequest(request_id_++, delayed, /* ignored */ 0);
 }
 
 void
@@ -74,7 +74,7 @@ InferContext::SendSequenceInferRequest(uint32_t seq_stat_index, bool delayed)
 
     sequence_manager_->DecrementRemainingQueries(seq_stat_index);
 
-    SendRequest(request_id_++, delayed);
+    SendRequest(request_id_++, delayed, seq_stat_index);
   }
 }
 
@@ -95,12 +95,14 @@ InferContext::CompleteOngoingSequence(uint32_t seq_stat_index)
     sequence_manager_->DecrementRemainingQueries(seq_stat_index);
 
     bool is_delayed = false;
-    SendRequest(request_id_++, is_delayed);
+    SendRequest(request_id_++, is_delayed, seq_stat_index);
   }
 }
 
 void
-InferContext::SendRequest(const uint64_t request_id, const bool delayed)
+InferContext::SendRequest(
+    const uint64_t request_id, const bool delayed,
+    const uint32_t seq_stat_index)
 {
   if (!thread_stat_->status_.IsOk()) {
     return;
@@ -113,12 +115,12 @@ InferContext::SendRequest(const uint64_t request_id, const bool delayed)
       std::lock_guard<std::mutex> lock(thread_stat_->mu_);
       auto it =
           async_req_map_
-              .emplace(
-                  infer_data_.options_->request_id_, AsyncRequestProperties())
+              .emplace(infer_data_.options_->request_id_, RequestProperties())
               .first;
       it->second.start_time_ = std::chrono::system_clock::now();
       it->second.sequence_end_ = infer_data_.options_->sequence_end_;
       it->second.delayed_ = delayed;
+      it->second.sequence_status_index_ = seq_stat_index;
     }
 
     thread_stat_->idle_timer.Start();
@@ -161,9 +163,9 @@ InferContext::SendRequest(const uint64_t request_id, const bool delayed)
       // locking
       std::lock_guard<std::mutex> lock(thread_stat_->mu_);
       auto total = end_time_sync - start_time_sync;
-      thread_stat_->request_timestamps_.emplace_back(std::make_tuple(
+      thread_stat_->request_timestamps_.emplace_back(RequestProperties(
           start_time_sync, std::move(end_time_syncs),
-          infer_data_.options_->sequence_end_, delayed));
+          infer_data_.options_->sequence_end_, delayed, seq_stat_index));
       thread_stat_->status_ =
           infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
       if (!thread_stat_->status_.IsOk()) {
@@ -254,7 +256,7 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
           return;
         }
         if (is_null_response == false) {
-          it->second.end_times.push_back(std::chrono::system_clock::now());
+          it->second.end_times_.push_back(std::chrono::system_clock::now());
         }
         thread_stat_->cb_status_ =
             result_ptr->IsFinalResponse(&is_final_response);
@@ -262,9 +264,10 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
           return;
         }
         if (is_final_response) {
-          thread_stat_->request_timestamps_.emplace_back(std::make_tuple(
-              it->second.start_time_, it->second.end_times,
-              it->second.sequence_end_, it->second.delayed_));
+          thread_stat_->request_timestamps_.emplace_back(
+              it->second.start_time_, it->second.end_times_,
+              it->second.sequence_end_, it->second.delayed_,
+              it->second.sequence_status_index_);
           infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
           thread_stat_->cb_status_ = ValidateOutputs(result);
           async_req_map_.erase(request_id);
diff --git a/src/c++/perf_analyzer/infer_context.h b/src/c++/perf_analyzer/infer_context.h
index c91fbcacc..a9217b01f 100644
--- a/src/c++/perf_analyzer/infer_context.h
+++ b/src/c++/perf_analyzer/infer_context.h
@@ -36,7 +36,9 @@
 #include "iinfer_data_manager.h"
 #include "infer_data.h"
 #include "perf_utils.h"
+#include "request_properties.h"
 #include "sequence_manager.h"
+#include "timestamp_vector.h"
 
 namespace triton { namespace perfanalyzer {
 
@@ -64,22 +66,8 @@ struct ThreadStat {
   std::atomic<size_t> num_sent_requests_{0};
 };
 
-/// The properties of an asynchronous request required in
-/// the callback to effectively interpret the response.
-struct AsyncRequestProperties {
-  AsyncRequestProperties() : sequence_end_(false), delayed_(true) {}
-  // The timestamp of when the request was started.
-  std::chrono::time_point<std::chrono::system_clock> start_time_;
-  // Whether or not the request is at the end of a sequence.
-  bool sequence_end_;
-  // Whether or not the request is delayed as per schedule.
-  bool delayed_;
-  // Collection of response times
-  std::vector<std::chrono::time_point<std::chrono::system_clock>> end_times;
-};
-
 #ifndef DOCTEST_CONFIG_DISABLE
-class MockInferContext;
+class NaggyMockInferContext;
 #endif
 
 /// Sends inference requests to the server
@@ -146,7 +134,10 @@ class InferContext {
   /// A helper function to issue inference request to the server.
   /// \param request_id The unique id to be associated with the request.
   /// \param delayed Whether the request fell behind its scheduled time.
-  virtual void SendRequest(const uint64_t request_id, const bool delayed);
+  /// \param sequence_status_index Sequence status index of the request.
+  virtual void SendRequest(
+      const uint64_t request_id, const bool delayed,
+      const uint32_t sequence_status_index);
 
   /// Update inputs based on custom json data
   void UpdateJsonData();
@@ -159,7 +150,7 @@ class InferContext {
   // Callback function for handling asynchronous requests
   void AsyncCallbackFuncImpl(cb::InferResult* result);
 
-  const bool async_{false};
+  bool async_{false};
   const bool streaming_{false};
   const bool on_sequence_model_{false};
   bool using_json_data_{false};
@@ -172,7 +163,7 @@ class InferContext {
   std::shared_ptr<IInferDataManager> infer_data_manager_;
 
   uint64_t request_id_ = 0;
-  std::map<std::string, AsyncRequestProperties> async_req_map_;
+  std::map<std::string, RequestProperties> async_req_map_;
   std::atomic<uint> total_ongoing_requests_{0};
   size_t data_step_id_;
 
@@ -203,7 +194,7 @@ class InferContext {
   std::shared_ptr<SequenceManager> sequence_manager_{nullptr};
 
 #ifndef DOCTEST_CONFIG_DISABLE
-  friend MockInferContext;
+  friend NaggyMockInferContext;
 
  public:
   InferContext() = default;
diff --git a/src/c++/perf_analyzer/inference_profiler.cc b/src/c++/perf_analyzer/inference_profiler.cc
index b0dd3f224..22ec2a51c 100644
--- a/src/c++/perf_analyzer/inference_profiler.cc
+++ b/src/c++/perf_analyzer/inference_profiler.cc
@@ -1259,21 +1259,21 @@ InferenceProfiler::ValidLatencyMeasurement(
   std::vector<size_t> erase_indices{};
   for (size_t i = 0; i < all_timestamps_.size(); i++) {
     const auto& timestamp = all_timestamps_[i];
-    uint64_t request_start_ns = CHRONO_TO_NANOS(std::get<0>(timestamp));
-    uint64_t request_end_ns = CHRONO_TO_NANOS(std::get<1>(timestamp).back());
+    uint64_t request_start_ns = CHRONO_TO_NANOS(timestamp.start_time_);
+    uint64_t request_end_ns = CHRONO_TO_NANOS(timestamp.end_times_.back());
 
     if (request_start_ns <= request_end_ns) {
       // Only counting requests that end within the time interval
       if ((request_end_ns >= valid_range.first) &&
           (request_end_ns <= valid_range.second)) {
         valid_latencies->push_back(request_end_ns - request_start_ns);
-        response_count += std::get<1>(timestamp).size();
+        response_count += timestamp.end_times_.size();
         erase_indices.push_back(i);
         // Just add the sequence_end flag here.
-        if (std::get<2>(timestamp)) {
+        if (timestamp.sequence_end_) {
           valid_sequence_count++;
         }
-        if (std::get<3>(timestamp)) {
+        if (timestamp.delayed_) {
           delayed_request_count++;
         }
       }
diff --git a/src/c++/perf_analyzer/inference_profiler.h b/src/c++/perf_analyzer/inference_profiler.h
index b07cd93ae..db3735000 100644
--- a/src/c++/perf_analyzer/inference_profiler.h
+++ b/src/c++/perf_analyzer/inference_profiler.h
@@ -44,6 +44,7 @@
 #include "model_parser.h"
 #include "mpi_utils.h"
 #include "request_rate_manager.h"
+#include "timestamp_vector.h"
 
 namespace triton { namespace perfanalyzer {
 
diff --git a/src/c++/perf_analyzer/load_manager.h b/src/c++/perf_analyzer/load_manager.h
index 5a10ae592..cf54600f4 100644
--- a/src/c++/perf_analyzer/load_manager.h
+++ b/src/c++/perf_analyzer/load_manager.h
@@ -37,6 +37,7 @@
 #include "load_worker.h"
 #include "perf_utils.h"
 #include "sequence_manager.h"
+#include "timestamp_vector.h"
 
 namespace triton { namespace perfanalyzer {
 
diff --git a/src/c++/perf_analyzer/mock_infer_context.h b/src/c++/perf_analyzer/mock_infer_context.h
index 3b9d938b3..e60e8e25b 100644
--- a/src/c++/perf_analyzer/mock_infer_context.h
+++ b/src/c++/perf_analyzer/mock_infer_context.h
@@ -1,4 +1,4 @@
-// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -30,9 +30,23 @@
 
 namespace triton { namespace perfanalyzer {
 
-class MockInferContext : public InferContext {
+class NaggyMockInferContext : public InferContext {
  public:
-  MOCK_METHOD(void, SendRequest, (const uint64_t, const bool), (override));
+  NaggyMockInferContext()
+  {
+    ON_CALL(*this, SendRequest(testing::_, testing::_, testing::_))
+        .WillByDefault(
+            [this](
+                const uint64_t request_id, const bool delayed,
+                const uint32_t sequence_status_index) -> void {
+              this->InferContext::SendRequest(
+                  request_id, delayed, sequence_status_index);
+            });
+  }
+
+  MOCK_METHOD(
+      void, SendRequest, (const uint64_t, const bool, const uint32_t),
+      (override));
 
   std::shared_ptr<SequenceManager>& sequence_manager_{
       InferContext::sequence_manager_};
@@ -42,6 +56,14 @@ class MockInferContext : public InferContext {
   std::shared_ptr<ThreadStat>& thread_stat_{InferContext::thread_stat_};
   std::reference_wrapper<const bool>& execute_{InferContext::execute_};
   bool& using_json_data_{InferContext::using_json_data_};
+  std::map<std::string, RequestProperties>& async_req_map_{
+      InferContext::async_req_map_};
+  bool& async_{InferContext::async_};
+  InferData& infer_data_{InferContext::infer_data_};
+  std::unique_ptr<cb::ClientBackend>& infer_backend_{
+      InferContext::infer_backend_};
 };
 
+using MockInferContext = testing::NiceMock<NaggyMockInferContext>;
+
 }}  // namespace triton::perfanalyzer
diff --git a/src/c++/perf_analyzer/perf_utils.h b/src/c++/perf_analyzer/perf_utils.h
index 1865b8430..7166936a9 100644
--- a/src/c++/perf_analyzer/perf_utils.h
+++ b/src/c++/perf_analyzer/perf_utils.h
@@ -53,10 +53,6 @@ constexpr uint64_t NANOS_PER_MILLIS = 1000000;
 #define CHRONO_TO_MILLIS(TS) (CHRONO_TO_NANOS(TS) / pa::NANOS_PER_MILLIS)
 
 //==============================================================================
-using TimestampVector = std::vector<std::tuple<
-    std::chrono::time_point<std::chrono::system_clock>,
-    std::vector<std::chrono::time_point<std::chrono::system_clock>>, uint32_t,
-    bool>>;
 
 // Will use the characters specified here to construct random strings
 std::string const character_set =
diff --git a/src/c++/perf_analyzer/request_properties.h b/src/c++/perf_analyzer/request_properties.h
new file mode 100644
index 000000000..de5d30ae3
--- /dev/null
+++ b/src/c++/perf_analyzer/request_properties.h
@@ -0,0 +1,71 @@
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#pragma once
+
+#include <chrono>
+#include <cstdint>
+#include <tuple>
+#include <vector>
+
+namespace triton { namespace perfanalyzer {
+
+/// The properties of a request required in the callback to effectively
+/// interpret the response.
+struct RequestProperties {
+  RequestProperties() = default;
+  RequestProperties(
+      std::chrono::time_point<std::chrono::system_clock> start_time,
+      std::vector<std::chrono::time_point<std::chrono::system_clock>> end_times,
+      bool sequence_end, bool delayed,
+
+      uint32_t sequence_status_index)
+      : start_time_(start_time), end_times_(end_times),
+        sequence_end_(sequence_end), delayed_(delayed),
+        sequence_status_index_(sequence_status_index)
+  {
+  }
+  bool operator==(const RequestProperties& other) const
+  {
+    return std::tie(
+               start_time_, end_times_, sequence_end_, delayed_,
+               sequence_status_index_) ==
+           std::tie(
+               other.start_time_, other.end_times_, other.sequence_end_,
+               other.delayed_, other.sequence_status_index_);
+  }
+  // The timestamp of when the request was started.
+  std::chrono::time_point<std::chrono::system_clock> start_time_;
+  // Collection of response times
+  std::vector<std::chrono::time_point<std::chrono::system_clock>> end_times_;
+  // Whether or not the request is at the end of a sequence.
+  bool sequence_end_;
+  // Whether or not the request is delayed as per schedule.
+  bool delayed_;
+  // Sequence status index of the request
+  uint32_t sequence_status_index_;
+};
+
+}}  // namespace triton::perfanalyzer
diff --git a/src/c++/perf_analyzer/test_infer_context.cc b/src/c++/perf_analyzer/test_infer_context.cc
index 0af45b4dd..1cb718e20 100644
--- a/src/c++/perf_analyzer/test_infer_context.cc
+++ b/src/c++/perf_analyzer/test_infer_context.cc
@@ -1,4 +1,4 @@
-// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -24,6 +24,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include "client_backend/mock_client_backend.h"
 #include "doctest.h"
 #include "gmock/gmock.h"
 #include "infer_context.h"
@@ -92,7 +93,7 @@ TEST_CASE("update_seq_json_data: testing the UpdateSeqJsonData function")
 
   std::shared_ptr<MockInferContext> mic{std::make_shared<MockInferContext>()};
 
-  EXPECT_CALL(*mic, SendRequest(testing::_, testing::_))
+  EXPECT_CALL(*mic, SendRequest(testing::_, testing::_, testing::_))
       .Times(6)
       .WillRepeatedly(testing::Return());
 
@@ -122,4 +123,39 @@ TEST_CASE("update_seq_json_data: testing the UpdateSeqJsonData function")
   REQUIRE(testing::Test::HasFailure() == false);
 }
 
+TEST_CASE("send_request: testing the SendRequest function")
+{
+  MockInferContext mock_infer_context{};
+
+  SUBCASE("testing logic relevant to request timestamp sequence ID")
+  {
+    mock_infer_context.thread_stat_ = std::make_shared<ThreadStat>();
+    mock_infer_context.async_ = true;
+    mock_infer_context.infer_data_.options_ =
+        std::make_unique<cb::InferOptions>("my_model");
+    mock_infer_context.infer_backend_ =
+        std::make_unique<cb::MockClientBackend>();
+
+    EXPECT_CALL(
+        dynamic_cast<cb::MockClientBackend&>(
+            *mock_infer_context.infer_backend_),
+        AsyncInfer(testing::_, testing::_, testing::_, testing::_))
+        .WillOnce(testing::Return(cb::Error::Success));
+
+    const uint64_t request_id{5};
+    const bool delayed{false};
+    const uint32_t sequence_status_index{2};
+
+    mock_infer_context.SendRequest(request_id, delayed, sequence_status_index);
+
+    CHECK(mock_infer_context.async_req_map_.size() == 1);
+    CHECK(
+        mock_infer_context.async_req_map_.find(std::to_string(request_id)) !=
+        mock_infer_context.async_req_map_.end());
+    CHECK(
+        mock_infer_context.async_req_map_[std::to_string(request_id)]
+            .sequence_status_index_ == sequence_status_index);
+  }
+}
+
 }}  // namespace triton::perfanalyzer
diff --git a/src/c++/perf_analyzer/test_inference_profiler.cc b/src/c++/perf_analyzer/test_inference_profiler.cc
index 71ed2eb89..c497226b1 100644
--- a/src/c++/perf_analyzer/test_inference_profiler.cc
+++ b/src/c++/perf_analyzer/test_inference_profiler.cc
@@ -175,44 +175,43 @@ TEST_CASE("testing the ValidLatencyMeasurement function")
       // request ends before window starts, this should not be possible to exist
       // in the vector of requests, but if it is, we exclude it: not included in
       // current window
-      std::make_tuple(
+      RequestProperties(
           time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0,
-          false),
+          false, 0),
 
       // request starts before window starts and ends inside window: included in
       // current window
-      std::make_tuple(
+      RequestProperties(
           time_point(ns(3)), std::vector<time_point>{time_point(ns(5))}, 0,
-          false),
+          false, 0),
 
       // requests start and end inside window: included in current window
-      std::make_tuple(
+      RequestProperties(
           time_point(ns(6)), std::vector<time_point>{time_point(ns(9))}, 0,
-          false),
-      std::make_tuple(
+          false, 0),
+      RequestProperties(
           time_point(ns(10)), std::vector<time_point>{time_point(ns(14))}, 0,
-          false),
+          false, 0),
 
       // request starts before window ends and ends after window ends: not
       // included in current window
-      std::make_tuple(
+      RequestProperties(
           time_point(ns(15)), std::vector<time_point>{time_point(ns(20))}, 0,
-          false),
+          false, 0),
 
       // request starts after window ends: not included in current window
-      std::make_tuple(
+      RequestProperties(
           time_point(ns(21)), std::vector<time_point>{time_point(ns(27))}, 0,
-          false)};
+          false, 0)};
 
   TestInferenceProfiler::ValidLatencyMeasurement(
       window, valid_sequence_count, delayed_request_count, &latencies,
       response_count, all_timestamps);
 
-  const auto& convert_timestamp_to_latency{
-      [](std::tuple<time_point, std::vector<time_point>, uint32_t, bool> t) {
-        return CHRONO_TO_NANOS(std::get<1>(t).back()) -
-               CHRONO_TO_NANOS(std::get<0>(t));
-      }};
+  const auto& convert_timestamp_to_latency{[](RequestProperties t) {
+    return CHRONO_TO_NANOS(t.end_times_.back()) -
+           CHRONO_TO_NANOS(t.start_time_);
+  }};
 
   CHECK(latencies.size() == 3);
   CHECK(latencies[0] == convert_timestamp_to_latency(all_timestamps[1]));
@@ -871,21 +870,21 @@ TEST_CASE(
     auto request1_timestamp{clock_epoch + std::chrono::nanoseconds(1)};
     auto response1_timestamp{clock_epoch + std::chrono::nanoseconds(2)};
     auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(3)};
-    auto timestamp1{std::make_tuple(
+    auto timestamp1{RequestProperties(
         request1_timestamp,
         std::vector<std::chrono::time_point<std::chrono::system_clock>>{
             response1_timestamp, response2_timestamp},
-        0, false)};
+        0, false, 0)};
 
     auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(4)};
     auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
     auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
     auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
-    auto timestamp2{std::make_tuple(
+    auto timestamp2{RequestProperties(
         request2_timestamp,
         std::vector<std::chrono::time_point<std::chrono::system_clock>>{
             response3_timestamp, response4_timestamp, response5_timestamp},
-        0, false)};
+        0, false, 0)};
 
     mock_inference_profiler.all_timestamps_ = {timestamp1, timestamp2};
 
diff --git a/src/c++/perf_analyzer/test_load_manager.cc b/src/c++/perf_analyzer/test_load_manager.cc
index 224dc895f..dc1f8b31e 100644
--- a/src/c++/perf_analyzer/test_load_manager.cc
+++ b/src/c++/perf_analyzer/test_load_manager.cc
@@ -117,15 +117,15 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
   {
     using time_point = std::chrono::time_point<std::chrono::system_clock>;
     using ns = std::chrono::nanoseconds;
-    auto timestamp1 = std::make_tuple(
-        time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0,
-        false);
-    auto timestamp2 = std::make_tuple(
-        time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0,
-        false);
-    auto timestamp3 = std::make_tuple(
-        time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0,
-        false);
+    auto timestamp1 = RequestProperties(
+        time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0, false,
+        0);
+    auto timestamp2 = RequestProperties(
+        time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0, false,
+        0);
+    auto timestamp3 = RequestProperties(
+        time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0, false,
+        0);
 
     TimestampVector source_timestamps;
 
@@ -278,15 +278,15 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
   {
     using time_point = std::chrono::time_point<std::chrono::system_clock>;
     using ns = std::chrono::nanoseconds;
-    auto timestamp1 = std::make_tuple(
-        time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0,
-        false);
-    auto timestamp2 = std::make_tuple(
-        time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0,
-        false);
-    auto timestamp3 = std::make_tuple(
-        time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0,
-        false);
+    auto timestamp1 = RequestProperties(
+        time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0, false,
+        0);
+    auto timestamp2 = RequestProperties(
+        time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0, false,
+        0);
+    auto timestamp3 = RequestProperties(
+        time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0, false,
+        0);
 
     SUBCASE("No threads")
     {
diff --git a/src/c++/perf_analyzer/timestamp_vector.h b/src/c++/perf_analyzer/timestamp_vector.h
new file mode 100644
index 000000000..5132e762e
--- /dev/null
+++ b/src/c++/perf_analyzer/timestamp_vector.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#pragma once
+
+#include <vector>
+
+#include "request_properties.h"
+
+namespace triton { namespace perfanalyzer {
+
+using TimestampVector = std::vector<RequestProperties>;
+
+}}  // namespace triton::perfanalyzer

From cbc34ae6ee977fd6590368fb05b09bde4b85adef Mon Sep 17 00:00:00 2001
From: Matthew Kotila <matthew.r.kotila@gmail.com>
Date: Wed, 26 Jul 2023 20:45:49 +0000
Subject: [PATCH 2/4] Fix bug and address feedback

---
 .../client_backend/mock_client_backend.h      | 42 ++++++++---------
 src/c++/perf_analyzer/infer_context.cc        | 17 ++++---
 src/c++/perf_analyzer/infer_context.h         |  6 +--
 src/c++/perf_analyzer/inference_profiler.cc   |  1 -
 src/c++/perf_analyzer/mock_infer_context.h    | 12 ++---
 src/c++/perf_analyzer/request_properties.h    | 14 +++---
 src/c++/perf_analyzer/sequence_manager.cc     |  8 +++-
 src/c++/perf_analyzer/sequence_manager.h      |  6 +++
 src/c++/perf_analyzer/test_infer_context.cc   | 47 +++++++++++++------
 9 files changed, 90 insertions(+), 63 deletions(-)

diff --git a/src/c++/perf_analyzer/client_backend/mock_client_backend.h b/src/c++/perf_analyzer/client_backend/mock_client_backend.h
index c81122e7b..483af914d 100644
--- a/src/c++/perf_analyzer/client_backend/mock_client_backend.h
+++ b/src/c++/perf_analyzer/client_backend/mock_client_backend.h
@@ -103,7 +103,7 @@ class MockInferInput : public InferInput {
 ///
 class MockInferResult : public InferResult {
  public:
-  MockInferResult(const InferOptions& options) : req_id_{options.request_id_} {}
+  MockInferResult(const InferOptions& options) : req_id_(options.request_id_) {}
 
   Error Id(std::string* id) const override
   {
@@ -468,28 +468,22 @@ class MockClientStats {
 
 /// Mock implementation of ClientBackend interface
 ///
-class MockClientBackend : public ClientBackend {
+class NaggyMockClientBackend : public ClientBackend {
  public:
-  MockClientBackend() { SetupMocks(); };
-  MockClientBackend(std::shared_ptr<MockClientStats> stats)
+  NaggyMockClientBackend(std::shared_ptr<MockClientStats> stats) : stats_(stats)
   {
-    SetupMocks();
-    stats_ = stats;
-  }
-
-  void SetupMocks()
-  {
-    ON_CALL(*this, AsyncInfer(testing::_, testing::_, testing::_, testing::_))
+    ON_CALL(*this, AsyncStreamInfer(testing::_, testing::_, testing::_))
         .WillByDefault(
             [this](
-                OnCompleteFn callback, const InferOptions& options,
+                const InferOptions& options,
                 const std::vector<InferInput*>& inputs,
                 const std::vector<const InferRequestedOutput*>& outputs)
                 -> Error {
               stats_->CaptureRequest(
-                  MockClientStats::ReqType::ASYNC, options, inputs, outputs);
+                  MockClientStats::ReqType::ASYNC_STREAM, options, inputs,
+                  outputs);
 
-              LaunchAsyncMockRequest(options, callback);
+              LaunchAsyncMockRequest(options, stream_callback_);
 
               return stats_->GetNextReturnStatus();
             });
@@ -500,8 +494,8 @@ class MockClientBackend : public ClientBackend {
       (rapidjson::Document*, const std::string&, const std::string&),
       (override));
   MOCK_METHOD(
-      Error, AsyncInfer,
-      (OnCompleteFn, const InferOptions&, const std::vector<InferInput*>&,
+      Error, AsyncStreamInfer,
+      (const InferOptions&, const std::vector<InferInput*>&,
        const std::vector<const InferRequestedOutput*>&),
       (override));
 
@@ -521,14 +515,15 @@ class MockClientBackend : public ClientBackend {
     return stats_->GetNextReturnStatus();
   }
 
-  Error AsyncStreamInfer(
-      const InferOptions& options, const std::vector<InferInput*>& inputs,
-      const std::vector<const InferRequestedOutput*>& outputs)
+  Error AsyncInfer(
+      OnCompleteFn callback, const InferOptions& options,
+      const std::vector<InferInput*>& inputs,
+      const std::vector<const InferRequestedOutput*>& outputs) override
   {
     stats_->CaptureRequest(
-        MockClientStats::ReqType::ASYNC_STREAM, options, inputs, outputs);
+        MockClientStats::ReqType::ASYNC, options, inputs, outputs);
 
-    LaunchAsyncMockRequest(options, stream_callback_);
+    LaunchAsyncMockRequest(options, callback);
 
     return stats_->GetNextReturnStatus();
   }
@@ -616,6 +611,8 @@ class MockClientBackend : public ClientBackend {
     return Error::Success;
   }
 
+  OnCompleteFn stream_callback_;
+
  private:
   void LaunchAsyncMockRequest(const InferOptions options, OnCompleteFn callback)
   {
@@ -634,9 +631,10 @@ class MockClientBackend : public ClientBackend {
   size_t local_completed_req_count_ = 0;
 
   std::shared_ptr<MockClientStats> stats_;
-  OnCompleteFn stream_callback_;
 };
 
+using MockClientBackend = testing::NiceMock<NaggyMockClientBackend>;
+
 /// Mock factory that always creates a MockClientBackend instead
 /// of a real backend
 ///
diff --git a/src/c++/perf_analyzer/infer_context.cc b/src/c++/perf_analyzer/infer_context.cc
index 01a2febd0..30e78fe71 100644
--- a/src/c++/perf_analyzer/infer_context.cc
+++ b/src/c++/perf_analyzer/infer_context.cc
@@ -74,7 +74,9 @@ InferContext::SendSequenceInferRequest(uint32_t seq_stat_index, bool delayed)
 
     sequence_manager_->DecrementRemainingQueries(seq_stat_index);
 
-    SendRequest(request_id_++, delayed, seq_stat_index);
+    SendRequest(
+        request_id_++, delayed,
+        sequence_manager_->GetSequenceID(seq_stat_index));
   }
 }
 
@@ -95,14 +97,15 @@ InferContext::CompleteOngoingSequence(uint32_t seq_stat_index)
     sequence_manager_->DecrementRemainingQueries(seq_stat_index);
 
     bool is_delayed = false;
-    SendRequest(request_id_++, is_delayed, seq_stat_index);
+    SendRequest(
+        request_id_++, is_delayed,
+        sequence_manager_->GetSequenceID(seq_stat_index));
   }
 }
 
 void
 InferContext::SendRequest(
-    const uint64_t request_id, const bool delayed,
-    const uint32_t seq_stat_index)
+    const uint64_t request_id, const bool delayed, const uint64_t sequence_id)
 {
   if (!thread_stat_->status_.IsOk()) {
     return;
@@ -120,7 +123,7 @@ InferContext::SendRequest(
       it->second.start_time_ = std::chrono::system_clock::now();
       it->second.sequence_end_ = infer_data_.options_->sequence_end_;
       it->second.delayed_ = delayed;
-      it->second.sequence_status_index_ = seq_stat_index;
+      it->second.sequence_id_ = sequence_id;
     }
 
     thread_stat_->idle_timer.Start();
@@ -165,7 +168,7 @@ InferContext::SendRequest(
       auto total = end_time_sync - start_time_sync;
       thread_stat_->request_timestamps_.emplace_back(RequestProperties(
           start_time_sync, std::move(end_time_syncs),
-          infer_data_.options_->sequence_end_, delayed, seq_stat_index));
+          infer_data_.options_->sequence_end_, delayed, sequence_id));
       thread_stat_->status_ =
           infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
       if (!thread_stat_->status_.IsOk()) {
@@ -267,7 +270,7 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
           thread_stat_->request_timestamps_.emplace_back(
               it->second.start_time_, it->second.end_times_,
               it->second.sequence_end_, it->second.delayed_,
-              it->second.sequence_status_index_);
+              it->second.sequence_id_);
           infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
           thread_stat_->cb_status_ = ValidateOutputs(result);
           async_req_map_.erase(request_id);
diff --git a/src/c++/perf_analyzer/infer_context.h b/src/c++/perf_analyzer/infer_context.h
index a9217b01f..84e4ddb44 100644
--- a/src/c++/perf_analyzer/infer_context.h
+++ b/src/c++/perf_analyzer/infer_context.h
@@ -134,10 +134,10 @@ class InferContext {
   /// A helper function to issue inference request to the server.
   /// \param request_id The unique id to be associated with the request.
   /// \param delayed Whether the request fell behind its scheduled time.
-  /// \param sequence_status_index Sequence status index of the request.
+  /// \param sequence_id Sequence ID of the request.
   virtual void SendRequest(
       const uint64_t request_id, const bool delayed,
-      const uint32_t sequence_status_index);
+      const uint64_t sequence_id);
 
   /// Update inputs based on custom json data
   void UpdateJsonData();
@@ -151,7 +151,7 @@ class InferContext {
   void AsyncCallbackFuncImpl(cb::InferResult* result);
 
   bool async_{false};
-  const bool streaming_{false};
+  bool streaming_{false};
   const bool on_sequence_model_{false};
   bool using_json_data_{false};
   const int32_t batch_size_{0};
diff --git a/src/c++/perf_analyzer/inference_profiler.cc b/src/c++/perf_analyzer/inference_profiler.cc
index 22ec2a51c..73ef1d758 100644
--- a/src/c++/perf_analyzer/inference_profiler.cc
+++ b/src/c++/perf_analyzer/inference_profiler.cc
@@ -1269,7 +1269,6 @@ InferenceProfiler::ValidLatencyMeasurement(
         valid_latencies->push_back(request_end_ns - request_start_ns);
         response_count += timestamp.end_times_.size();
         erase_indices.push_back(i);
-        // Just add the sequence_end flag here.
         if (timestamp.sequence_end_) {
           valid_sequence_count++;
         }
diff --git a/src/c++/perf_analyzer/mock_infer_context.h b/src/c++/perf_analyzer/mock_infer_context.h
index e60e8e25b..e1c15d03c 100644
--- a/src/c++/perf_analyzer/mock_infer_context.h
+++ b/src/c++/perf_analyzer/mock_infer_context.h
@@ -38,14 +38,13 @@ class NaggyMockInferContext : public InferContext {
         .WillByDefault(
             [this](
                 const uint64_t request_id, const bool delayed,
-                const uint32_t sequence_status_index) -> void {
-              this->InferContext::SendRequest(
-                  request_id, delayed, sequence_status_index);
+                const uint64_t sequence_id) -> void {
+              this->InferContext::SendRequest(request_id, delayed, sequence_id);
             });
   }
 
   MOCK_METHOD(
-      void, SendRequest, (const uint64_t, const bool, const uint32_t),
+      void, SendRequest, (const uint64_t, const bool, const uint64_t),
       (override));
 
   std::shared_ptr<SequenceManager>& sequence_manager_{
@@ -56,12 +55,13 @@ class NaggyMockInferContext : public InferContext {
   std::shared_ptr<ThreadStat>& thread_stat_{InferContext::thread_stat_};
   std::reference_wrapper<const bool>& execute_{InferContext::execute_};
   bool& using_json_data_{InferContext::using_json_data_};
-  std::map<std::string, RequestProperties>& async_req_map_{
-      InferContext::async_req_map_};
   bool& async_{InferContext::async_};
+  bool& streaming_{InferContext::streaming_};
   InferData& infer_data_{InferContext::infer_data_};
   std::unique_ptr<cb::ClientBackend>& infer_backend_{
       InferContext::infer_backend_};
+  std::function<void(cb::InferResult*)>& async_callback_func_{
+      InferContext::async_callback_func_};
 };
 
 using MockInferContext = testing::NiceMock<NaggyMockInferContext>;
diff --git a/src/c++/perf_analyzer/request_properties.h b/src/c++/perf_analyzer/request_properties.h
index de5d30ae3..dce688482 100644
--- a/src/c++/perf_analyzer/request_properties.h
+++ b/src/c++/perf_analyzer/request_properties.h
@@ -39,22 +39,20 @@ struct RequestProperties {
   RequestProperties(
       std::chrono::time_point<std::chrono::system_clock> start_time,
       std::vector<std::chrono::time_point<std::chrono::system_clock>> end_times,
-      bool sequence_end, bool delayed,
-
-      uint32_t sequence_status_index)
+      bool sequence_end, bool delayed, uint64_t sequence_id)
       : start_time_(start_time), end_times_(end_times),
         sequence_end_(sequence_end), delayed_(delayed),
-        sequence_status_index_(sequence_status_index)
+        sequence_id_(sequence_id)
   {
   }
   bool operator==(const RequestProperties& other) const
   {
     return std::tie(
                start_time_, end_times_, sequence_end_, delayed_,
-               sequence_status_index_) ==
+               sequence_id_) ==
            std::tie(
                other.start_time_, other.end_times_, other.sequence_end_,
-               other.delayed_, other.sequence_status_index_);
+               other.delayed_, other.sequence_id_);
   }
   // The timestamp of when the request was started.
   std::chrono::time_point<std::chrono::system_clock> start_time_;
@@ -64,8 +62,8 @@ struct RequestProperties {
   bool sequence_end_;
   // Whether or not the request is delayed as per schedule.
   bool delayed_;
-  // Sequence status index of the request
-  uint32_t sequence_status_index_;
+  // Sequence ID of the request
+  uint64_t sequence_id_;
 };
 
 }}  // namespace triton::perfanalyzer
diff --git a/src/c++/perf_analyzer/sequence_manager.cc b/src/c++/perf_analyzer/sequence_manager.cc
index 089768232..eaf5d6e00 100644
--- a/src/c++/perf_analyzer/sequence_manager.cc
+++ b/src/c++/perf_analyzer/sequence_manager.cc
@@ -1,4 +1,4 @@
-// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -53,6 +53,12 @@ SequenceManager::InitSequenceStatuses(size_t num_sequence_statuses)
   }
 }
 
+const uint64_t
+SequenceManager::GetSequenceID(size_t sequence_status_index) const
+{
+  return sequence_statuses_.at(sequence_status_index)->seq_id_;
+}
+
 std::mutex&
 SequenceManager::GetMutex(size_t sequence_status_index)
 {
diff --git a/src/c++/perf_analyzer/sequence_manager.h b/src/c++/perf_analyzer/sequence_manager.h
index d204b0f0b..c419a87f0 100644
--- a/src/c++/perf_analyzer/sequence_manager.h
+++ b/src/c++/perf_analyzer/sequence_manager.h
@@ -68,6 +68,12 @@ class SequenceManager {
   ///
   void InitSequenceStatuses(size_t num_sequence_statuses);
 
+  /// Gets the sequence ID for the specified sequence status object.
+  /// \param sequence_status_index The index of the sequence status object.
+  /// \return The sequence ID for the specified sequence status object.
+  ///
+  const uint64_t GetSequenceID(size_t sequence_status_index) const;
+
   /// Gets a non-const reference to the mutex for the specified sequence status
   /// object.
   /// \param sequence_status_index The index of the sequence status object.
diff --git a/src/c++/perf_analyzer/test_infer_context.cc b/src/c++/perf_analyzer/test_infer_context.cc
index 1cb718e20..05fd11245 100644
--- a/src/c++/perf_analyzer/test_infer_context.cc
+++ b/src/c++/perf_analyzer/test_infer_context.cc
@@ -130,31 +130,48 @@ TEST_CASE("send_request: testing the SendRequest function")
   SUBCASE("testing logic relevant to request timestamp sequence ID")
   {
     mock_infer_context.thread_stat_ = std::make_shared<ThreadStat>();
+    mock_infer_context.thread_stat_->contexts_stat_.emplace_back();
     mock_infer_context.async_ = true;
+    mock_infer_context.streaming_ = true;
     mock_infer_context.infer_data_.options_ =
         std::make_unique<cb::InferOptions>("my_model");
+    std::shared_ptr<cb::MockClientStats> mock_client_stats{
+        std::make_shared<cb::MockClientStats>()};
     mock_infer_context.infer_backend_ =
-        std::make_unique<cb::MockClientBackend>();
-
-    EXPECT_CALL(
-        dynamic_cast<cb::MockClientBackend&>(
-            *mock_infer_context.infer_backend_),
-        AsyncInfer(testing::_, testing::_, testing::_, testing::_))
-        .WillOnce(testing::Return(cb::Error::Success));
+        std::make_unique<cb::MockClientBackend>(mock_client_stats);
 
     const uint64_t request_id{5};
     const bool delayed{false};
-    const uint32_t sequence_status_index{2};
+    const uint64_t sequence_id{2};
 
-    mock_infer_context.SendRequest(request_id, delayed, sequence_status_index);
+    mock_infer_context.infer_data_.options_->request_id_ =
+        std::to_string(request_id);
 
-    CHECK(mock_infer_context.async_req_map_.size() == 1);
-    CHECK(
-        mock_infer_context.async_req_map_.find(std::to_string(request_id)) !=
-        mock_infer_context.async_req_map_.end());
+    cb::MockInferResult* mock_infer_result{
+        new cb::MockInferResult(*mock_infer_context.infer_data_.options_)};
+
+    cb::OnCompleteFn& stream_callback{mock_infer_context.async_callback_func_};
+
+    EXPECT_CALL(
+        dynamic_cast<cb::MockClientBackend&>(
+            *mock_infer_context.infer_backend_),
+        AsyncStreamInfer(testing::_, testing::_, testing::_))
+        .WillOnce(
+            [&mock_infer_result, &stream_callback](
+                const cb::InferOptions& options,
+                const std::vector<cb::InferInput*>& inputs,
+                const std::vector<const cb::InferRequestedOutput*>& outputs)
+                -> cb::Error {
+              stream_callback(mock_infer_result);
+              return cb::Error::Success;
+            });
+
+    mock_infer_context.SendRequest(request_id, delayed, sequence_id);
+
+    CHECK(mock_infer_context.thread_stat_->request_timestamps_.size() == 1);
     CHECK(
-        mock_infer_context.async_req_map_[std::to_string(request_id)]
-            .sequence_status_index_ == sequence_status_index);
+        mock_infer_context.thread_stat_->request_timestamps_[0].sequence_id_ ==
+        sequence_id);
   }
 }
 

From a6fe66ad5258589621d58b41c9c4a1fd887422c4 Mon Sep 17 00:00:00 2001
From: Matthew Kotila <matthew.r.kotila@gmail.com>
Date: Thu, 27 Jul 2023 17:13:21 +0000
Subject: [PATCH 3/4] Address feedback

---
 src/c++/perf_analyzer/CMakeLists.txt          |   3 +-
 src/c++/perf_analyzer/infer_context.cc        |  22 +--
 src/c++/perf_analyzer/infer_context.h         |  12 +-
 src/c++/perf_analyzer/inference_profiler.cc   |  39 +++---
 src/c++/perf_analyzer/inference_profiler.h    |  11 +-
 src/c++/perf_analyzer/load_manager.cc         |  19 ++-
 src/c++/perf_analyzer/load_manager.h          |   9 +-
 .../perf_analyzer/mock_inference_profiler.h   |   3 +-
 ...{request_properties.h => request_record.h} |  26 ++--
 src/c++/perf_analyzer/test_infer_context.cc   |   6 +-
 .../perf_analyzer/test_inference_profiler.cc  |  43 +++---
 src/c++/perf_analyzer/test_load_manager.cc    | 126 ++++++++++--------
 src/c++/perf_analyzer/timestamp_vector.h      |  36 -----
 13 files changed, 165 insertions(+), 190 deletions(-)
 rename src/c++/perf_analyzer/{request_properties.h => request_record.h} (80%)
 delete mode 100644 src/c++/perf_analyzer/timestamp_vector.h

diff --git a/src/c++/perf_analyzer/CMakeLists.txt b/src/c++/perf_analyzer/CMakeLists.txt
index 596a01a02..089cc1c28 100644
--- a/src/c++/perf_analyzer/CMakeLists.txt
+++ b/src/c++/perf_analyzer/CMakeLists.txt
@@ -104,8 +104,7 @@ set(
   concurrency_ctx_id_tracker.h
   fifo_ctx_id_tracker.h
   rand_ctx_id_tracker.h
-  request_properties.h
-  timestamp_vector.h
+  request_record.h
 )
 
 add_executable(
diff --git a/src/c++/perf_analyzer/infer_context.cc b/src/c++/perf_analyzer/infer_context.cc
index 30e78fe71..f020cd8fa 100644
--- a/src/c++/perf_analyzer/infer_context.cc
+++ b/src/c++/perf_analyzer/infer_context.cc
@@ -53,7 +53,7 @@ InferContext::SendInferRequest(bool delayed)
   if (using_json_data_) {
     UpdateJsonData();
   }
-  SendRequest(request_id_++, delayed, /* ignored */ 0);
+  SendRequest(request_id_++, delayed);
 }
 
 void
@@ -116,10 +116,9 @@ InferContext::SendRequest(
     infer_data_.options_->request_id_ = std::to_string(request_id);
     {
       std::lock_guard<std::mutex> lock(thread_stat_->mu_);
-      auto it =
-          async_req_map_
-              .emplace(infer_data_.options_->request_id_, RequestProperties())
-              .first;
+      auto it = async_req_map_
+                    .emplace(infer_data_.options_->request_id_, RequestRecord())
+                    .first;
       it->second.start_time_ = std::chrono::system_clock::now();
       it->second.sequence_end_ = infer_data_.options_->sequence_end_;
       it->second.delayed_ = delayed;
@@ -162,11 +161,11 @@ InferContext::SendRequest(
     std::vector<std::chrono::time_point<std::chrono::system_clock>>
         end_time_syncs{end_time_sync};
     {
-      // Add the request timestamp to thread Timestamp vector with proper
+      // Add the request record to thread request records vector with proper
       // locking
       std::lock_guard<std::mutex> lock(thread_stat_->mu_);
       auto total = end_time_sync - start_time_sync;
-      thread_stat_->request_timestamps_.emplace_back(RequestProperties(
+      thread_stat_->request_records_.emplace_back(RequestRecord(
           start_time_sync, std::move(end_time_syncs),
           infer_data_.options_->sequence_end_, delayed, sequence_id));
       thread_stat_->status_ =
@@ -243,7 +242,7 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
   std::shared_ptr<cb::InferResult> result_ptr(result);
   bool is_final_response{true};
   if (thread_stat_->cb_status_.IsOk()) {
-    // Add the request timestamp to thread Timestamp vector with
+    // Add the request record to thread request records vector with
     // proper locking
     std::lock_guard<std::mutex> lock(thread_stat_->mu_);
     thread_stat_->cb_status_ = result_ptr->RequestStatus();
@@ -259,7 +258,8 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
           return;
         }
         if (is_null_response == false) {
-          it->second.end_times_.push_back(std::chrono::system_clock::now());
+          it->second.response_times_.push_back(
+              std::chrono::system_clock::now());
         }
         thread_stat_->cb_status_ =
             result_ptr->IsFinalResponse(&is_final_response);
@@ -267,8 +267,8 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
           return;
         }
         if (is_final_response) {
-          thread_stat_->request_timestamps_.emplace_back(
-              it->second.start_time_, it->second.end_times_,
+          thread_stat_->request_records_.emplace_back(
+              it->second.start_time_, it->second.response_times_,
               it->second.sequence_end_, it->second.delayed_,
               it->second.sequence_id_);
           infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
diff --git a/src/c++/perf_analyzer/infer_context.h b/src/c++/perf_analyzer/infer_context.h
index 84e4ddb44..7f9d3c6ce 100644
--- a/src/c++/perf_analyzer/infer_context.h
+++ b/src/c++/perf_analyzer/infer_context.h
@@ -36,9 +36,8 @@
 #include "iinfer_data_manager.h"
 #include "infer_data.h"
 #include "perf_utils.h"
-#include "request_properties.h"
+#include "request_record.h"
 #include "sequence_manager.h"
-#include "timestamp_vector.h"
 
 namespace triton { namespace perfanalyzer {
 
@@ -57,9 +56,8 @@ struct ThreadStat {
   // Tracks the amount of time this thread spent sleeping or waiting
   IdleTimer idle_timer;
 
-  // A vector of request timestamps <start_time, end_time>
-  // Request latency will be end_time - start_time
-  TimestampVector request_timestamps_;
+  // A vector of request records
+  std::vector<RequestRecord> request_records_;
   // A lock to protect thread data
   std::mutex mu_;
   // The number of sent requests by this thread.
@@ -137,7 +135,7 @@ class InferContext {
   /// \param sequence_id Sequence ID of the request.
   virtual void SendRequest(
       const uint64_t request_id, const bool delayed,
-      const uint64_t sequence_id);
+      const uint64_t sequence_id = 0);
 
   /// Update inputs based on custom json data
   void UpdateJsonData();
@@ -163,7 +161,7 @@ class InferContext {
   std::shared_ptr<IInferDataManager> infer_data_manager_;
 
   uint64_t request_id_ = 0;
-  std::map<std::string, RequestProperties> async_req_map_;
+  std::map<std::string, RequestRecord> async_req_map_;
   std::atomic<uint> total_ongoing_requests_{0};
   size_t data_step_id_;
 
diff --git a/src/c++/perf_analyzer/inference_profiler.cc b/src/c++/perf_analyzer/inference_profiler.cc
index 73ef1d758..76d4d113f 100644
--- a/src/c++/perf_analyzer/inference_profiler.cc
+++ b/src/c++/perf_analyzer/inference_profiler.cc
@@ -681,13 +681,13 @@ InferenceProfiler::ProfileHelper(
   size_t completed_trials = 0;
   std::queue<cb::Error> error;
   std::deque<PerfStatus> measurement_perf_statuses;
-  all_timestamps_.clear();
+  all_request_records_.clear();
   previous_window_end_ns_ = 0;
 
-  // Start with a fresh empty timestamp vector in the manager
+  // Start with a fresh empty request records vector in the manager
   //
-  TimestampVector empty_timestamps;
-  RETURN_IF_ERROR(manager_->SwapTimestamps(empty_timestamps));
+  std::vector<RequestRecord> empty_request_records;
+  RETURN_IF_ERROR(manager_->SwapRequestRecords(empty_request_records));
 
   do {
     PerfStatus measurement_perf_status;
@@ -1193,11 +1193,11 @@ InferenceProfiler::Measure(
   RETURN_IF_ERROR(manager_->GetAccumulatedClientStat(&end_stat));
   prev_client_side_stats_ = end_stat;
 
-  TimestampVector current_timestamps;
-  RETURN_IF_ERROR(manager_->SwapTimestamps(current_timestamps));
-  all_timestamps_.insert(
-      all_timestamps_.end(), current_timestamps.begin(),
-      current_timestamps.end());
+  std::vector<RequestRecord> current_request_records;
+  RETURN_IF_ERROR(manager_->SwapRequestRecords(current_request_records));
+  all_request_records_.insert(
+      all_request_records_.end(), current_request_records.begin(),
+      current_request_records.end());
 
   RETURN_IF_ERROR(Summarize(
       start_status, end_status, start_stat, end_stat, perf_status,
@@ -1257,22 +1257,23 @@ InferenceProfiler::ValidLatencyMeasurement(
   valid_sequence_count = 0;
   response_count = 0;
   std::vector<size_t> erase_indices{};
-  for (size_t i = 0; i < all_timestamps_.size(); i++) {
-    const auto& timestamp = all_timestamps_[i];
-    uint64_t request_start_ns = CHRONO_TO_NANOS(timestamp.start_time_);
-    uint64_t request_end_ns = CHRONO_TO_NANOS(timestamp.end_times_.back());
+  for (size_t i = 0; i < all_request_records_.size(); i++) {
+    const auto& request_record = all_request_records_[i];
+    uint64_t request_start_ns = CHRONO_TO_NANOS(request_record.start_time_);
+    uint64_t request_end_ns =
+        CHRONO_TO_NANOS(request_record.response_times_.back());
 
     if (request_start_ns <= request_end_ns) {
       // Only counting requests that end within the time interval
       if ((request_end_ns >= valid_range.first) &&
           (request_end_ns <= valid_range.second)) {
         valid_latencies->push_back(request_end_ns - request_start_ns);
-        response_count += timestamp.end_times_.size();
+        response_count += request_record.response_times_.size();
         erase_indices.push_back(i);
-        if (timestamp.sequence_end_) {
+        if (request_record.sequence_end_) {
           valid_sequence_count++;
         }
-        if (timestamp.delayed_) {
+        if (request_record.delayed_) {
           delayed_request_count++;
         }
       }
@@ -1280,10 +1281,10 @@ InferenceProfiler::ValidLatencyMeasurement(
   }
 
   // Iterate through erase indices backwards so that erases from
-  // `all_timestamps_` happen from the back to the front to avoid using wrong
-  // indices after subsequent erases
+  // `all_request_records_` happen from the back to the front to avoid using
+  // wrong indices after subsequent erases
   std::for_each(erase_indices.rbegin(), erase_indices.rend(), [this](size_t i) {
-    this->all_timestamps_.erase(this->all_timestamps_.begin() + i);
+    this->all_request_records_.erase(this->all_request_records_.begin() + i);
   });
 
   // Always sort measured latencies as percentile will be reported as default
diff --git a/src/c++/perf_analyzer/inference_profiler.h b/src/c++/perf_analyzer/inference_profiler.h
index db3735000..b76a8dac7 100644
--- a/src/c++/perf_analyzer/inference_profiler.h
+++ b/src/c++/perf_analyzer/inference_profiler.h
@@ -44,7 +44,6 @@
 #include "model_parser.h"
 #include "mpi_utils.h"
 #include "request_rate_manager.h"
-#include "timestamp_vector.h"
 
 namespace triton { namespace perfanalyzer {
 
@@ -184,9 +183,9 @@ cb::Error ReportPrometheusMetrics(const Metrics& metrics);
 /// time.
 /// 2. After given time interval, the profiler gets end status from the server
 ///    and records the end time.
-/// 3. The profiler obtains the timestamps recorded by concurrency manager,
-///    and uses the timestamps that are recorded between start time and end time
-///    to measure client side status and update status_summary.
+/// 3. The profiler obtains the request records recorded by concurrency manager,
+///    and uses the request records that are recorded between start time and end
+///    time to measure client side status and update status_summary.
 ///
 class InferenceProfiler {
  public:
@@ -679,8 +678,8 @@ class InferenceProfiler {
   bool include_server_stats_;
   std::shared_ptr<MPIDriver> mpi_driver_;
 
-  /// The timestamps of the requests completed during all measurements
-  TimestampVector all_timestamps_;
+  /// The request records of the requests completed during all measurements
+  std::vector<RequestRecord> all_request_records_;
 
   /// The end time of the previous measurement window
   uint64_t previous_window_end_ns_;
diff --git a/src/c++/perf_analyzer/load_manager.cc b/src/c++/perf_analyzer/load_manager.cc
index 5b76c9f4b..369317e66 100644
--- a/src/c++/perf_analyzer/load_manager.cc
+++ b/src/c++/perf_analyzer/load_manager.cc
@@ -59,20 +59,19 @@ LoadManager::CheckHealth()
 }
 
 cb::Error
-LoadManager::SwapTimestamps(TimestampVector& new_timestamps)
+LoadManager::SwapRequestRecords(std::vector<RequestRecord>& new_request_records)
 {
-  TimestampVector total_timestamp;
-  // Gather request timestamps with proper locking from all the worker
-  // threads
+  std::vector<RequestRecord> total_request_records;
+  // Gather request records with proper locking from all the worker threads
   for (auto& thread_stat : threads_stat_) {
     std::lock_guard<std::mutex> lock(thread_stat->mu_);
-    total_timestamp.insert(
-        total_timestamp.end(), thread_stat->request_timestamps_.begin(),
-        thread_stat->request_timestamps_.end());
-    thread_stat->request_timestamps_.clear();
+    total_request_records.insert(
+        total_request_records.end(), thread_stat->request_records_.begin(),
+        thread_stat->request_records_.end());
+    thread_stat->request_records_.clear();
   }
   // Swap the results
-  total_timestamp.swap(new_timestamps);
+  total_request_records.swap(new_request_records);
   return cb::Error::Success;
 }
 
@@ -82,7 +81,7 @@ LoadManager::CountCollectedRequests()
   uint64_t num_of_requests = 0;
   for (auto& thread_stat : threads_stat_) {
     std::lock_guard<std::mutex> lock(thread_stat->mu_);
-    num_of_requests += thread_stat->request_timestamps_.size();
+    num_of_requests += thread_stat->request_records_.size();
   }
   return num_of_requests;
 }
diff --git a/src/c++/perf_analyzer/load_manager.h b/src/c++/perf_analyzer/load_manager.h
index cf54600f4..5e75ab9ea 100644
--- a/src/c++/perf_analyzer/load_manager.h
+++ b/src/c++/perf_analyzer/load_manager.h
@@ -37,7 +37,6 @@
 #include "load_worker.h"
 #include "perf_utils.h"
 #include "sequence_manager.h"
-#include "timestamp_vector.h"
 
 namespace triton { namespace perfanalyzer {
 
@@ -77,11 +76,11 @@ class LoadManager {
   /// \return cb::Error object indicating success or failure.
   cb::Error CheckHealth();
 
-  /// Swap the content of the timestamp vector recorded by the load
-  /// manager with a new timestamp vector
-  /// \param new_timestamps The timestamp vector to be swapped.
+  /// Swap the content of the request records vector recorded by the load
+  /// manager with a new request records vector
+  /// \param new_request_records The request records vector to be swapped.
   /// \return cb::Error object indicating success or failure.
-  cb::Error SwapTimestamps(TimestampVector& new_timestamps);
+  cb::Error SwapRequestRecords(std::vector<RequestRecord>& new_request_records);
 
   /// Get the sum of all contexts' stat
   /// \param contexts_stat Returned the accumulated stat from all contexts
diff --git a/src/c++/perf_analyzer/mock_inference_profiler.h b/src/c++/perf_analyzer/mock_inference_profiler.h
index a31485091..c64b5fb08 100644
--- a/src/c++/perf_analyzer/mock_inference_profiler.h
+++ b/src/c++/perf_analyzer/mock_inference_profiler.h
@@ -113,7 +113,8 @@ class NaggyMockInferenceProfiler : public InferenceProfiler {
   std::shared_ptr<ModelParser>& parser_{InferenceProfiler::parser_};
   std::unique_ptr<LoadManager>& manager_{InferenceProfiler::manager_};
   bool& include_lib_stats_{InferenceProfiler::include_lib_stats_};
-  TimestampVector& all_timestamps_{InferenceProfiler::all_timestamps_};
+  std::vector<RequestRecord>& all_request_records_{
+      InferenceProfiler::all_request_records_};
 };
 
 using MockInferenceProfiler = testing::NiceMock<NaggyMockInferenceProfiler>;
diff --git a/src/c++/perf_analyzer/request_properties.h b/src/c++/perf_analyzer/request_record.h
similarity index 80%
rename from src/c++/perf_analyzer/request_properties.h
rename to src/c++/perf_analyzer/request_record.h
index dce688482..ba441549a 100644
--- a/src/c++/perf_analyzer/request_properties.h
+++ b/src/c++/perf_analyzer/request_record.h
@@ -32,32 +32,24 @@
 
 namespace triton { namespace perfanalyzer {
 
-/// The properties of a request required in the callback to effectively
-/// interpret the response.
-struct RequestProperties {
-  RequestProperties() = default;
-  RequestProperties(
+/// A record of an individual request
+struct RequestRecord {
+  RequestRecord() = default;
+  RequestRecord(
       std::chrono::time_point<std::chrono::system_clock> start_time,
-      std::vector<std::chrono::time_point<std::chrono::system_clock>> end_times,
+      std::vector<std::chrono::time_point<std::chrono::system_clock>>
+          response_times,
       bool sequence_end, bool delayed, uint64_t sequence_id)
-      : start_time_(start_time), end_times_(end_times),
+      : start_time_(start_time), response_times_(response_times),
         sequence_end_(sequence_end), delayed_(delayed),
         sequence_id_(sequence_id)
   {
   }
-  bool operator==(const RequestProperties& other) const
-  {
-    return std::tie(
-               start_time_, end_times_, sequence_end_, delayed_,
-               sequence_id_) ==
-           std::tie(
-               other.start_time_, other.end_times_, other.sequence_end_,
-               other.delayed_, other.sequence_id_);
-  }
   // The timestamp of when the request was started.
   std::chrono::time_point<std::chrono::system_clock> start_time_;
   // Collection of response times
-  std::vector<std::chrono::time_point<std::chrono::system_clock>> end_times_;
+  std::vector<std::chrono::time_point<std::chrono::system_clock>>
+      response_times_;
   // Whether or not the request is at the end of a sequence.
   bool sequence_end_;
   // Whether or not the request is delayed as per schedule.
diff --git a/src/c++/perf_analyzer/test_infer_context.cc b/src/c++/perf_analyzer/test_infer_context.cc
index 05fd11245..951fb2b10 100644
--- a/src/c++/perf_analyzer/test_infer_context.cc
+++ b/src/c++/perf_analyzer/test_infer_context.cc
@@ -127,7 +127,7 @@ TEST_CASE("send_request: testing the SendRequest function")
 {
   MockInferContext mock_infer_context{};
 
-  SUBCASE("testing logic relevant to request timestamp sequence ID")
+  SUBCASE("testing logic relevant to request record sequence ID")
   {
     mock_infer_context.thread_stat_ = std::make_shared<ThreadStat>();
     mock_infer_context.thread_stat_->contexts_stat_.emplace_back();
@@ -168,9 +168,9 @@ TEST_CASE("send_request: testing the SendRequest function")
 
     mock_infer_context.SendRequest(request_id, delayed, sequence_id);
 
-    CHECK(mock_infer_context.thread_stat_->request_timestamps_.size() == 1);
+    CHECK(mock_infer_context.thread_stat_->request_records_.size() == 1);
     CHECK(
-        mock_infer_context.thread_stat_->request_timestamps_[0].sequence_id_ ==
+        mock_infer_context.thread_stat_->request_records_[0].sequence_id_ ==
         sequence_id);
   }
 }
diff --git a/src/c++/perf_analyzer/test_inference_profiler.cc b/src/c++/perf_analyzer/test_inference_profiler.cc
index c497226b1..bd3ab3e5b 100644
--- a/src/c++/perf_analyzer/test_inference_profiler.cc
+++ b/src/c++/perf_analyzer/test_inference_profiler.cc
@@ -38,10 +38,10 @@ class TestInferenceProfiler : public InferenceProfiler {
       const std::pair<uint64_t, uint64_t>& valid_range,
       size_t& valid_sequence_count, size_t& delayed_request_count,
       std::vector<uint64_t>* latencies, size_t& response_count,
-      TimestampVector& all_timestamps)
+      std::vector<RequestRecord>& all_request_records)
   {
     InferenceProfiler inference_profiler{};
-    inference_profiler.all_timestamps_ = all_timestamps;
+    inference_profiler.all_request_records_ = all_request_records;
     inference_profiler.ValidLatencyMeasurement(
         valid_range, valid_sequence_count, delayed_request_count, latencies,
         response_count);
@@ -171,52 +171,58 @@ TEST_CASE("testing the ValidLatencyMeasurement function")
   const std::pair<uint64_t, uint64_t> window{4, 17};
   using time_point = std::chrono::time_point<std::chrono::system_clock>;
   using ns = std::chrono::nanoseconds;
-  TimestampVector all_timestamps{
+  std::vector<RequestRecord> all_request_records{
       // request ends before window starts, this should not be possible to exist
       // in the vector of requests, but if it is, we exclude it: not included in
       // current window
-      RequestProperties(
+      RequestRecord(
           time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0,
           false, 0),
 
       // request starts before window starts and ends inside window: included in
       // current window
-      RequestProperties(
+      RequestRecord(
           time_point(ns(3)), std::vector<time_point>{time_point(ns(5))}, 0,
           false, 0),
 
       // requests start and end inside window: included in current window
-      RequestProperties(
+      RequestRecord(
           time_point(ns(6)), std::vector<time_point>{time_point(ns(9))}, 0,
           false, 0),
-      RequestProperties(
+      RequestRecord(
           time_point(ns(10)), std::vector<time_point>{time_point(ns(14))}, 0,
           false, 0),
 
       // request starts before window ends and ends after window ends: not
       // included in current window
-      RequestProperties(
+      RequestRecord(
           time_point(ns(15)), std::vector<time_point>{time_point(ns(20))}, 0,
           false, 0),
 
       // request starts after window ends: not included in current window
-      RequestProperties(
+      RequestRecord(
           time_point(ns(21)), std::vector<time_point>{time_point(ns(27))}, 0,
           false, 0)};
 
   TestInferenceProfiler::ValidLatencyMeasurement(
       window, valid_sequence_count, delayed_request_count, &latencies,
-      response_count, all_timestamps);
+      response_count, all_request_records);
 
-  const auto& convert_timestamp_to_latency{[](RequestProperties t) {
-    return CHRONO_TO_NANOS(t.end_times_.back()) -
+  const auto& convert_request_record_to_latency{[](RequestRecord t) {
+    return CHRONO_TO_NANOS(t.response_times_.back()) -
            CHRONO_TO_NANOS(t.start_time_);
   }};
 
   CHECK(latencies.size() == 3);
-  CHECK(latencies[0] == convert_timestamp_to_latency(all_timestamps[1]));
-  CHECK(latencies[1] == convert_timestamp_to_latency(all_timestamps[2]));
-  CHECK(latencies[2] == convert_timestamp_to_latency(all_timestamps[3]));
+  CHECK(
+      latencies[0] ==
+      convert_request_record_to_latency(all_request_records[1]));
+  CHECK(
+      latencies[1] ==
+      convert_request_record_to_latency(all_request_records[2]));
+  CHECK(
+      latencies[2] ==
+      convert_request_record_to_latency(all_request_records[3]));
 }
 
 TEST_CASE("test_check_window_for_stability")
@@ -870,7 +876,7 @@ TEST_CASE(
     auto request1_timestamp{clock_epoch + std::chrono::nanoseconds(1)};
     auto response1_timestamp{clock_epoch + std::chrono::nanoseconds(2)};
     auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(3)};
-    auto timestamp1{RequestProperties(
+    auto request_record1{RequestRecord(
         request1_timestamp,
         std::vector<std::chrono::time_point<std::chrono::system_clock>>{
             response1_timestamp, response2_timestamp},
@@ -880,13 +886,14 @@ TEST_CASE(
     auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
     auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
     auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
-    auto timestamp2{RequestProperties(
+    auto request_record2{RequestRecord(
         request2_timestamp,
         std::vector<std::chrono::time_point<std::chrono::system_clock>>{
             response3_timestamp, response4_timestamp, response5_timestamp},
         0, false, 0)};
 
-    mock_inference_profiler.all_timestamps_ = {timestamp1, timestamp2};
+    mock_inference_profiler.all_request_records_ = {
+        request_record1, request_record2};
 
     const std::pair<uint64_t, uint64_t> valid_range{
         std::make_pair(0, UINT64_MAX)};
diff --git a/src/c++/perf_analyzer/test_load_manager.cc b/src/c++/perf_analyzer/test_load_manager.cc
index dc1f8b31e..ada49e25b 100644
--- a/src/c++/perf_analyzer/test_load_manager.cc
+++ b/src/c++/perf_analyzer/test_load_manager.cc
@@ -33,6 +33,21 @@ namespace cb = triton::perfanalyzer::clientbackend;
 
 namespace triton { namespace perfanalyzer {
 
+namespace {
+
+bool
+operator==(const RequestRecord& lhs, const RequestRecord& rhs)
+{
+  return std::tie(
+             lhs.start_time_, lhs.response_times_, lhs.sequence_end_,
+             lhs.delayed_, lhs.sequence_id_) ==
+         std::tie(
+             rhs.start_time_, rhs.response_times_, rhs.sequence_end_,
+             rhs.delayed_, rhs.sequence_id_);
+}
+
+}  // namespace
+
 class TestLoadManager : public TestLoadManagerBase, public LoadManager {
  public:
   ~TestLoadManager() = default;
@@ -108,83 +123,83 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
     CHECK(CheckHealth().IsOk() == expect_ok);
   }
 
-  /// Test the public function SwapTimestamps
+  /// Test the public function SwapRequestRecords
   ///
-  /// It will gather all timestamps from the thread_stats
-  /// and return them, and clear the thread_stats timestamps
+  /// It will gather all request records from the thread_stats
+  /// and return them, and clear the thread_stats request records
   ///
-  void TestSwapTimeStamps()
+  void TestSwapRequestRecords()
   {
     using time_point = std::chrono::time_point<std::chrono::system_clock>;
     using ns = std::chrono::nanoseconds;
-    auto timestamp1 = RequestProperties(
+    auto request_record1 = RequestRecord(
         time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0, false,
         0);
-    auto timestamp2 = RequestProperties(
+    auto request_record2 = RequestRecord(
         time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0, false,
         0);
-    auto timestamp3 = RequestProperties(
+    auto request_record3 = RequestRecord(
         time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0, false,
         0);
 
-    TimestampVector source_timestamps;
+    std::vector<RequestRecord> source_request_records;
 
     SUBCASE("No threads")
     {
-      auto ret = SwapTimestamps(source_timestamps);
-      CHECK(source_timestamps.size() == 0);
+      auto ret = SwapRequestRecords(source_request_records);
+      CHECK(source_request_records.size() == 0);
       CHECK(ret.IsOk() == true);
     }
-    SUBCASE("Source has timestamps")
+    SUBCASE("Source has request records")
     {
-      // Any timestamps in the vector passed in to Swaptimestamps will
+      // Any request records in the vector passed in to SwapRequestRecords will
       // be dropped on the floor
       //
-      source_timestamps.push_back(timestamp1);
-      auto ret = SwapTimestamps(source_timestamps);
-      CHECK(source_timestamps.size() == 0);
+      source_request_records.push_back(request_record1);
+      auto ret = SwapRequestRecords(source_request_records);
+      CHECK(source_request_records.size() == 0);
       CHECK(ret.IsOk() == true);
     }
     SUBCASE("One thread")
     {
       auto stat1 = std::make_shared<ThreadStat>();
-      stat1->request_timestamps_.push_back(timestamp1);
-      stat1->request_timestamps_.push_back(timestamp2);
-      stat1->request_timestamps_.push_back(timestamp3);
+      stat1->request_records_.push_back(request_record1);
+      stat1->request_records_.push_back(request_record2);
+      stat1->request_records_.push_back(request_record3);
       threads_stat_.push_back(stat1);
 
-      CHECK(stat1->request_timestamps_.size() == 3);
-      auto ret = SwapTimestamps(source_timestamps);
-      CHECK(stat1->request_timestamps_.size() == 0);
+      CHECK(stat1->request_records_.size() == 3);
+      auto ret = SwapRequestRecords(source_request_records);
+      CHECK(stat1->request_records_.size() == 0);
 
-      REQUIRE(source_timestamps.size() == 3);
-      CHECK(source_timestamps[0] == timestamp1);
-      CHECK(source_timestamps[1] == timestamp2);
-      CHECK(source_timestamps[2] == timestamp3);
+      REQUIRE(source_request_records.size() == 3);
+      CHECK(source_request_records[0] == request_record1);
+      CHECK(source_request_records[1] == request_record2);
+      CHECK(source_request_records[2] == request_record3);
       CHECK(ret.IsOk() == true);
     }
     SUBCASE("Multiple threads")
     {
       auto stat1 = std::make_shared<ThreadStat>();
-      stat1->request_timestamps_.push_back(timestamp2);
+      stat1->request_records_.push_back(request_record2);
 
       auto stat2 = std::make_shared<ThreadStat>();
-      stat2->request_timestamps_.push_back(timestamp1);
-      stat2->request_timestamps_.push_back(timestamp3);
+      stat2->request_records_.push_back(request_record1);
+      stat2->request_records_.push_back(request_record3);
 
       threads_stat_.push_back(stat1);
       threads_stat_.push_back(stat2);
 
-      CHECK(stat1->request_timestamps_.size() == 1);
-      CHECK(stat2->request_timestamps_.size() == 2);
-      auto ret = SwapTimestamps(source_timestamps);
-      CHECK(stat1->request_timestamps_.size() == 0);
-      CHECK(stat2->request_timestamps_.size() == 0);
+      CHECK(stat1->request_records_.size() == 1);
+      CHECK(stat2->request_records_.size() == 2);
+      auto ret = SwapRequestRecords(source_request_records);
+      CHECK(stat1->request_records_.size() == 0);
+      CHECK(stat2->request_records_.size() == 0);
 
-      REQUIRE(source_timestamps.size() == 3);
-      CHECK(source_timestamps[0] == timestamp2);
-      CHECK(source_timestamps[1] == timestamp1);
-      CHECK(source_timestamps[2] == timestamp3);
+      REQUIRE(source_request_records.size() == 3);
+      CHECK(source_request_records[0] == request_record2);
+      CHECK(source_request_records[1] == request_record1);
+      CHECK(source_request_records[2] == request_record3);
       CHECK(ret.IsOk() == true);
     }
   }
@@ -271,20 +286,20 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
 
   /// Test the public function CountCollectedRequests
   ///
-  /// It will count all timestamps in the thread_stats (and not modify
+  /// It will count all request records in the thread_stats (and not modify
   /// the thread_stats in any way)
   ///
   void TestCountCollectedRequests()
   {
     using time_point = std::chrono::time_point<std::chrono::system_clock>;
     using ns = std::chrono::nanoseconds;
-    auto timestamp1 = RequestProperties(
+    auto request_record1 = RequestRecord(
         time_point(ns(1)), std::vector<time_point>{time_point(ns(2))}, 0, false,
         0);
-    auto timestamp2 = RequestProperties(
+    auto request_record2 = RequestRecord(
         time_point(ns(3)), std::vector<time_point>{time_point(ns(4))}, 0, false,
         0);
-    auto timestamp3 = RequestProperties(
+    auto request_record3 = RequestRecord(
         time_point(ns(5)), std::vector<time_point>{time_point(ns(6))}, 0, false,
         0);
 
@@ -295,32 +310,32 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager {
     SUBCASE("One thread")
     {
       auto stat1 = std::make_shared<ThreadStat>();
-      stat1->request_timestamps_.push_back(timestamp1);
-      stat1->request_timestamps_.push_back(timestamp2);
-      stat1->request_timestamps_.push_back(timestamp3);
+      stat1->request_records_.push_back(request_record1);
+      stat1->request_records_.push_back(request_record2);
+      stat1->request_records_.push_back(request_record3);
       threads_stat_.push_back(stat1);
 
-      CHECK(stat1->request_timestamps_.size() == 3);
+      CHECK(stat1->request_records_.size() == 3);
       CHECK(CountCollectedRequests() == 3);
-      CHECK(stat1->request_timestamps_.size() == 3);
+      CHECK(stat1->request_records_.size() == 3);
     }
     SUBCASE("Multiple threads")
     {
       auto stat1 = std::make_shared<ThreadStat>();
-      stat1->request_timestamps_.push_back(timestamp2);
+      stat1->request_records_.push_back(request_record2);
 
       auto stat2 = std::make_shared<ThreadStat>();
-      stat2->request_timestamps_.push_back(timestamp1);
-      stat2->request_timestamps_.push_back(timestamp3);
+      stat2->request_records_.push_back(request_record1);
+      stat2->request_records_.push_back(request_record3);
 
       threads_stat_.push_back(stat1);
       threads_stat_.push_back(stat2);
 
-      CHECK(stat1->request_timestamps_.size() == 1);
-      CHECK(stat2->request_timestamps_.size() == 2);
+      CHECK(stat1->request_records_.size() == 1);
+      CHECK(stat2->request_records_.size() == 2);
       CHECK(CountCollectedRequests() == 3);
-      CHECK(stat1->request_timestamps_.size() == 1);
-      CHECK(stat2->request_timestamps_.size() == 2);
+      CHECK(stat1->request_records_.size() == 1);
+      CHECK(stat2->request_records_.size() == 2);
     }
   }
 
@@ -361,10 +376,11 @@ TEST_CASE("load_manager_check_health: Test the public function CheckHealth()")
 }
 
 TEST_CASE(
-    "load_manager_swap_timestamps: Test the public function SwapTimeStamps()")
+    "load_manager_swap_request_records: Test the public function "
+    "SwapRequestRecords()")
 {
   TestLoadManager tlm(PerfAnalyzerParameters{});
-  tlm.TestSwapTimeStamps();
+  tlm.TestSwapRequestRecords();
 }
 
 TEST_CASE(
diff --git a/src/c++/perf_analyzer/timestamp_vector.h b/src/c++/perf_analyzer/timestamp_vector.h
deleted file mode 100644
index 5132e762e..000000000
--- a/src/c++/perf_analyzer/timestamp_vector.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#pragma once
-
-#include <vector>
-
-#include "request_properties.h"
-
-namespace triton { namespace perfanalyzer {
-
-using TimestampVector = std::vector<RequestProperties>;
-
-}}  // namespace triton::perfanalyzer

From ba0b311f1359d70856e492d8b30fb8bd37511186 Mon Sep 17 00:00:00 2001
From: Matthew Kotila <matthew.r.kotila@gmail.com>
Date: Thu, 27 Jul 2023 18:50:13 +0000
Subject: [PATCH 4/4] Address feedback

---
 src/c++/perf_analyzer/infer_context.h          |  3 ++-
 src/c++/perf_analyzer/test_sequence_manager.cc | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/c++/perf_analyzer/infer_context.h b/src/c++/perf_analyzer/infer_context.h
index 7f9d3c6ce..fb048546e 100644
--- a/src/c++/perf_analyzer/infer_context.h
+++ b/src/c++/perf_analyzer/infer_context.h
@@ -132,7 +132,8 @@ class InferContext {
   /// A helper function to issue inference request to the server.
   /// \param request_id The unique id to be associated with the request.
   /// \param delayed Whether the request fell behind its scheduled time.
-  /// \param sequence_id Sequence ID of the request.
+  /// \param sequence_id Sequence ID of the request. Note that the default of
+  /// `0` means the request is not part of a sequence.
   virtual void SendRequest(
       const uint64_t request_id, const bool delayed,
       const uint64_t sequence_id = 0);
diff --git a/src/c++/perf_analyzer/test_sequence_manager.cc b/src/c++/perf_analyzer/test_sequence_manager.cc
index 83302bfe1..243500b85 100644
--- a/src/c++/perf_analyzer/test_sequence_manager.cc
+++ b/src/c++/perf_analyzer/test_sequence_manager.cc
@@ -31,6 +31,18 @@
 
 namespace triton { namespace perfanalyzer {
 
+TEST_CASE("get_sequence_id: testing the GetSequenceID function")
+{
+  MockSequenceManager msm{};
+
+  std::shared_ptr<SequenceStatus> sequence_status{
+      std::make_shared<SequenceStatus>(5)};
+
+  msm.sequence_statuses_.push_back(sequence_status);
+
+  CHECK(msm.GetSequenceID(0) == 5);
+}
+
 TEST_CASE(
     "test_set_infer_sequence_options: testing the SetInferSequenceOptions "
     "function")