Skip to content

Commit

Permalink
Reuse and rename response_stats_index_
Browse files Browse the repository at this point in the history
  • Loading branch information
yinggeh committed Oct 15, 2024
1 parent 8c9fe0c commit db990d3
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 25 deletions.
3 changes: 1 addition & 2 deletions src/backend_model_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1103,8 +1103,7 @@ TRITONBACKEND_ModelInstanceReportResponseStatistics(

InferenceStatsAggregator* sa =
rs->model_instance->Model()->MutableStatsAggregator();
std::string key =
std::to_string((*rs->response_factory)->GetAndIncrementResponseIndex());
std::string key = std::to_string((*rs->response_factory)->GetResponseIndex());

if (rs->error == nullptr) {
if (rs->compute_output_start > 0) {
Expand Down
26 changes: 19 additions & 7 deletions src/infer_response.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ InferenceResponseFactory::CreateResponse(
{
response->reset(new InferenceResponse(
model_, id_, allocator_, alloc_userp_, response_fn_, response_userp_,
response_delegator_, response_cnt_
response_delegator_
#ifdef TRITON_ENABLE_STATS
,
response_index_++
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
infer_start_ns_
Expand All @@ -51,7 +55,6 @@ InferenceResponseFactory::CreateResponse(
#ifdef TRITON_ENABLE_TRACING
(*response)->SetTrace(trace_);
#endif // TRITON_ENABLE_TRACING
response_cnt_++;
return Status::Success;
}

Expand All @@ -78,16 +81,22 @@ InferenceResponse::InferenceResponse(
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
void* response_userp,
const std::function<
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
uint64_t seq_num
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
#ifdef TRITON_ENABLE_STATS
,
uint64_t index
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
uint64_t infer_start_ns
#endif // TRITON_ENABLE_METRICS
)
: model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp),
response_fn_(response_fn), response_userp_(response_userp),
response_delegator_(delegator), seq_num_(seq_num),
response_delegator_(delegator),
#ifdef TRITON_ENABLE_STATS
index_(index),
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
infer_start_ns_(infer_start_ns),
#endif // TRITON_ENABLE_METRICS
Expand All @@ -108,7 +117,10 @@ InferenceResponse::InferenceResponse(
InferenceResponse::InferenceResponse(
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
void* response_userp)
: response_fn_(response_fn), response_userp_(response_userp), seq_num_(0),
: response_fn_(response_fn), response_userp_(response_userp),
#ifdef TRITON_ENABLE_STATS
index_(0),
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
infer_start_ns_(0),
#endif // TRITON_ENABLE_METRICS
Expand Down Expand Up @@ -309,7 +321,7 @@ InferenceResponse::TraceOutputTensors(
void
InferenceResponse::UpdateResponseMetrics() const
{
if (model_ != nullptr && seq_num_ == 0) {
if (model_ != nullptr && index_ == 0) {
auto first_response_ns =
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch())
Expand Down
27 changes: 15 additions & 12 deletions src/infer_response.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ class InferenceResponseFactory {
: model_(model), id_(id), allocator_(allocator),
alloc_userp_(alloc_userp), response_fn_(response_fn),
response_userp_(response_userp), response_delegator_(delegator),
is_cancelled_(false), response_cnt_(0)
is_cancelled_(false)
#ifdef TRITON_ENABLE_STATS
,
response_stats_index_(0)
response_index_(0)
#endif // TRITON_ENABLE_STATS
{
#ifdef TRITON_ENABLE_METRICS
Expand Down Expand Up @@ -104,8 +104,8 @@ class InferenceResponseFactory {
#endif // TRITON_ENABLE_TRACING

#ifdef TRITON_ENABLE_STATS
// Return the current response statistics index and increment it.
uint64_t GetAndIncrementResponseIndex() { return response_stats_index_++; };
// Return the current value of the response index counter without
// advancing it; the counter itself is incremented when each response
// is created (see CreateResponse's use of response_index_++).
uint64_t GetResponseIndex() { return response_index_; };
#endif // TRITON_ENABLE_STATS

private:
Expand Down Expand Up @@ -139,9 +139,6 @@ class InferenceResponseFactory {

std::atomic<bool> is_cancelled_;

// The number of responses created by this factory.
std::atomic<uint64_t> response_cnt_;

#ifdef TRITON_ENABLE_METRICS
// The start time of associate request in ns.
uint64_t infer_start_ns_;
Expand All @@ -154,7 +151,7 @@ class InferenceResponseFactory {

#ifdef TRITON_ENABLE_STATS
// Running index of responses created by this factory (incremented once
// per created response; also used as the key for response statistics).
std::atomic<uint64_t> response_stats_index_;
std::atomic<uint64_t> response_index_;
#endif // TRITON_ENABLE_STATS
};

Expand Down Expand Up @@ -259,9 +256,12 @@ class InferenceResponse {
const ResponseAllocator* allocator, void* alloc_userp,
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
void* response_userp,
const std::function<void(
std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
uint64_t seq_num
const std::function<
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
#ifdef TRITON_ENABLE_STATS
,
uint64_t index
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
uint64_t infer_start_ns
Expand Down Expand Up @@ -382,7 +382,10 @@ class InferenceResponse {
std::function<void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>
response_delegator_;

const uint64_t seq_num_;
#ifdef TRITON_ENABLE_STATS
const uint64_t index_;
#endif // TRITON_ENABLE_STATS

#ifdef TRITON_ENABLE_METRICS
const uint64_t infer_start_ns_;
#endif // TRITON_ENABLE_METRICS
Expand Down
18 changes: 14 additions & 4 deletions src/test/response_cache_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ InferenceResponseFactory::CreateResponse(
{
response->reset(new InferenceResponse(
model_, id_, allocator_, alloc_userp_, response_fn_, response_userp_,
response_delegator_, response_cnt_
response_delegator_
#ifdef TRITON_ENABLE_STATS
,
response_index_
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
infer_start_ns_
Expand Down Expand Up @@ -186,16 +190,22 @@ InferenceResponse::InferenceResponse(
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
void* response_userp,
const std::function<
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
uint64_t seq_num
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
#ifdef TRITON_ENABLE_STATS
,
uint64_t index
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
uint64_t infer_start_ns
#endif // TRITON_ENABLE_METRICS
)
: model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp),
response_fn_(response_fn), response_userp_(response_userp),
response_delegator_(delegator), seq_num_(seq_num),
response_delegator_(delegator),
#ifdef TRITON_ENABLE_STATS
index_(index),
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
infer_start_ns_(infer_start_ns),
#endif // TRITON_ENABLE_METRICS
Expand Down

0 comments on commit db990d3

Please sign in to comment.