diff --git a/src/backend_model_instance.cc b/src/backend_model_instance.cc
index 87b602c82..feb20115e 100644
--- a/src/backend_model_instance.cc
+++ b/src/backend_model_instance.cc
@@ -1103,8 +1103,7 @@ TRITONBACKEND_ModelInstanceReportResponseStatistics(
   InferenceStatsAggregator* sa =
       rs->model_instance->Model()->MutableStatsAggregator();
 
-  std::string key =
-      std::to_string((*rs->response_factory)->GetAndIncrementResponseIndex());
+  std::string key = std::to_string((*rs->response_factory)->GetResponseIndex());
 
   if (rs->error == nullptr) {
     if (rs->compute_output_start > 0) {
diff --git a/src/infer_response.cc b/src/infer_response.cc
index 3c2b39e17..5d5603bd5 100644
--- a/src/infer_response.cc
+++ b/src/infer_response.cc
@@ -42,7 +42,11 @@ InferenceResponseFactory::CreateResponse(
 {
   response->reset(new InferenceResponse(
       model_, id_, allocator_, alloc_userp_, response_fn_, response_userp_,
-      response_delegator_, response_cnt_
+      response_delegator_
+#ifdef TRITON_ENABLE_STATS
+      ,
+      response_index_++
+#endif  // TRITON_ENABLE_STATS
 #ifdef TRITON_ENABLE_METRICS
       ,
       infer_start_ns_
@@ -51,7 +55,6 @@ InferenceResponseFactory::CreateResponse(
 #ifdef TRITON_ENABLE_TRACING
   (*response)->SetTrace(trace_);
 #endif  // TRITON_ENABLE_TRACING
-  response_cnt_++;
 
   return Status::Success;
 }
@@ -78,8 +81,11 @@ InferenceResponse::InferenceResponse(
     TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
     void* response_userp,
     const std::function<
-        void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
-    uint64_t seq_num
+        void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
+#ifdef TRITON_ENABLE_STATS
+    ,
+    uint64_t index
+#endif  // TRITON_ENABLE_STATS
 #ifdef TRITON_ENABLE_METRICS
     ,
     uint64_t infer_start_ns
@@ -87,7 +93,10 @@ InferenceResponse::InferenceResponse(
     )
     : model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp),
       response_fn_(response_fn), response_userp_(response_userp),
-      response_delegator_(delegator), seq_num_(seq_num),
+      response_delegator_(delegator),
+#ifdef TRITON_ENABLE_STATS
+      index_(index),
+#endif  // TRITON_ENABLE_STATS
 #ifdef TRITON_ENABLE_METRICS
       infer_start_ns_(infer_start_ns),
 #endif  // TRITON_ENABLE_METRICS
@@ -108,7 +117,10 @@ InferenceResponse::InferenceResponse(
 InferenceResponse::InferenceResponse(
     TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
     void* response_userp)
-    : response_fn_(response_fn), response_userp_(response_userp), seq_num_(0),
+    : response_fn_(response_fn), response_userp_(response_userp),
+#ifdef TRITON_ENABLE_STATS
+      index_(0),
+#endif  // TRITON_ENABLE_STATS
 #ifdef TRITON_ENABLE_METRICS
       infer_start_ns_(0),
 #endif  // TRITON_ENABLE_METRICS
@@ -309,7 +321,7 @@ InferenceResponse::TraceOutputTensors(
 void
 InferenceResponse::UpdateResponseMetrics() const
 {
-  if (model_ != nullptr && seq_num_ == 0) {
+  if (model_ != nullptr && index_ == 0) {
     auto first_response_ns =
         std::chrono::duration_cast<std::chrono::nanoseconds>(
             std::chrono::steady_clock::now().time_since_epoch())
diff --git a/src/infer_response.h b/src/infer_response.h
index 88b158ab1..ca2a9d4db 100644
--- a/src/infer_response.h
+++ b/src/infer_response.h
@@ -60,10 +60,10 @@ class InferenceResponseFactory {
       : model_(model), id_(id), allocator_(allocator),
        alloc_userp_(alloc_userp), response_fn_(response_fn),
        response_userp_(response_userp), response_delegator_(delegator),
-        is_cancelled_(false), response_cnt_(0)
+        is_cancelled_(false)
 #ifdef TRITON_ENABLE_STATS
         ,
-        response_stats_index_(0)
+        response_index_(0)
 #endif  // TRITON_ENABLE_STATS
   {
 #ifdef TRITON_ENABLE_METRICS
@@ -104,8 +104,8 @@ class InferenceResponseFactory {
 #endif  // TRITON_ENABLE_TRACING
 
 #ifdef TRITON_ENABLE_STATS
-  // Return the current response statistics index and increment it.
-  uint64_t GetAndIncrementResponseIndex() { return response_stats_index_++; };
+  // Return the current response index.
+  uint64_t GetResponseIndex() { return response_index_; };
 #endif  // TRITON_ENABLE_STATS
 
  private:
@@ -139,9 +139,6 @@ class InferenceResponseFactory {
 
   std::atomic<bool> is_cancelled_;
 
-  // The number of responses created by this factory.
-  std::atomic<uint64_t> response_cnt_;
-
 #ifdef TRITON_ENABLE_METRICS
   // The start time of associate request in ns.
   uint64_t infer_start_ns_;
@@ -154,7 +151,7 @@ class InferenceResponseFactory {
 
 #ifdef TRITON_ENABLE_STATS
   // Number of response statistics reported.
-  std::atomic<uint64_t> response_stats_index_;
+  std::atomic<uint64_t> response_index_;
 #endif  // TRITON_ENABLE_STATS
 };
 
@@ -259,9 +256,12 @@ class InferenceResponse {
       const ResponseAllocator* allocator, void* alloc_userp,
       TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
       void* response_userp,
-      const std::function<void(
-          std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
-      uint64_t seq_num
+      const std::function<
+          void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
+#ifdef TRITON_ENABLE_STATS
+      ,
+      uint64_t index
+#endif  // TRITON_ENABLE_STATS
 #ifdef TRITON_ENABLE_METRICS
       ,
       uint64_t infer_start_ns
@@ -382,7 +382,10 @@ class InferenceResponse {
   std::function<void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>
       response_delegator_;
 
-  const uint64_t seq_num_;
+#ifdef TRITON_ENABLE_STATS
+  const uint64_t index_;
+#endif  // TRITON_ENABLE_STATS
+
 #ifdef TRITON_ENABLE_METRICS
   const uint64_t infer_start_ns_;
 #endif  // TRITON_ENABLE_METRICS
diff --git a/src/test/response_cache_test.cc b/src/test/response_cache_test.cc
index 00a1826d8..801bcef32 100644
--- a/src/test/response_cache_test.cc
+++ b/src/test/response_cache_test.cc
@@ -46,7 +46,11 @@ InferenceResponseFactory::CreateResponse(
 {
   response->reset(new InferenceResponse(
       model_, id_, allocator_, alloc_userp_, response_fn_, response_userp_,
-      response_delegator_, response_cnt_
+      response_delegator_
+#ifdef TRITON_ENABLE_STATS
+      ,
+      response_index_
+#endif  // TRITON_ENABLE_STATS
 #ifdef TRITON_ENABLE_METRICS
       ,
       infer_start_ns_
@@ -186,8 +190,11 @@ InferenceResponse::InferenceResponse(
     TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
     void* response_userp,
     const std::function<
-        void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
-    uint64_t seq_num
+        void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
+#ifdef TRITON_ENABLE_STATS
+    ,
+    uint64_t index
+#endif  // TRITON_ENABLE_STATS
 #ifdef TRITON_ENABLE_METRICS
     ,
     uint64_t infer_start_ns
@@ -195,7 +202,10 @@ InferenceResponse::InferenceResponse(
     )
     : model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp),
       response_fn_(response_fn), response_userp_(response_userp),
-      response_delegator_(delegator), seq_num_(seq_num),
+      response_delegator_(delegator),
+#ifdef TRITON_ENABLE_STATS
+      index_(index),
+#endif  // TRITON_ENABLE_STATS
 #ifdef TRITON_ENABLE_METRICS
       infer_start_ns_(infer_start_ns),
 #endif  // TRITON_ENABLE_METRICS