Skip to content

Commit

Permalink
Reuse and rename response_stats_index_
Browse files Browse the repository at this point in the history
  • Loading branch information
yinggeh committed Oct 15, 2024
1 parent 8c9fe0c commit db990d3
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 25 deletions.
3 changes: 1 addition & 2 deletions src/backend_model_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1103,8 +1103,7 @@ TRITONBACKEND_ModelInstanceReportResponseStatistics(

InferenceStatsAggregator* sa =
rs->model_instance->Model()->MutableStatsAggregator();
std::string key =
std::to_string((*rs->response_factory)->GetAndIncrementResponseIndex());
std::string key = std::to_string((*rs->response_factory)->GetResponseIndex());

if (rs->error == nullptr) {
if (rs->compute_output_start > 0) {
Expand Down
26 changes: 19 additions & 7 deletions src/infer_response.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ InferenceResponseFactory::CreateResponse(
{
response->reset(new InferenceResponse(
model_, id_, allocator_, alloc_userp_, response_fn_, response_userp_,
response_delegator_, response_cnt_
response_delegator_
#ifdef TRITON_ENABLE_STATS
,
response_index_++
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
infer_start_ns_
Expand All @@ -51,7 +55,6 @@ InferenceResponseFactory::CreateResponse(
#ifdef TRITON_ENABLE_TRACING
(*response)->SetTrace(trace_);
#endif // TRITON_ENABLE_TRACING
response_cnt_++;
return Status::Success;
}

Expand All @@ -78,16 +81,22 @@ InferenceResponse::InferenceResponse(
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
void* response_userp,
const std::function<
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
uint64_t seq_num
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
#ifdef TRITON_ENABLE_STATS
,
uint64_t index
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
uint64_t infer_start_ns
#endif // TRITON_ENABLE_METRICS
)
: model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp),
response_fn_(response_fn), response_userp_(response_userp),
response_delegator_(delegator), seq_num_(seq_num),
response_delegator_(delegator),
#ifdef TRITON_ENABLE_STATS
index_(index),
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
infer_start_ns_(infer_start_ns),
#endif // TRITON_ENABLE_METRICS
Expand All @@ -108,7 +117,10 @@ InferenceResponse::InferenceResponse(
InferenceResponse::InferenceResponse(
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
void* response_userp)
: response_fn_(response_fn), response_userp_(response_userp), seq_num_(0),
: response_fn_(response_fn), response_userp_(response_userp),
#ifdef TRITON_ENABLE_STATS
index_(0),
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
infer_start_ns_(0),
#endif // TRITON_ENABLE_METRICS
Expand Down Expand Up @@ -309,7 +321,7 @@ InferenceResponse::TraceOutputTensors(
void
InferenceResponse::UpdateResponseMetrics() const
{
if (model_ != nullptr && seq_num_ == 0) {
if (model_ != nullptr && index_ == 0) {
auto first_response_ns =
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch())
Expand Down
27 changes: 15 additions & 12 deletions src/infer_response.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ class InferenceResponseFactory {
: model_(model), id_(id), allocator_(allocator),
alloc_userp_(alloc_userp), response_fn_(response_fn),
response_userp_(response_userp), response_delegator_(delegator),
is_cancelled_(false), response_cnt_(0)
is_cancelled_(false)
#ifdef TRITON_ENABLE_STATS
,
response_stats_index_(0)
response_index_(0)
#endif // TRITON_ENABLE_STATS
{
#ifdef TRITON_ENABLE_METRICS
Expand Down Expand Up @@ -104,8 +104,8 @@ class InferenceResponseFactory {
#endif // TRITON_ENABLE_TRACING

#ifdef TRITON_ENABLE_STATS
// Return the current response statistics index and increment it.
uint64_t GetAndIncrementResponseIndex() { return response_stats_index_++; };
// Return the current value of the response index counter without
// advancing it; the counter itself is incremented when each response
// is created (see CreateResponse's use of response_index_++).
uint64_t GetResponseIndex() { return response_index_; };
#endif // TRITON_ENABLE_STATS

private:
Expand Down Expand Up @@ -139,9 +139,6 @@ class InferenceResponseFactory {

std::atomic<bool> is_cancelled_;

// The number of responses created by this factory.
std::atomic<uint64_t> response_cnt_;

#ifdef TRITON_ENABLE_METRICS
// The start time of associate request in ns.
uint64_t infer_start_ns_;
Expand All @@ -154,7 +151,7 @@ class InferenceResponseFactory {

#ifdef TRITON_ENABLE_STATS
// Running index of responses created by this factory (incremented once
// per created response; also used as the key for response statistics).
std::atomic<uint64_t> response_stats_index_;
std::atomic<uint64_t> response_index_;
#endif // TRITON_ENABLE_STATS
};

Expand Down Expand Up @@ -259,9 +256,12 @@ class InferenceResponse {
const ResponseAllocator* allocator, void* alloc_userp,
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
void* response_userp,
const std::function<void(
std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
uint64_t seq_num
const std::function<
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
#ifdef TRITON_ENABLE_STATS
,
uint64_t index
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
uint64_t infer_start_ns
Expand Down Expand Up @@ -382,7 +382,10 @@ class InferenceResponse {
std::function<void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>
response_delegator_;

const uint64_t seq_num_;
#ifdef TRITON_ENABLE_STATS
const uint64_t index_;
#endif // TRITON_ENABLE_STATS

#ifdef TRITON_ENABLE_METRICS
const uint64_t infer_start_ns_;
#endif // TRITON_ENABLE_METRICS
Expand Down
18 changes: 14 additions & 4 deletions src/test/response_cache_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ InferenceResponseFactory::CreateResponse(
{
response->reset(new InferenceResponse(
model_, id_, allocator_, alloc_userp_, response_fn_, response_userp_,
response_delegator_, response_cnt_
response_delegator_
#ifdef TRITON_ENABLE_STATS
,
response_index_
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
infer_start_ns_
Expand Down Expand Up @@ -186,16 +190,22 @@ InferenceResponse::InferenceResponse(
TRITONSERVER_InferenceResponseCompleteFn_t response_fn,
void* response_userp,
const std::function<
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator,
uint64_t seq_num
void(std::unique_ptr<InferenceResponse>&&, const uint32_t)>& delegator
#ifdef TRITON_ENABLE_STATS
,
uint64_t index
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
,
uint64_t infer_start_ns
#endif // TRITON_ENABLE_METRICS
)
: model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp),
response_fn_(response_fn), response_userp_(response_userp),
response_delegator_(delegator), seq_num_(seq_num),
response_delegator_(delegator),
#ifdef TRITON_ENABLE_STATS
index_(index),
#endif // TRITON_ENABLE_STATS
#ifdef TRITON_ENABLE_METRICS
infer_start_ns_(infer_start_ns),
#endif // TRITON_ENABLE_METRICS
Expand Down

0 comments on commit db990d3

Please sign in to comment.