feat: Metrics API enhanced for Failure counters (#377) #378
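This change labels Triton's per-model inference failure counter with the reason a request failed. A new FailureReason enum (REJECTED, CANCELED, BACKEND, OTHER) is threaded from the schedulers, the backend lifecycle, and the ensemble scheduler into InferenceStatsAggregator::UpdateFailure, and MetricModelReporter now pre-registers one child counter per reason. As a sketch of the intended effect, assuming Metrics::FamilyInferenceFailure() corresponds to the conventional nv_inference_request_failure family (series values illustrative):

```text
nv_inference_request_failure{model="simple",reason="BACKEND",version="1"} 3
nv_inference_request_failure{model="simple",reason="CANCELED",version="1"} 0
nv_inference_request_failure{model="simple",reason="OTHER",version="1"} 0
nv_inference_request_failure{model="simple",reason="REJECTED",version="1"} 2
```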
Merged 1 commit on Jul 15, 2024.

Changes from all commits:
15 changes: 13 additions & 2 deletions src/backend_model_instance.cc
@@ -36,6 +36,7 @@
#include "backend_config.h"
#include "backend_model.h"
#include "cuda_utils.h"
#include "infer_stats.h"
#include "metrics.h"
#include "model_config.pb.h"
#include "numa_utils.h"
@@ -558,7 +559,8 @@ TritonModelInstance::PrepareRequestsOrRespond(
   // If any errors occurred, respond with error for each request.
   if (!status.IsOk()) {
     for (auto& r : requests) {
-      InferenceRequest::RespondIfError(r, status, true /* release_requests */);
+      InferenceRequest::RespondIfError(
+          r, status, true /* release_requests */, FailureReason::OTHER);
     }
     // Log a single error for batch of requests for better visibility
     LOG_STATUS_ERROR(status, "Requests failed pre-execution checks");
@@ -685,7 +687,16 @@ TritonModelInstance::Execute(
   for (TRITONBACKEND_Request* tr : triton_requests) {
     std::unique_ptr<InferenceRequest> ur(
         reinterpret_cast<InferenceRequest*>(tr));
-    InferenceRequest::RespondIfError(ur, status, true /* release_requests */);
+    // NOTE: If a backend both returns an error in
+    // TRITONBACKEND_ModelInstanceExecute and reports an error with
+    // TRITONBACKEND_ModelInstanceReportStatistics, this can result in double
+    // counting of the failure metric for the same request. However, it is
+    // currently not expected for this to be a common case, as the return
+    // value of TRITONBACKEND_ModelInstanceExecute is used to express
+    // ownership of the request rather than success of an inference request.
+    // See tritonbackend.h for more details on this.
+    InferenceRequest::RespondIfError(
+        ur, status, true /* release_requests */, FailureReason::BACKEND);
   }

   TRITONSERVER_ErrorDelete(err);
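To make the NOTE's double-counting caveat concrete, here is a hypothetical backend fragment (a sketch under assumed conditions, not code from this PR; the signatures come from tritonbackend.h, and real inference work is elided):

```cpp
#include "triton/core/tritonbackend.h"

// Sketch: a backend that reports a failed request itself and then ALSO
// returns an error from Execute. After this PR both paths increment the
// failure metric, so the same request is counted twice.
extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  for (uint32_t i = 0; i < request_count; i++) {
    // ... inference fails for requests[i] ...
    // Count #1: the backend reports the failure via the statistics API
    // (timestamps zeroed for brevity).
    TRITONBACKEND_ModelInstanceReportStatistics(
        instance, requests[i], false /* success */, 0 /* exec_start_ns */,
        0 /* compute_start_ns */, 0 /* compute_end_ns */, 0 /* exec_end_ns */);
  }
  // Count #2: returning an error hands the still-owned requests back to the
  // core, which responds to each via RespondIfError(..., FailureReason::BACKEND).
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INTERNAL, "model execution failed");
}
```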
11 changes: 7 additions & 4 deletions src/dynamic_batch_scheduler.cc
@@ -50,11 +50,12 @@ IsStaleState(Payload::State payload_state)
 void
 FinishSkippedRequests(
     std::vector<std::deque<std::unique_ptr<InferenceRequest>>>&& requests,
-    const Status& response_status)
+    const Status& response_status, FailureReason reason)
 {
   for (auto& queue : requests) {
     for (auto& request : queue) {
-      InferenceRequest::RespondIfError(request, response_status, true);
+      InferenceRequest::RespondIfError(
+          request, response_status, true /* release_requests */, reason);
     }
   }
 }
@@ -69,8 +70,10 @@ FinishRejectedCancelledRequests(
   const static Status rejected_status =
       Status(Status::Code::UNAVAILABLE, "Request timeout expired");
   const static Status cancelled_status = Status(Status::Code::CANCELLED);
-  FinishSkippedRequests(std::move(rejected_requests), rejected_status);
-  FinishSkippedRequests(std::move(cancelled_requests), cancelled_status);
+  FinishSkippedRequests(
+      std::move(rejected_requests), rejected_status, FailureReason::REJECTED);
+  FinishSkippedRequests(
+      std::move(cancelled_requests), cancelled_status, FailureReason::CANCELED);
 }

 DynamicBatchScheduler::DynamicBatchScheduler(
33 changes: 19 additions & 14 deletions src/ensemble_scheduler/ensemble_scheduler.cc
@@ -81,23 +81,26 @@ class RequestTracker {
     std::lock_guard<std::mutex> lk(mtx_);
     inflight_request_counter_--;
     if (inflight_request_counter_ == 0) {
+      if (request_ != nullptr) {
 #ifdef TRITON_ENABLE_STATS
-      const auto& infer_stats = context_stats_aggregator_.ImmutableInferStats();
-      request_->ReportStatisticsWithDuration(
-          metric_reporter_, status_.IsOk(), compute_start_ns_,
-          infer_stats.compute_input_duration_ns_,
-          infer_stats.compute_infer_duration_ns_,
-          infer_stats.compute_output_duration_ns_);
-      if (status_.IsOk()) {
-        stats_aggregator_->UpdateInferBatchStatsWithDuration(
-            metric_reporter_, std::max(1U, request_->BatchSize()),
+        const auto& infer_stats =
+            context_stats_aggregator_.ImmutableInferStats();
+        request_->ReportStatisticsWithDuration(
+            metric_reporter_, status_.IsOk(), compute_start_ns_,
             infer_stats.compute_input_duration_ns_,
             infer_stats.compute_infer_duration_ns_,
             infer_stats.compute_output_duration_ns_);
-      }
+        if (status_.IsOk()) {
+          stats_aggregator_->UpdateInferBatchStatsWithDuration(
+              metric_reporter_, std::max(1U, request_->BatchSize()),
+              infer_stats.compute_input_duration_ns_,
+              infer_stats.compute_infer_duration_ns_,
+              infer_stats.compute_output_duration_ns_);
+        }
 #endif
-      InferenceRequest::Release(
-          std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL);
+        InferenceRequest::Release(
+            std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL);
+      }
     }
     return (inflight_request_counter_ == 0);
   }
@@ -1136,7 +1139,8 @@ EnsembleContext::FinishEnsemble(std::unique_ptr<InferenceResponse>&& response)
"more "
"ensemble steps can be made");
InferenceRequest::RespondIfError(
request_tracker_->Request(), ensemble_status_);
request_tracker_->Request(), ensemble_status_,
false /* release_requests */, FailureReason::OTHER);
} else {
request_tracker_->Request()->ResponseFactory()->SendFlags(
TRITONSERVER_RESPONSE_COMPLETE_FINAL);
@@ -1149,7 +1153,8 @@ EnsembleContext::FinishEnsemble(std::unique_ptr<InferenceResponse>&& response)
           ensemble_status_);
     } else {
       InferenceRequest::RespondIfError(
-          request_tracker_->Request(), ensemble_status_);
+          request_tracker_->Request(), ensemble_status_,
+          false /* release_requests */, FailureReason::OTHER);
     }
   }
64 changes: 43 additions & 21 deletions src/infer_request.cc
@@ -421,10 +421,25 @@ InferenceRequest::Run(std::unique_ptr<InferenceRequest>& request)
   return status;
 }

+FailureReason
+stringToFailureReason(const std::string& error_type)
+{
+  if (error_type == "REJECTED") {
+    return FailureReason::REJECTED;
+  }
+  if (error_type == "CANCELED") {
+    return FailureReason::CANCELED;
+  }
+  if (error_type == "BACKEND") {
+    return FailureReason::BACKEND;
+  }
+  return FailureReason::OTHER;
+}
+
 void
 InferenceRequest::RespondIfError(
     std::unique_ptr<InferenceRequest>& request, const Status& status,
-    const bool release_request)
+    const bool release_request, FailureReason reason)
 {
   if (status.IsOk()) {
     return;
@@ -442,7 +457,10 @@ InferenceRequest::RespondIfError(
       InferenceResponse::SendWithStatus(
           std::move(response), TRITONSERVER_RESPONSE_COMPLETE_FINAL, status),
       (request->LogRequest() + "failed to send error response").c_str());
-
+#ifdef TRITON_ENABLE_STATS
+  request->ReportErrorStatistics(
+      request->model_raw_->MetricReporter().get(), reason);
+#endif
   // If releasing the request then invoke the release callback which
   // gives ownership to the callback. So can't access 'request' after
   // this point.
@@ -452,20 +470,6 @@ InferenceRequest::RespondIfError(
   }
 }

-void
-InferenceRequest::RespondIfError(
-    std::vector<std::unique_ptr<InferenceRequest>>& requests,
-    const Status& status, const bool release_requests)
-{
-  if (status.IsOk()) {
-    return;
-  }
-
-  for (auto& request : requests) {
-    RespondIfError(request, status, release_requests);
-  }
-}
-
 Status
 InferenceRequest::Release(
     std::unique_ptr<InferenceRequest>&& request, const uint32_t release_flags)
@@ -1371,6 +1375,21 @@ InferenceRequest::ValidateBytesInputs(
 }

 #ifdef TRITON_ENABLE_STATS
+
+void
+InferenceRequest::ReportErrorStatistics(
+    MetricModelReporter* metric_reporter, FailureReason reason)
+{
+  INFER_STATS_DECL_TIMESTAMP(request_end_ns);
+  model_raw_->MutableStatsAggregator()->UpdateFailure(
+      metric_reporter, request_start_ns_, request_end_ns, reason);
+  if (secondary_stats_aggregator_ != nullptr) {
+    secondary_stats_aggregator_->UpdateFailure(
+        nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
+        reason);
+  }
+}
+
 void
 InferenceRequest::ReportStatistics(
     MetricModelReporter* metric_reporter, bool success,
@@ -1407,10 +1426,12 @@
     }
   } else {
     model_raw_->MutableStatsAggregator()->UpdateFailure(
-        metric_reporter, request_start_ns_, request_end_ns);
+        metric_reporter, request_start_ns_, request_end_ns,
+        FailureReason::BACKEND);
     if (secondary_stats_aggregator_ != nullptr) {
       secondary_stats_aggregator_->UpdateFailure(
-          nullptr /* metric_reporter */, request_start_ns_, request_end_ns);
+          nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
+          FailureReason::BACKEND);
     }
   }
 }
@@ -1443,10 +1464,12 @@ InferenceRequest::ReportStatisticsWithDuration(
     }
   } else {
     model_raw_->MutableStatsAggregator()->UpdateFailure(
-        metric_reporter, request_start_ns_, request_end_ns);
+        metric_reporter, request_start_ns_, request_end_ns,
+        FailureReason::OTHER);
     if (secondary_stats_aggregator_ != nullptr) {
       secondary_stats_aggregator_->UpdateFailure(
-          nullptr /* metric_reporter */, request_start_ns_, request_end_ns);
+          nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
+          FailureReason::OTHER);
     }
   }
 }
@@ -1850,5 +1873,4 @@ operator!=(
 {
   return !(lhs == rhs);
 }
-
 }}  // namespace triton::core
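As a quick sanity check on the two converters this PR introduces (stringToFailureReason here and failureReasonToString in infer_stats.cc), a hypothetical test like the following should hold; it assumes both functions are visible and live in namespace triton::core:

```cpp
#include <cassert>
#include <string>

int
main()
{
  using namespace triton::core;
  // Known reasons round-trip exactly through the two converters...
  for (const std::string s : {"REJECTED", "CANCELED", "BACKEND", "OTHER"}) {
    assert(failureReasonToString(stringToFailureReason(s)) == s);
  }
  // ...while unrecognized strings deliberately collapse to OTHER.
  assert(stringToFailureReason("TIMED_OUT") == FailureReason::OTHER);
  return 0;
}
```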
16 changes: 14 additions & 2 deletions src/infer_request.h
@@ -590,7 +590,8 @@ class InferenceRequest {
   // 'release_request' is true 'request' is returned as nullptr.
   static void RespondIfError(
       std::unique_ptr<InferenceRequest>& request, const Status& status,
-      const bool release_request = false);
+      const bool release_request = false,
+      FailureReason reason = FailureReason::OTHER);

   // Send an error response to a set of 'requests'. If 'status' is
   // Success then no responses are sent and the requests are not
@@ -603,7 +604,8 @@ class InferenceRequest {
   // returned with all nullptrs.
   static void RespondIfError(
       std::vector<std::unique_ptr<InferenceRequest>>& requests,
-      const Status& status, const bool release_requests = false);
+      const Status& status, const bool release_requests = false,
+      FailureReason reason = FailureReason::OTHER);

   // Release the request. Call the release callback and transfer
   // ownership of the request to the callback. On return 'request' is
@@ -673,6 +675,16 @@ class InferenceRequest {
     const uint64_t compute_start_ns, const uint64_t compute_input_end_ns,
     const uint64_t compute_output_start_ns, const uint64_t compute_end_ns);

+  // Report the error statistics to stats collectors associated with the
+  // request.
+  // FIXME: A separate function may not be necessary here, but is being used
+  // cautiously in case of unforeseen issues such as possibly capturing a trace
+  // twice. This should be revisited and better tested to see if the
+  // ReportStatistics function can be used as-is for the newly captured failure
+  // cases.
+  void ReportErrorStatistics(
+      MetricModelReporter* metric_reporter, FailureReason reason);
+
   // Report the statistics to stats collectors associated with the request.
   // Duration and timestamps provide two granularities for stats collectors.
   void ReportStatisticsWithDuration(
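Since the new reason parameters default to FailureReason::OTHER, pre-existing call sites keep compiling unchanged; for example, the two calls below are equivalent (fragment, assuming a request and a non-OK status in scope):

```cpp
// Old spelling, still valid after this PR:
InferenceRequest::RespondIfError(request, status);
// Identical behavior with the defaults written out:
InferenceRequest::RespondIfError(
    request, status, false /* release_request */, FailureReason::OTHER);
```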
23 changes: 21 additions & 2 deletions src/infer_stats.cc
@@ -36,10 +36,28 @@ namespace triton { namespace core {

 #ifdef TRITON_ENABLE_STATS

+// This function converts FailureReason enum values to std::string
+std::string
+failureReasonToString(FailureReason reason)
+{
+  switch (reason) {
+    case FailureReason::REJECTED:
+      return "REJECTED";
+    case FailureReason::CANCELED:
+      return "CANCELED";
+    case FailureReason::BACKEND:
+      return "BACKEND";
+    case FailureReason::OTHER:
+      return "OTHER";
+    default:
+      return "OTHER";
+  }
+}
+
 void
 InferenceStatsAggregator::UpdateFailure(
     MetricModelReporter* metric_reporter, const uint64_t request_start_ns,
-    const uint64_t request_end_ns)
+    const uint64_t request_end_ns, FailureReason reason)
 {
   std::lock_guard<std::mutex> lock(mu_);
@@ -48,7 +66,8 @@ InferenceStatsAggregator::UpdateFailure(

 #ifdef TRITON_ENABLE_METRICS
   if (metric_reporter != nullptr) {
-    metric_reporter->IncrementCounter("inf_failure", 1);
+    std::string reason_str = failureReasonToString(reason);
+    metric_reporter->IncrementCounter("inf_failure_" + reason_str, 1);
   }
 #endif  // TRITON_ENABLE_METRICS
 }
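A hypothetical call site for the extended aggregator API (fragment; the aggregator, reporter, and timestamps are assumed to exist in scope):

```cpp
// Records a queue-timeout failure: this bumps the counter registered under
// the key "inf_failure_REJECTED", i.e. the child of the failure family
// carrying the {reason="REJECTED"} label.
aggregator.UpdateFailure(
    reporter.get(), request_start_ns, request_end_ns, FailureReason::REJECTED);
```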
5 changes: 4 additions & 1 deletion src/infer_stats.h
@@ -39,6 +39,9 @@

 namespace triton { namespace core {

+// Define the FailureReason enum within the triton::core namespace
+enum class FailureReason { REJECTED, CANCELED, BACKEND, OTHER };
+
 class MetricModelReporter;

@@ -136,7 +139,7 @@ class InferenceStatsAggregator {
   // Add durations to Infer stats for a failed inference request.
   void UpdateFailure(
       MetricModelReporter* metric_reporter, const uint64_t request_start_ns,
-      const uint64_t request_end_ns);
+      const uint64_t request_end_ns, FailureReason reason);

   // Add durations to infer stats for a successful inference request.
   void UpdateSuccess(
18 changes: 17 additions & 1 deletion src/metric_model_reporter.cc
@@ -29,6 +29,7 @@
 #ifdef TRITON_ENABLE_METRICS

 #include "constants.h"
+#include "infer_stats.h"
 #include "triton/common/logging.h"

 // Global config group has 'name' of empty string.
@@ -101,6 +102,13 @@ MetricReporterConfig::ParseQuantiles(std::string options)
 //
 // MetricModelReporter
 //
+const std::map<FailureReason, std::string>
+    MetricModelReporter::failure_reasons_map = {
+        {FailureReason::REJECTED, "REJECTED"},
+        {FailureReason::CANCELED, "CANCELED"},
+        {FailureReason::BACKEND, "BACKEND"},
+        {FailureReason::OTHER, "OTHER"}};
+
 Status
 MetricModelReporter::Create(
     const ModelIdentifier& model_id, const int64_t model_version,
@@ -189,7 +197,6 @@ MetricModelReporter::InitializeCounters(
 {
   // Always setup these counters, regardless of config
   counter_families_["inf_success"] = &Metrics::FamilyInferenceSuccess();
-  counter_families_["inf_failure"] = &Metrics::FamilyInferenceFailure();
   counter_families_["inf_count"] = &Metrics::FamilyInferenceCount();
   counter_families_["inf_exec_count"] =
       &Metrics::FamilyInferenceExecutionCount();
@@ -227,6 +234,15 @@ MetricModelReporter::InitializeCounters(
       counters_[name] = CreateMetric<prometheus::Counter>(*family_ptr, labels);
     }
   }
+
+  // Initialize failure metrics with reasons
+  for (const auto& reason_pair : failure_reasons_map) {
+    std::map<std::string, std::string> extended_labels = labels;
+    extended_labels["reason"] = reason_pair.second;
+    counters_["inf_failure_" + reason_pair.second] =
+        CreateMetric<prometheus::Counter>(
+            Metrics::FamilyInferenceFailure(), extended_labels);
+  }
 }

 void
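End to end, the increment path is a keyed lookup into the counters pre-registered above; a condensed sketch (hypothetical helper, not part of the PR):

```cpp
// InitializeCounters() created one child per reason in the inference-failure
// family, keyed "inf_failure_<REASON>" and labeled {reason="<REASON>"};
// UpdateFailure() rebuilds the same key at report time.
void
RecordFailure(MetricModelReporter* reporter, FailureReason reason)
{
  reporter->IncrementCounter("inf_failure_" + failureReasonToString(reason), 1);
}
```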
2 changes: 2 additions & 0 deletions src/metric_model_reporter.h
@@ -94,6 +94,8 @@ class MetricModelReporter {
   // Lookup summary metric by name, and observe the value if it exists.
   void ObserveSummary(const std::string& name, double value);

+  static const std::map<FailureReason, std::string> failure_reasons_map;
+
  private:
   MetricModelReporter(
       const ModelIdentifier& model_id, const int64_t model_version,