Commit

…into yinggeh-DLIS-6657-client-input-byte-size-check
yinggeh committed Jul 23, 2024
2 parents b336ecc + d2abb8b commit 73d374e
Showing 19 changed files with 253 additions and 74 deletions.
6 changes: 3 additions & 3 deletions include/triton/core/tritonbackend.h
@@ -1722,9 +1722,9 @@ TRITONBACKEND_BackendAttributeSetParallelModelInstanceLoading(
///
/// \param batcher User-defined placeholder for backend to store and
/// retrieve information about the batching strategy for this
/// model.RITONBACKEND_ISPEC return a TRITONSERVER_Error indicating success or
/// failure. \param model The backend model for which Triton is forming a batch.
/// \return a TRITONSERVER_Error indicating success or failure.
/// model. Returns a TRITONSERVER_Error indicating success
/// or failure. \param model The backend model for which Triton is forming a
/// batch. \return a TRITONSERVER_Error indicating success or failure.
TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelBatcherInitialize(
TRITONBACKEND_Batcher** batcher, TRITONBACKEND_Model* model);

18 changes: 15 additions & 3 deletions include/triton/core/tritonserver.h
@@ -91,7 +91,7 @@ struct TRITONSERVER_MetricFamily;
/// }
///
#define TRITONSERVER_API_VERSION_MAJOR 1
#define TRITONSERVER_API_VERSION_MINOR 32
#define TRITONSERVER_API_VERSION_MINOR 33

/// Get the TRITONBACKEND API version supported by the Triton shared
/// library. This value can be compared against the
@@ -732,7 +732,8 @@ typedef enum tritonserver_traceactivity_enum {
TRITONSERVER_TRACE_REQUEST_END = 6,
TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT = 7,
TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT = 8,
TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9
TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9,
TRITONSERVER_TRACE_CUSTOM_ACTIVITY = 10
} TRITONSERVER_InferenceTraceActivity;

/// Get the string representation of a trace activity. The returned
@@ -838,6 +839,18 @@ TRITONSERVER_InferenceTraceTensorNew(
TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn,
TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp);

/// Report a trace activity. All the traces reported using this API will be
/// using TRITONSERVER_TRACE_CUSTOM_ACTIVITY type.
///
/// \param trace The trace object.
/// \param timestamp The timestamp associated with the trace activity.
/// \param name The trace activity name.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceTraceReportActivity(
TRITONSERVER_InferenceTrace* trace, uint64_t timestamp,
const char* activity_name);

/// Delete a trace object.
///
/// \param trace The trace object.
@@ -921,7 +934,6 @@ TRITONSERVER_DECLSPEC struct TRITONSERVER_Error*
TRITONSERVER_InferenceTraceSetContext(
struct TRITONSERVER_InferenceTrace* trace, const char* trace_context);


/// Get TRITONSERVER_InferenceTrace context.
///
/// \param trace The trace.
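The new TRITONSERVER_InferenceTraceReportActivity entry point pairs with the TRITONSERVER_TRACE_CUSTOM_ACTIVITY value added to the activity enum above. As a rough illustration only (not part of this commit; the helper and the activity name are hypothetical), application code holding a live trace object could report a custom activity as follows — only the trace-report call, TRITONSERVER_Error, and TRITONSERVER_ErrorDelete are taken from the API shown in this diff:

#include <chrono>
#include <cstdint>

#include "triton/core/tritonserver.h"

// Hypothetical helper: report a named custom activity on a live trace.
// Every report made through this API is recorded with the
// TRITONSERVER_TRACE_CUSTOM_ACTIVITY activity type.
void
ReportCustomActivity(TRITONSERVER_InferenceTrace* trace, const char* name)
{
  const uint64_t timestamp_ns =
      std::chrono::duration_cast<std::chrono::nanoseconds>(
          std::chrono::steady_clock::now().time_since_epoch())
          .count();
  TRITONSERVER_Error* err =
      TRITONSERVER_InferenceTraceReportActivity(trace, timestamp_ns, name);
  if (err != nullptr) {
    // Handle the failure as appropriate for the application, then free it.
    TRITONSERVER_ErrorDelete(err);
  }
}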
5 changes: 0 additions & 5 deletions python/test/test_api.py
@@ -345,11 +345,6 @@ def test_ready(self):
server = tritonserver.Server(self._server_options).start()
self.assertTrue(server.ready())

@pytest.mark.xfail(
tritonserver.__version__ <= "2.48.0",
reason="Known issue on stop: Exit timeout expired. Exiting immediately",
raises=tritonserver.InternalError,
)
def test_stop(self):
server = tritonserver.Server(self._server_options).start(wait_until_ready=True)

15 changes: 13 additions & 2 deletions python/tritonserver/_c/tritonserver_pybind.cc
@@ -1,4 +1,4 @@
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -1434,7 +1434,18 @@ class PyServer : public PyWrapper<struct TRITONSERVER_Server> {
owned_ = true;
}

void Stop() const { ThrowIfError(TRITONSERVER_ServerStop(triton_object_)); }
void Stop() const
{
// ServerStop is blocking for the duration of the server exit timeout, so
// ensure to release the GIL. This can allow request release callbacks
// to be interleaved while server is waiting for live requests/models
// to complete. Without releasing GIL, this function may acquire the GIL
// first and block the Triton request from being released/freed, thus
// blocking the server's shutdown in a circular manner thinking a model is
// still alive.
py::gil_scoped_release release;
ThrowIfError(TRITONSERVER_ServerStop(triton_object_));
}

void RegisterModelRepository(
const std::string& repository_path,
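The GIL handling above follows the standard pybind11 pattern: release the GIL around any blocking native call whose completion depends on Python callbacks being able to run. A minimal, self-contained sketch of that pattern (not Triton code; the module and function names are invented for illustration):

#include <chrono>
#include <thread>

#include <pybind11/pybind11.h>

namespace py = pybind11;

// Stand-in for a blocking native shutdown such as TRITONSERVER_ServerStop():
// it cannot finish until outstanding work is released via Python callbacks.
static void
BlockingShutdown()
{
  std::this_thread::sleep_for(std::chrono::seconds(1));
}

PYBIND11_MODULE(example, m)
{
  m.def("stop", []() {
    // Drop the GIL for the duration of the blocking call; otherwise a
    // Python-side release callback could never run and shutdown would
    // deadlock.
    py::gil_scoped_release release;
    BlockingShutdown();
  });
}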
15 changes: 13 additions & 2 deletions src/backend_model_instance.cc
@@ -36,6 +36,7 @@
#include "backend_config.h"
#include "backend_model.h"
#include "cuda_utils.h"
#include "infer_stats.h"
#include "metrics.h"
#include "model_config.pb.h"
#include "numa_utils.h"
@@ -558,7 +559,8 @@ TritonModelInstance::PrepareRequestsOrRespond(
// If any errors occurred, respond with error for each request.
if (!status.IsOk()) {
for (auto& r : requests) {
InferenceRequest::RespondIfError(r, status, true /* release_requests */);
InferenceRequest::RespondIfError(
r, status, true /* release_requests */, FailureReason::OTHER);
}
// Log a single error for batch of requests for better visibility
LOG_STATUS_ERROR(status, "Requests failed pre-execution checks");
@@ -685,7 +687,16 @@ TritonModelInstance::Execute(
for (TRITONBACKEND_Request* tr : triton_requests) {
std::unique_ptr<InferenceRequest> ur(
reinterpret_cast<InferenceRequest*>(tr));
InferenceRequest::RespondIfError(ur, status, true /* release_requests */);
// NOTE: If a backend both returns an error in
// TRITONBACKEND_ModelInstanceExecute and reports an error with
// TRITONBACKEND_ModelInstanceReportStatistics, this can result in double
// counting of the failure metric for the same request. However, it is
// currently not expected for this to be a common case, as the return
// value of TRITONBACKEND_ModelInstanceExecute is used to express
// ownership of the request rather than success of an inference request.
// See tritonbackend.h for more details on this.
InferenceRequest::RespondIfError(
ur, status, true /* release_requests */, FailureReason::BACKEND);
}

TRITONSERVER_ErrorDelete(err);
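The ownership note in the comment above is easiest to see from the backend side. In a hypothetical backend (sketch only, not from this commit), returning a non-null error from TRITONBACKEND_ModelInstanceExecute tells Triton that the backend did not take ownership of the requests, so the core responds to and releases them itself — and, with this change, counts each one under FailureReason::BACKEND:

#include "triton/core/tritonbackend.h"

extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  bool can_execute = false;  // hypothetical availability/resource check
  if (!can_execute) {
    // Error return: Triton keeps ownership and fails the requests itself.
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL, "instance cannot execute this batch");
  }
  // Normal path: the backend owns 'requests', sends responses, and releases
  // each request itself before returning nullptr.
  return nullptr;  // nullptr == success; backend has taken ownership
}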
11 changes: 7 additions & 4 deletions src/dynamic_batch_scheduler.cc
@@ -50,11 +50,12 @@ IsStaleState(Payload::State payload_state)
void
FinishSkippedRequests(
std::vector<std::deque<std::unique_ptr<InferenceRequest>>>&& requests,
const Status& response_status)
const Status& response_status, FailureReason reason)
{
for (auto& queue : requests) {
for (auto& request : queue) {
InferenceRequest::RespondIfError(request, response_status, true);
InferenceRequest::RespondIfError(
request, response_status, true /* release_requests */, reason);
}
}
}
@@ -69,8 +70,10 @@ FinishRejectedCancelledRequests(
const static Status rejected_status =
Status(Status::Code::UNAVAILABLE, "Request timeout expired");
const static Status cancelled_status = Status(Status::Code::CANCELLED);
FinishSkippedRequests(std::move(rejected_requests), rejected_status);
FinishSkippedRequests(std::move(cancelled_requests), cancelled_status);
FinishSkippedRequests(
std::move(rejected_requests), rejected_status, FailureReason::REJECTED);
FinishSkippedRequests(
std::move(cancelled_requests), cancelled_status, FailureReason::CANCELED);
}

DynamicBatchScheduler::DynamicBatchScheduler(
33 changes: 19 additions & 14 deletions src/ensemble_scheduler/ensemble_scheduler.cc
@@ -81,23 +81,26 @@ class RequestTracker {
std::lock_guard<std::mutex> lk(mtx_);
inflight_request_counter_--;
if (inflight_request_counter_ == 0) {
if (request_ != nullptr) {
#ifdef TRITON_ENABLE_STATS
const auto& infer_stats = context_stats_aggregator_.ImmutableInferStats();
request_->ReportStatisticsWithDuration(
metric_reporter_, status_.IsOk(), compute_start_ns_,
infer_stats.compute_input_duration_ns_,
infer_stats.compute_infer_duration_ns_,
infer_stats.compute_output_duration_ns_);
if (status_.IsOk()) {
stats_aggregator_->UpdateInferBatchStatsWithDuration(
metric_reporter_, std::max(1U, request_->BatchSize()),
const auto& infer_stats =
context_stats_aggregator_.ImmutableInferStats();
request_->ReportStatisticsWithDuration(
metric_reporter_, status_.IsOk(), compute_start_ns_,
infer_stats.compute_input_duration_ns_,
infer_stats.compute_infer_duration_ns_,
infer_stats.compute_output_duration_ns_);
}
if (status_.IsOk()) {
stats_aggregator_->UpdateInferBatchStatsWithDuration(
metric_reporter_, std::max(1U, request_->BatchSize()),
infer_stats.compute_input_duration_ns_,
infer_stats.compute_infer_duration_ns_,
infer_stats.compute_output_duration_ns_);
}
#endif
InferenceRequest::Release(
std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL);
InferenceRequest::Release(
std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL);
}
}
return (inflight_request_counter_ == 0);
}
@@ -1136,7 +1139,8 @@ EnsembleContext::FinishEnsemble(std::unique_ptr<InferenceResponse>&& response)
"more "
"ensemble steps can be made");
InferenceRequest::RespondIfError(
request_tracker_->Request(), ensemble_status_);
request_tracker_->Request(), ensemble_status_,
false /* release_requests */, FailureReason::OTHER);
} else {
request_tracker_->Request()->ResponseFactory()->SendFlags(
TRITONSERVER_RESPONSE_COMPLETE_FINAL);
@@ -1149,7 +1153,8 @@ EnsembleContext::FinishEnsemble(std::unique_ptr<InferenceResponse>&& response)
ensemble_status_);
} else {
InferenceRequest::RespondIfError(
request_tracker_->Request(), ensemble_status_);
request_tracker_->Request(), ensemble_status_,
false /* release_requests */, FailureReason::OTHER);
}
}

64 changes: 43 additions & 21 deletions src/infer_request.cc
@@ -421,10 +421,25 @@ InferenceRequest::Run(std::unique_ptr<InferenceRequest>& request)
return status;
}

FailureReason
stringToFailureReason(const std::string& error_type)
{
if (error_type == "REJECTED") {
return FailureReason::REJECTED;
}
if (error_type == "CANCELED") {
return FailureReason::CANCELED;
}
if (error_type == "BACKEND") {
return FailureReason::BACKEND;
}
return FailureReason::OTHER;
}

void
InferenceRequest::RespondIfError(
std::unique_ptr<InferenceRequest>& request, const Status& status,
const bool release_request)
const bool release_request, FailureReason reason)
{
if (status.IsOk()) {
return;
@@ -442,7 +457,10 @@ InferenceRequest::RespondIfError(
InferenceResponse::SendWithStatus(
std::move(response), TRITONSERVER_RESPONSE_COMPLETE_FINAL, status),
(request->LogRequest() + "failed to send error response").c_str());

#ifdef TRITON_ENABLE_STATS
request->ReportErrorStatistics(
request->model_raw_->MetricReporter().get(), reason);
#endif
// If releasing the request then invoke the release callback which
// gives ownership to the callback. So can't access 'request' after
// this point.
@@ -452,20 +470,6 @@ InferenceRequest::RespondIfError(
}
}

void
InferenceRequest::RespondIfError(
std::vector<std::unique_ptr<InferenceRequest>>& requests,
const Status& status, const bool release_requests)
{
if (status.IsOk()) {
return;
}

for (auto& request : requests) {
RespondIfError(request, status, release_requests);
}
}

Status
InferenceRequest::Release(
std::unique_ptr<InferenceRequest>&& request, const uint32_t release_flags)
@@ -1389,6 +1393,21 @@ InferenceRequest::ValidateBytesInputs(
}

#ifdef TRITON_ENABLE_STATS

void
InferenceRequest::ReportErrorStatistics(
MetricModelReporter* metric_reporter, FailureReason reason)
{
INFER_STATS_DECL_TIMESTAMP(request_end_ns);
model_raw_->MutableStatsAggregator()->UpdateFailure(
metric_reporter, request_start_ns_, request_end_ns, reason);
if (secondary_stats_aggregator_ != nullptr) {
secondary_stats_aggregator_->UpdateFailure(
nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
reason);
}
}

void
InferenceRequest::ReportStatistics(
MetricModelReporter* metric_reporter, bool success,
@@ -1425,10 +1444,12 @@ InferenceRequest::ReportStatistics(
}
} else {
model_raw_->MutableStatsAggregator()->UpdateFailure(
metric_reporter, request_start_ns_, request_end_ns);
metric_reporter, request_start_ns_, request_end_ns,
FailureReason::BACKEND);
if (secondary_stats_aggregator_ != nullptr) {
secondary_stats_aggregator_->UpdateFailure(
nullptr /* metric_reporter */, request_start_ns_, request_end_ns);
nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
FailureReason::BACKEND);
}
}
}
@@ -1461,10 +1482,12 @@ InferenceRequest::ReportStatisticsWithDuration(
}
} else {
model_raw_->MutableStatsAggregator()->UpdateFailure(
metric_reporter, request_start_ns_, request_end_ns);
metric_reporter, request_start_ns_, request_end_ns,
FailureReason::OTHER);
if (secondary_stats_aggregator_ != nullptr) {
secondary_stats_aggregator_->UpdateFailure(
nullptr /* metric_reporter */, request_start_ns_, request_end_ns);
nullptr /* metric_reporter */, request_start_ns_, request_end_ns,
FailureReason::OTHER);
}
}
}
@@ -1868,5 +1891,4 @@ operator!=(
{
return !(lhs == rhs);
}

}} // namespace triton::core
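The new stringToFailureReason() helper added above maps error-type strings onto FailureReason values, with unknown strings falling back to OTHER. A standalone sketch of that mapping (hypothetical; it re-declares a minimal FailureReason instead of including Triton's infer_stats.h):

#include <iostream>
#include <string>

// Minimal stand-in for the FailureReason enum declared in infer_stats.h.
enum class FailureReason { REJECTED, CANCELED, BACKEND, OTHER };

// Mirrors the mapping added in infer_request.cc.
FailureReason
stringToFailureReason(const std::string& error_type)
{
  if (error_type == "REJECTED") {
    return FailureReason::REJECTED;
  }
  if (error_type == "CANCELED") {
    return FailureReason::CANCELED;
  }
  if (error_type == "BACKEND") {
    return FailureReason::BACKEND;
  }
  return FailureReason::OTHER;
}

int
main()
{
  // Both lines print 1: known strings map directly, anything else maps to OTHER.
  std::cout << (stringToFailureReason("CANCELED") == FailureReason::CANCELED)
            << "\n";
  std::cout << (stringToFailureReason("TIMEOUT") == FailureReason::OTHER)
            << "\n";
  return 0;
}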
16 changes: 14 additions & 2 deletions src/infer_request.h
@@ -590,7 +590,8 @@ class InferenceRequest {
// 'release_request' is true 'request' is returned as nullptr.
static void RespondIfError(
std::unique_ptr<InferenceRequest>& request, const Status& status,
const bool release_request = false);
const bool release_request = false,
FailureReason reason = FailureReason::OTHER);

// Send an error response to a set of 'requests'. If 'status' is
// Success then no responses are sent and the requests are not
@@ -603,7 +604,8 @@
// returned with all nullptrs.
static void RespondIfError(
std::vector<std::unique_ptr<InferenceRequest>>& requests,
const Status& status, const bool release_requests = false);
const Status& status, const bool release_requests = false,
FailureReason reason = FailureReason::OTHER);

// Release the request. Call the release callback and transfer
// ownership of the request to the callback. On return 'request' is
@@ -673,6 +675,16 @@ class InferenceRequest {
const uint64_t compute_start_ns, const uint64_t compute_input_end_ns,
const uint64_t compute_output_start_ns, const uint64_t compute_end_ns);

// Report the error statistics to stats collectors associated with the
// request.
// FIXME: A separate function may not be necessary here, but is being used
// cautiously in case of unforeseen issues such as possibly capturing a trace
// twice. This should be revisited and better tested to see if the
// ReportStatistics function can be used as-is for the newly captured failure
// cases.
void ReportErrorStatistics(
MetricModelReporter* metric_reporter, FailureReason reason);

// Report the statistics to stats collectors associated with the request.
// Duration and timestamps provide two granularities for stats collectors.
void ReportStatisticsWithDuration(