From 5f10d61aeaaada99c93b31d179f5a69274af115a Mon Sep 17 00:00:00 2001 From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com> Date: Fri, 5 Jul 2024 14:50:14 -0700 Subject: [PATCH 01/10] [refactor]: Refactor Frontend Trace OpenTelemetry Implementation (#7390) Co-authored-by: Iman Tabrizian --- src/tracer.cc | 150 +++++++++++++++++++------------------------------- src/tracer.h | 77 ++++++++++++-------------- 2 files changed, 92 insertions(+), 135 deletions(-) diff --git a/src/tracer.cc b/src/tracer.cc index 560278e4a6..b17f5eb7e8 100644 --- a/src/tracer.cc +++ b/src/tracer.cc @@ -335,13 +335,23 @@ TraceManager::SampleTrace(const TraceStartOptions& start_options) std::chrono::duration_cast( std::chrono::steady_clock::now().time_since_epoch()) .count(); - ts->otel_context_ = start_options.propagated_context; - opentelemetry::nostd::shared_ptr root_span; - root_span = ts->StartSpan( - "InferRequest", steady_timestamp_ns, otel_trace_api::kSpanKey); + if (ts->span_stacks_.find(ts->trace_id_) == ts->span_stacks_.end()) { + std::unique_ptr< + std::stack>> + st(new std::stack< + opentelemetry::nostd::shared_ptr>()); + ts->span_stacks_.emplace(ts->trace_id_, std::move(st)); + } + auto active_span = + otel_trace_api::GetSpan(start_options.propagated_context); + if (active_span->GetContext().IsValid()) { + ts->span_stacks_[ts->trace_id_]->emplace(active_span); + } // Storing "InferRequest" span as a root span // to keep it alive for the duration of the request. 
- ts->otel_context_ = ts->otel_context_.SetValue(kRootSpan, root_span); + ts->root_span_ = + ts->StartSpan("InferRequest", steady_timestamp_ns, ts->trace_id_); + ts->span_stacks_[ts->trace_id_]->emplace(ts->root_span_); #else LOG_ERROR << "Unsupported trace mode: " << TraceManager::InferenceTraceModeString(ts->setting_->mode_); @@ -358,7 +368,7 @@ TraceManager::Trace::~Trace() setting_->WriteTrace(streams_); } else if (setting_->mode_ == TRACE_MODE_OPENTELEMETRY) { #ifndef _WIN32 - EndSpan(kRootSpan); + EndSpan(trace_id_); #else LOG_ERROR << "Unsupported trace mode: " << TraceManager::InferenceTraceModeString(setting_->mode_); @@ -390,7 +400,8 @@ TraceManager::Trace::CaptureTimestamp( << "{\"name\":\"" << name << "\",\"ns\":" << timestamp_ns << "}]}"; } else if (setting_->mode_ == TRACE_MODE_OPENTELEMETRY) { #ifndef _WIN32 - AddEvent(kRootSpan, name, timestamp_ns); + root_span_->AddEvent( + name, time_offset_ + std::chrono::nanoseconds{timestamp_ns}); #else LOG_ERROR << "Unsupported trace mode: " << TraceManager::InferenceTraceModeString(setting_->mode_); @@ -501,7 +512,7 @@ TraceManager::ProcessOpenTelemetryParameters( void TraceManager::Trace::StartSpan( - std::string span_key, TRITONSERVER_InferenceTrace* trace, + TRITONSERVER_InferenceTrace* trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, uint64_t trace_id) { @@ -509,7 +520,7 @@ TraceManager::Trace::StartSpan( LOG_TRITONSERVER_ERROR( TRITONSERVER_InferenceTraceParentId(trace, &parent_id), "getting trace parent id"); - std::string parent_span_key = ""; + auto span_parent_id = parent_id; // Currently, only 2 types of sub-spans are supported: // request span and compute span. Compute span is a leaf span @@ -521,16 +532,9 @@ TraceManager::Trace::StartSpan( // If parent_id > 0, then this is a child trace, spawned from // the ensamble's main request. For this instance, the parent // span is the ensembles's request span. 
- if (parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) { - parent_span_key = kRootSpan; - } else if (activity == TRITONSERVER_TRACE_REQUEST_START) { - // [FIXME] For BLS requests parent span for children's request spans - // should be parent model's compute span. Currently, - // this won't work, since parent's compute span will be created - // only after children's spans are created. - parent_span_key = kRequestSpan + std::to_string(parent_id); - } else if (activity == TRITONSERVER_TRACE_COMPUTE_START) { - parent_span_key = kRequestSpan + std::to_string(trace_id); + if ((parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) || + (activity == TRITONSERVER_TRACE_COMPUTE_START)) { + span_parent_id = trace_id; } std::string display_name = "compute"; @@ -542,7 +546,7 @@ TraceManager::Trace::StartSpan( display_name = model_name; } - auto span = StartSpan(display_name, timestamp_ns, parent_span_key); + auto span = StartSpan(display_name, timestamp_ns, span_parent_id); if (activity == TRITONSERVER_TRACE_REQUEST_START) { int64_t model_version; @@ -564,14 +568,13 @@ TraceManager::Trace::StartSpan( PrepareTraceContext(span, &buffer); TRITONSERVER_InferenceTraceSetContext(trace, buffer.Contents().c_str()); } - - otel_context_ = otel_context_.SetValue(span_key, span); + span_stacks_[trace_id]->emplace(span); } opentelemetry::nostd::shared_ptr TraceManager::Trace::StartSpan( std::string display_name, const uint64_t& raw_timestamp_ns, - std::string parent_span_key) + uint64_t trace_id) { otel_trace_api::StartSpanOptions options; options.kind = otel_trace_api::SpanKind::kServer; @@ -580,45 +583,37 @@ TraceManager::Trace::StartSpan( options.start_steady_time = otel_common::SteadyTimestamp{std::chrono::nanoseconds{raw_timestamp_ns}}; - // If the new span is a child span, we need to retrieve its parent from - // the context and provide it through StartSpanOptions to the child span - if (!parent_span_key.empty() && otel_context_.HasKey(parent_span_key)) { - 
auto parent_span = opentelemetry::nostd::get< - opentelemetry::nostd::shared_ptr>( - otel_context_.GetValue(parent_span_key)); - options.parent = parent_span->GetContext(); + // If the new span is a child span, we need to retrieve its parent and + // provide it through StartSpanOptions to the child span + if (span_stacks_.find(trace_id) != span_stacks_.end() && + !span_stacks_[trace_id]->empty()) { + options.parent = span_stacks_[trace_id]->top()->GetContext(); } auto provider = opentelemetry::trace::Provider::GetTracerProvider(); return provider->GetTracer(kTritonTracer)->StartSpan(display_name, options); } void -TraceManager::Trace::EndSpan(std::string span_key) +TraceManager::Trace::EndSpan(uint64_t trace_id) { auto timestamp_ns = std::chrono::duration_cast( std::chrono::steady_clock::now().time_since_epoch()) .count(); - EndSpan(span_key, timestamp_ns); + EndSpan(timestamp_ns, trace_id); } void TraceManager::Trace::EndSpan( - std::string span_key, const uint64_t& raw_timestamp_ns) + const uint64_t& raw_timestamp_ns, uint64_t trace_id) { - if (otel_context_.HasKey(span_key)) { - auto span = opentelemetry::nostd::get< - opentelemetry::nostd::shared_ptr>( - otel_context_.GetValue(span_key)); - - if (span == nullptr) { - return; - } - + if (span_stacks_.find(trace_id) != span_stacks_.end() && + !span_stacks_[trace_id]->empty()) { otel_trace_api::EndSpanOptions end_options; end_options.end_steady_time = otel_common::SteadyTimestamp{ std::chrono::nanoseconds{raw_timestamp_ns}}; - span->End(end_options); + span_stacks_[trace_id]->top()->End(end_options); + span_stacks_[trace_id]->pop(); } } @@ -630,79 +625,46 @@ TraceManager::Trace::ReportToOpenTelemetry( uint64_t id; LOG_TRITONSERVER_ERROR( TRITONSERVER_InferenceTraceId(trace, &id), "getting trace id"); - - auto current_span_key = GetSpanKeyForActivity(activity, id); - if (current_span_key.empty()) { - return; + if (span_stacks_.find(id) == span_stacks_.end()) { + std::unique_ptr< + std::stack>> + st(new std::stack< 
+ opentelemetry::nostd::shared_ptr>()); + span_stacks_.emplace(id, std::move(st)); } - AddEvent(current_span_key, trace, activity, timestamp_ns, id); -} - -std::string -TraceManager::Trace::GetSpanKeyForActivity( - TRITONSERVER_InferenceTraceActivity activity, uint64_t trace_id) -{ - std::string span_name; - switch (activity) { - case TRITONSERVER_TRACE_REQUEST_START: - case TRITONSERVER_TRACE_QUEUE_START: - case TRITONSERVER_TRACE_REQUEST_END: { - span_name = kRequestSpan + std::to_string(trace_id); - break; - } - - case TRITONSERVER_TRACE_COMPUTE_START: - case TRITONSERVER_TRACE_COMPUTE_INPUT_END: - case TRITONSERVER_TRACE_COMPUTE_OUTPUT_START: - case TRITONSERVER_TRACE_COMPUTE_END: { - span_name = kComputeSpan + std::to_string(trace_id); - break; - } - case TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT: - case TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT: - case TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT: - default: { - LOG_ERROR << "Unsupported activity: " - << TRITONSERVER_InferenceTraceActivityString(activity); - span_name = ""; - break; - } - } - - return span_name; + AddEvent(trace, activity, timestamp_ns, id); } void TraceManager::Trace::AddEvent( - std::string span_key, TRITONSERVER_InferenceTrace* trace, + TRITONSERVER_InferenceTrace* trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, - uint64_t id) + uint64_t trace_id) { if (activity == TRITONSERVER_TRACE_REQUEST_START || activity == TRITONSERVER_TRACE_COMPUTE_START) { - StartSpan(span_key, trace, activity, timestamp_ns, id); + StartSpan(trace, activity, timestamp_ns, trace_id); } AddEvent( - span_key, TRITONSERVER_InferenceTraceActivityString(activity), - timestamp_ns); + TRITONSERVER_InferenceTraceActivityString(activity), timestamp_ns, + trace_id); if (activity == TRITONSERVER_TRACE_REQUEST_END || activity == TRITONSERVER_TRACE_COMPUTE_END) { - EndSpan(span_key, timestamp_ns); + EndSpan(timestamp_ns, trace_id); } } void TraceManager::Trace::AddEvent( - std::string span_key, std::string 
event, uint64_t timestamp) + const std::string& event, uint64_t timestamp, uint64_t trace_id) { - if (otel_context_.HasKey(span_key)) { - auto span = opentelemetry::nostd::get< - opentelemetry::nostd::shared_ptr>( - otel_context_.GetValue(span_key)); - span->AddEvent(event, time_offset_ + std::chrono::nanoseconds{timestamp}); + if (span_stacks_.find(trace_id) != span_stacks_.end() && + !span_stacks_[trace_id]->empty()) { + span_stacks_[trace_id]->top()->AddEvent( + event, time_offset_ + std::chrono::nanoseconds{timestamp}); } } diff --git a/src/tracer.h b/src/tracer.h index d6e8ee65b0..e33b16dbcb 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -237,8 +238,8 @@ class TraceManager { /// `TRITONSERVER_TRACE_COMPUTE_START`, /// it starts a new request or compute span. For the request span it /// adds some triton related attributes, and adds this span to - /// `otel_context_`. Alternatively, if activity is - /// `TRITONSERVER_TRACE_REQUEST_END` or + /// a span stack, corresponding to the current trace. Alternatively, + /// if activity is `TRITONSERVER_TRACE_REQUEST_END` or /// `TRITONSERVER_TRACE_COMPUTE_END`, it ends the corresponding span. /// /// \param trace TRITONSERVER_InferenceTrace instance. @@ -258,16 +259,25 @@ class TraceManager { /// OpenTelemetry SystemTimestamp to display span on a timeline, and /// OpenTelemetry SteadyTimestamp to calculate the duration on the span /// with better precision. - /// \param parent_span_key A span key, to find a parent span in the - /// OpenTelemetry context. If empty, a root span will be started, - /// i.e. with no parent span specified. + /// \param trace_id Trace id. /// \return A shared pointer to a newly created OpenTelemetry span. opentelemetry::nostd::shared_ptr StartSpan( std::string display_name, const uint64_t& raw_timestamp_ns, - std::string parent_span_key = ""); + uint64_t trace_id); + + // A map to hold spans. 
Any trace can spawn any amount of child traces, + // e.g. ensemble model and BLS. This map holds + // ( trace id, stack of started spans ) pair and for each trace keeps + // started spans alive for the duration of the traced + // event and helps to preserve parent-child relationship. + std::unordered_map< + uint64_t, std::unique_ptr>>> + span_stacks_; - // OTel context to store spans, created in the current trace - opentelemetry::context::Context otel_context_; + // Root span. Some events should be recorded in the root span, while + // request span is still alive and present in the stack. + opentelemetry::nostd::shared_ptr root_span_; /// Prepares trace context to propagate to TRITONSERVER_InferenceTrace. /// Trace context follows W3C Trace Context specification. @@ -304,8 +314,6 @@ class TraceManager { /// For request spans, it will add the following attributes to the span: /// `model_name`, `model_version`, `trace_id`, `parent_id`. /// - /// \param span_key Span's key to retrieve the corresponding span from the - /// OpenTelemetry context. /// \param trace TRITONSERVER_InferenceTrace, used to request model's name, /// version, trace parent_id from the backend. /// \param activity Trace activity. @@ -315,58 +323,45 @@ class TraceManager { /// with better precision. /// \param trace_id Trace id. void StartSpan( - std::string span_key, TRITONSERVER_InferenceTrace* trace, + TRITONSERVER_InferenceTrace* trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, uint64_t trace_id); - /// Ends the provided span. + /// Ends the span on the top of the stack, related to trace with `trace_id`. /// - /// \param span_key Span's key to retrieve the corresponding span from the - /// OpenTelemetry context. - void EndSpan(std::string span_key); + /// \param trace_id Trace id. + void EndSpan(uint64_t trace_id); - /// Ends the provided span at specified steady timestamp. 
+ /// Ends the span on the top of the stack, related to trace with `trace_id` + /// at specified steady timestamp. /// - /// \param span_key Span's key to retrieve the corresponding span from the - /// OpenTelemetry context. /// \param raw_timestamp_ns Steady timestamp to use as /// `EndSpanOptions::end_steady_time`. - void EndSpan(std::string span_key, const uint64_t& raw_timestamp_ns); - - /// Returns the span key, for which the activity belongs. - /// - /// \param activity reported activity. /// \param trace_id Trace id. - /// \return A key to identify span, stored in the OpenTelemetry context. - std::string GetSpanKeyForActivity( - TRITONSERVER_InferenceTraceActivity activity, uint64_t trace_id); - - /// Adds event to the span, which is retrieved from OpenTelemetry context - /// with the provided `span_key`. If activity is - /// TRITONSERVER_TRACE_REQUEST_START, or TRITONSERVER_TRACE_COMPUTE_START, - /// starts a new span and adds it to `otel_context_`. + void EndSpan(const uint64_t& raw_timestamp_ns, uint64_t trace_id); + + /// Adds an event to the span on the top of the stack, related to trace + /// with `trace_id`. If activity is TRITONSERVER_TRACE_REQUEST_START, + /// or TRITONSERVER_TRACE_COMPUTE_START, starts a new span and adds it + /// to the span's stack. /// - /// \param span_key Span's key to retrieve the corresponding span from the - /// OpenTelemetry context. /// \param trace TRITONSERVER_InferenceTrace, used to request model's name, /// version, trace parent_id from the backend. /// \param activity Trace activity. /// \param timestamp_ns Timestamp of the provided event. - /// \param id Trace id. + /// \param trace_id Trace id. 
void AddEvent( - std::string span_key, TRITONSERVER_InferenceTrace* trace, + TRITONSERVER_InferenceTrace* trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, - uint64_t id); + uint64_t trace_id); - /// Adds event to the OpenTelemetry span, retrieved from an OpenTelementry - /// context with the provided `span_key`. + /// Adds an event to the OpenTelemetry span. /// - /// \param span_key Span's key to retrieve the corresponding span from the - /// OpenTelemetry context. /// \param event An event to add to the span. /// \param timestamp_ns Timestamp of the provided event. + /// \param trace_id Trace id. void AddEvent( - std::string span_key, std::string event, uint64_t timestamp_ns); + const std::string& event, uint64_t timestamp_ns, uint64_t trace_id); #endif }; From 532ebe350150e6db07b584e0c0cfb4da1008a970 Mon Sep 17 00:00:00 2001 From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com> Date: Fri, 5 Jul 2024 15:34:01 -0700 Subject: [PATCH 02/10] [fix]: grpc state cleanup fix (#7409) --- qa/L0_grpc_state_cleanup/cleanup_test.py | 32 +++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/qa/L0_grpc_state_cleanup/cleanup_test.py b/qa/L0_grpc_state_cleanup/cleanup_test.py index 28d837e571..431eeb1720 100755 --- a/qa/L0_grpc_state_cleanup/cleanup_test.py +++ b/qa/L0_grpc_state_cleanup/cleanup_test.py @@ -437,10 +437,10 @@ def test_simple_infer_error_status(self): def test_simple_infer_shutdownserver(self): # This test case is used to check whether all the state objects are - # released when the server is interrupted to shutdown in middle of - # inference run with final parameters being returned. + # released when the server is interrupted to shutdown in the beginning + # of inference run with final parameters being returned. 
with self.assertRaises(InferenceServerException) as cm: - self._simple_infer(request_count=10, kill_server=5) + self._simple_infer(request_count=20, kill_server=5) ### ### Streaming Tests @@ -469,11 +469,18 @@ def test_streaming_timeout(self): def test_streaming_error_status(self): # This test case is used to check whether all the state objects are # released when RPC runs into error. + expected_exceptions = [ + "This protocol is restricted, expecting header 'triton-grpc-protocol-infer-key'", + "The stream is no longer in valid state, the error detail is reported through provided callback. A new stream should be started after stopping the current stream.", + ] with self.assertRaises(InferenceServerException) as cm: self._streaming_infer(request_count=10, should_error=True) - self.assertIn( - "This protocol is restricted, expecting header 'triton-grpc-protocol-infer-key'", - str(cm.exception), + + exception_match = False + for expected_exception in expected_exceptions: + exception_match |= expected_exception in str(cm.exception) + self.assertTrue( + exception_match, "Raised unexpected exception {}".format(str(cm.exception)) ) def test_streaming_infer_shutdownserver(self): @@ -520,11 +527,18 @@ def test_decoupled_timeout(self): def test_decoupled_error_status(self): # This test case is used to check whether all the state objects are # released when RPC runs into error. + expected_exceptions = [ + "This protocol is restricted, expecting header 'triton-grpc-protocol-infer-key'", + "The stream is no longer in valid state, the error detail is reported through provided callback. 
A new stream should be started after stopping the current stream.", + ] with self.assertRaises(InferenceServerException) as cm: self._decoupled_infer(request_count=10, repeat_count=10, should_error=True) - self.assertIn( - "This protocol is restricted, expecting header 'triton-grpc-protocol-infer-key'", - str(cm.exception), + + exception_match = False + for expected_exception in expected_exceptions: + exception_match |= expected_exception in str(cm.exception) + self.assertTrue( + exception_match, "Raised unexpected exception {}".format(str(cm.exception)) ) def test_decoupled_infer_shutdownserver(self): From 02723f8c9689da259f160331575b02c058ea110b Mon Sep 17 00:00:00 2001 From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com> Date: Fri, 5 Jul 2024 15:38:31 -0700 Subject: [PATCH 03/10] [build]: vllm version update (#7405) --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index aeb2442763..b9e68d45cc 100755 --- a/build.py +++ b/build.py @@ -76,7 +76,7 @@ "2024.0.0", # ORT OpenVINO "2024.0.0", # Standalone OpenVINO "3.2.6", # DCGM version - "0.4.3", # vLLM version + "0.5.0.post1", # vLLM version ) } From f5273eeec87ef0bbb9f57f8ceb58f772bc6ac346 Mon Sep 17 00:00:00 2001 From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:29:36 -0700 Subject: [PATCH 04/10] [feat]:Custom Backend Tracing (#7403) Co-authored-by: Iman Tabrizian Co-authored-by: Kris Hung --- docs/user_guide/trace.md | 16 ++++ qa/L0_trace/opentelemetry_unittest.py | 105 ++++++++++++++++++++++++++ qa/L0_trace/test.sh | 79 ++++++++++++++++++- src/tracer.cc | 83 ++++++++++++++------ src/tracer.h | 16 +++- 5 files changed, 273 insertions(+), 26 deletions(-) diff --git a/docs/user_guide/trace.md b/docs/user_guide/trace.md index d359299499..8f7708665b 100644 --- a/docs/user_guide/trace.md +++ b/docs/user_guide/trace.md @@ -623,6 +623,22 @@ Then, you can specify headers in the `infer` method. 
For references, please look at our [tests](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py), e.g. [http context propagation test](https://github.com/triton-inference-server/server/blob/main/qa/L0_trace/opentelemetry_unittest.py#L494-L508). +### Custom Backend Tracing + +In the case when a custom activity needs to be traced in the backend, please +use the `TRITONSERVER_InferenceTraceReportActivity` API. For examples, please +refer to the [identity backend](https://github.com/triton-inference-server/identity_backend/blob/main/src/identity.cc). + +In OpenTelemetry trace mode, if one wishes to start a new span, make sure +that the name of your custom activity ends with `_START`. To end the new span, +make sure that the name of the corresponding activity ends with `_END`. For example, in the +identity backend, we start a `CUSTOM_ACTIVITY` span by [reporting](https://github.com/triton-inference-server/identity_backend/blob/oandreeva-custom-trace-activity/src/identity.cc#L872-L876) +a `CUSTOM_ACTIVITY_START` event, and we close this span by [reporting](https://github.com/triton-inference-server/identity_backend/blob/oandreeva-custom-trace-activity/src/identity.cc#L880-L883) +a `CUSTOM_ACTIVITY_END` event. + +Please note that it is the user's responsibility to make sure that all custom-started +spans are properly ended. + ### Limitations - OpenTelemetry trace mode is not supported on Windows systems. 
diff --git a/qa/L0_trace/opentelemetry_unittest.py b/qa/L0_trace/opentelemetry_unittest.py index 93056e613d..34dc0bfd88 100644 --- a/qa/L0_trace/opentelemetry_unittest.py +++ b/qa/L0_trace/opentelemetry_unittest.py @@ -115,12 +115,14 @@ def setUp(self): self.bls_model_name = "bls_simple" self.trace_context_model = "trace_context" self.non_decoupled_model_name_ = "repeat_int32" + self.identity_model = "custom_identity_int32" self.test_models = [ self.simple_model_name, self.ensemble_model_name, self.bls_model_name, self.non_decoupled_model_name_, self.cancel_queue_model_name, + self.identity_model, ] self.root_span = "InferRequest" self._user_data = UserData() @@ -219,6 +221,7 @@ def _check_events(self, span_name, events, is_cancelled): self.assertFalse( all(entry in events for entry in root_events_http + root_events_grpc) ) + self.assertEquals(len(events), len(compute_events)) elif span_name == self.root_span: # Check that root span has INFER_RESPONSE_COMPLETE, _RECV/_WAITREAD @@ -230,16 +233,20 @@ def _check_events(self, span_name, events, is_cancelled): if "HTTP" in events: self.assertTrue(all(entry in events for entry in root_events_http)) self.assertFalse(all(entry in events for entry in root_events_grpc)) + self.assertEquals(len(events), len(root_events_http)) elif "GRPC" in events: self.assertTrue(all(entry in events for entry in root_events_grpc)) self.assertFalse(all(entry in events for entry in root_events_http)) + self.assertEquals(len(events), len(root_events_grpc)) if is_cancelled == False: self.assertFalse(all(entry in events for entry in request_events)) self.assertFalse(all(entry in events for entry in compute_events)) elif span_name in self.test_models: + if span_name == self.identity_model: + request_events.append("CUSTOM_SINGLE_ACTIVITY") # Check that all request related events (and only them) # are recorded in request span self.assertTrue(all(entry in events for entry in request_events)) @@ -247,6 +254,31 @@ def _check_events(self, span_name, 
events, is_cancelled): all(entry in events for entry in root_events_http + root_events_grpc) ) self.assertFalse(all(entry in events for entry in compute_events)) + self.assertEquals(len(events), len(request_events)) + + elif span_name.startswith("CUSTOM_ACTIVITY"): + custom_activity_events = [] + if len(span_name) > len("CUSTOM_ACTIVITY"): + custom_activity_events.append(str(span_name + "_START")) + custom_activity_events.append(str(span_name + "_END")) + # Check `custom_identity_int32` config file, + # parameter `single_activity_frequency` identifies + # which custom spans contain "CUSTOM_SINGLE_ACTIVITY" event + if int(span_name[-1]) % 3 == 0: + custom_activity_events.append("CUSTOM_SINGLE_ACTIVITY") + else: + custom_activity_events = [ + "CUSTOM_ACTIVITY_START", + "CUSTOM_ACTIVITY_END", + ] + + self.assertTrue( + all(entry in events for entry in custom_activity_events), + "Span " + span_name, + ) + self.assertEquals( + len(events), len(custom_activity_events), "Span " + span_name + ) def _test_resource_attributes(self, attributes): """ @@ -487,6 +519,52 @@ def _test_simple_trace(self, headers=None): expected_parent_span_dict=expected_parent_span_dict, ) + def _test_custom_identity_trace(self, headers=None): + """ + Helper function, that specifies expected parameters to evaluate trace, + collected from running 1 inference request for `custom_identity_int32` + model. + The number of custom spans is defined by the identity backend. + The `CUSTOM_ACTIVITY` span will always be there; the numbered + `CUSTOM_ACTIVITY<N>` spans are defined by `config.pbtxt` parameters. 
+ """ + expected_number_of_spans = 10 + expected_counts = dict( + { + "compute": 1, + self.identity_model: 1, + self.root_span: 1, + "CUSTOM_ACTIVITY": 1, + "CUSTOM_ACTIVITY0": 1, + "CUSTOM_ACTIVITY1": 1, + "CUSTOM_ACTIVITY2": 1, + "CUSTOM_ACTIVITY3": 1, + "CUSTOM_ACTIVITY4": 1, + "CUSTOM_ACTIVITY5": 1, + } + ) + expected_parent_span_dict = dict( + { + "InferRequest": ["custom_identity_int32"], + "custom_identity_int32": [ + "CUSTOM_ACTIVITY", + "CUSTOM_ACTIVITY0", + "compute", + ], + "CUSTOM_ACTIVITY0": ["CUSTOM_ACTIVITY1"], + "CUSTOM_ACTIVITY1": ["CUSTOM_ACTIVITY2"], + "CUSTOM_ACTIVITY2": ["CUSTOM_ACTIVITY3"], + "CUSTOM_ACTIVITY3": ["CUSTOM_ACTIVITY4"], + "CUSTOM_ACTIVITY4": ["CUSTOM_ACTIVITY5"], + } + ) + self._test_trace( + headers=headers, + expected_number_of_spans=expected_number_of_spans, + expected_counts=expected_counts, + expected_parent_span_dict=expected_parent_span_dict, + ) + def _test_non_decoupled_trace(self, headers=None): """ Helper function, that collects trace for non decoupled model and verifies it. @@ -944,6 +1022,33 @@ def test_trace_context_exposed_to_pbe(self): context_pattern = re.compile(r"\d{2}-[0-9a-f]{32}-[0-9a-f]{16}-\d{2}") self.assertIsNotNone(re.match(context_pattern, context["traceparent"])) + def test_custom_backend_tracing(self): + """ + Tests custom activities reported from identity backend. + """ + input0_ = np.array([[4]], dtype=np.int32) + with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client: + inputs = [] + inputs.append(httpclient.InferInput("INPUT0", [1, 1], "INT32")) + inputs[0].set_data_from_numpy(input0_) + client.infer(self.identity_model, inputs=inputs) + self._test_custom_identity_trace() + + def test_custom_backend_tracing_context_propagation(self): + """ + Tests custom activities reported from identity backend. 
+ """ + input0_ = np.array([[4]], dtype=np.int32) + with httpclient.InferenceServerClient("localhost:8000", verbose=True) as client: + inputs = [] + inputs.append(httpclient.InferInput("INPUT0", [1, 1], "INT32")) + inputs[0].set_data_from_numpy(input0_) + client.infer( + self.identity_model, inputs=inputs, headers=self.client_headers + ) + + self._test_custom_identity_trace(headers=self.client_headers) + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_trace/test.sh b/qa/L0_trace/test.sh index 7d67afb3ba..d2943c1996 100755 --- a/qa/L0_trace/test.sh +++ b/qa/L0_trace/test.sh @@ -97,6 +97,16 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \ cp -r ../L0_decoupled/models/repeat_int32 $MODELSDIR sed -i "s/decoupled: True/decoupled: False/" $MODELSDIR/repeat_int32/config.pbtxt +# set up identity model +mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \ + echo 'name: "custom_identity_int32"' >> config.pbtxt && \ + echo 'backend: "identity"' >> config.pbtxt && \ + echo 'max_batch_size: 1024' >> config.pbtxt && \ + echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \ + echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \ + echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \ + echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }]' >> config.pbtxt) + RET=0 # Helpers ======================================= @@ -742,6 +752,60 @@ wait $SERVER_PID set +e +# Custom backend tracing +SERVER_ARGS="--model-control-mode=explicit --model-repository=$MODELSDIR + --load-model=custom_identity_int32 --trace-config=level=TIMESTAMPS \ + --trace-config=triton,file=custom_tracing_triton.log \ + --trace-config=rate=1 --trace-config=mode=triton" +SERVER_LOG="./custom_backend_tracing.log" +run_server +if [ "$SERVER_PID" == "0" ]; then + echo -e 
"\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 +fi + +# Send 1 inference request, should expect 3 custom activities: +# CUSTOM_SINGLE_ACTIVITY, CUSTOM_ACTIVITY_START, CUSTOM_ACTIVITY_END +rm -f ./curl.out +data='{"inputs":[{"name":"INPUT0","datatype":"INT32","shape":[1,1],"data":[4]}]}' +set +e +code=`curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/models/custom_identity_int32/infer -d ${data}` +set -e +if [ "$code" != "200" ]; then + cat ./curl.out + echo -e "\n***\n*** Test Failed\n***" + RET=1 +fi + +set -e + +kill $SERVER_PID +wait $SERVER_PID + +set +e + + +$TRACE_SUMMARY -t custom_tracing_triton.log > summary_custom_tracing_triton.log + +if [ `grep -c "CUSTOM_SINGLE_ACTIVITY" summary_custom_tracing_triton.log` != "1" ]; then + cat summary_custom_tracing_triton.log + echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY" events.\n***" + RET=1 +fi + +if [ `grep -c "CUSTOM_ACTIVITY_START" summary_custom_tracing_triton.log` != "1" ]; then + cat summary_custom_tracing_triton.log + echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY_START" events.\n***" + RET=1 +fi + +if [ `grep -c "CUSTOM_ACTIVITY_END" summary_custom_tracing_triton.log` != "1" ]; then + cat summary_custom_tracing_triton.log + echo -e "\n***\n*** Test Failed: Unexpected number of traced "CUSTOM_ACTIVITY_END" events.\n***" + RET=1 +fi + # Check opentelemetry trace exporter sends proper info. # A helper python script starts listening on $OTLP_PORT, where # OTLP exporter sends traces. 
@@ -758,7 +822,7 @@ rm collected_traces.json* # Unittests then check that produced spans have expected format and events OPENTELEMETRY_TEST=opentelemetry_unittest.py OPENTELEMETRY_LOG="opentelemetry_unittest.log" -EXPECTED_NUM_TESTS="17" +EXPECTED_NUM_TESTS="19" # Set up repo and args for SageMaker export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME="simple" @@ -772,10 +836,20 @@ cp -r $DATADIR/$MODELBASE/* ${MODEL_PATH} && \ # Add model to test trace context exposed to python backend mkdir -p $MODELSDIR/trace_context/1 && cp ./trace_context.py $MODELSDIR/trace_context/1/model.py +# set up identity model +rm -r ${MODELSDIR}/custom_identity_int32 +mkdir -p $MODELSDIR/custom_identity_int32/1 && (cd $MODELSDIR/custom_identity_int32 && \ + echo 'name: "custom_identity_int32"' >> config.pbtxt && \ + echo 'backend: "identity"' >> config.pbtxt && \ + echo 'max_batch_size: 1024' >> config.pbtxt && \ + echo -e 'input [{ name: "INPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \ + echo -e 'output [{ name: "OUTPUT0" \n data_type: TYPE_INT32 \n dims: [ -1 ] }]' >> config.pbtxt && \ + echo 'instance_group [{ kind: KIND_CPU }]' >> config.pbtxt && \ + echo -e 'parameters [{ key: "execute_delay_ms" \n value: { string_value: "500" } }, { key: "enable_custom_tracing" \n value: { string_value: "true" } }, { key: "nested_span_count" \n value: { string_value: "6" } }, { key: "single_activity_frequency" \n value: { string_value: "3" } }]' >> config.pbtxt) SERVER_ARGS="--allow-sagemaker=true --model-control-mode=explicit \ --load-model=simple --load-model=ensemble_add_sub_int32_int32_int32 \ - --load-model=repeat_int32 \ + --load-model=repeat_int32 --load-model=custom_identity_int32\ --load-model=input_all_required \ --load-model=dynamic_batch \ --load-model=bls_simple --trace-config=level=TIMESTAMPS \ @@ -1164,5 +1238,4 @@ set -e kill $SERVER_PID wait $SERVER_PID set +e - exit $RET diff --git a/src/tracer.cc b/src/tracer.cc index b17f5eb7e8..5557106dfd 100644 --- 
a/src/tracer.cc +++ b/src/tracer.cc @@ -28,8 +28,6 @@ #include -#include - #include "common.h" #include "triton/common/logging.h" #ifdef TRITON_ENABLE_GPU @@ -410,6 +408,32 @@ TraceManager::Trace::CaptureTimestamp( } } +std::string +TraceManager::Trace::RetrieveActivityName( + TRITONSERVER_InferenceTrace* trace, + TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns) +{ + std::string activity_name = + TRITONSERVER_InferenceTraceActivityString(activity); + + if (activity == TRITONSERVER_TRACE_CUSTOM_ACTIVITY) { + const char* val = nullptr; + LOG_TRITONSERVER_ERROR( + TRITONSERVER_InferenceTraceContext(trace, &val), + "Failed to retrieve trace context"); + std::string context_str = (val != nullptr) ? std::string(val) : ""; + triton::common::TritonJson::Value context; + LOG_TRITONSERVER_ERROR( + context.Parse(context_str), "Failed to parse trace context"); + std::string look_for_key = std::to_string(timestamp_ns); + if (context.Find(look_for_key.c_str())) { + context.MemberAsString(look_for_key.c_str(), &activity_name); + } + } + + return activity_name; +} + void TraceManager::InitTracer(const triton::server::TraceConfigMap& config_map) { @@ -514,7 +538,7 @@ void TraceManager::Trace::StartSpan( TRITONSERVER_InferenceTrace* trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, - uint64_t trace_id) + uint64_t trace_id, std::string display_name) { uint64_t parent_id; LOG_TRITONSERVER_ERROR( @@ -533,19 +557,10 @@ TraceManager::Trace::StartSpan( // the ensamble's main request. For this instance, the parent // span is the ensembles's request span. 
if ((parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) || - (activity == TRITONSERVER_TRACE_COMPUTE_START)) { + (activity == TRITONSERVER_TRACE_COMPUTE_START) || + (activity == TRITONSERVER_TRACE_CUSTOM_ACTIVITY)) { span_parent_id = trace_id; } - - std::string display_name = "compute"; - const char* model_name; - if (activity == TRITONSERVER_TRACE_REQUEST_START) { - LOG_TRITONSERVER_ERROR( - TRITONSERVER_InferenceTraceModelName(trace, &model_name), - "getting model name"); - display_name = model_name; - } - auto span = StartSpan(display_name, timestamp_ns, span_parent_id); if (activity == TRITONSERVER_TRACE_REQUEST_START) { @@ -557,7 +572,7 @@ TraceManager::Trace::StartSpan( LOG_TRITONSERVER_ERROR( TRITONSERVER_InferenceTraceRequestId(trace, &request_id), "getting request id"); - span->SetAttribute("triton.model_name", model_name); + span->SetAttribute("triton.model_name", display_name); span->SetAttribute("triton.model_version", model_version); span->SetAttribute("triton.trace_id", trace_id); span->SetAttribute("triton.trace_parent_id", parent_id); @@ -642,17 +657,40 @@ TraceManager::Trace::AddEvent( TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, uint64_t trace_id) { + std::string activity_name = + RetrieveActivityName(trace, activity, timestamp_ns); + static std::string start = "_START"; + static std::string end = "_END"; if (activity == TRITONSERVER_TRACE_REQUEST_START || - activity == TRITONSERVER_TRACE_COMPUTE_START) { - StartSpan(trace, activity, timestamp_ns, trace_id); + activity == TRITONSERVER_TRACE_COMPUTE_START || + (activity == TRITONSERVER_TRACE_CUSTOM_ACTIVITY && + activity_name.length() > start.length() && + std::equal(start.rbegin(), start.rend(), activity_name.rbegin()))) { + std::string span_name = activity_name; + + if (activity == TRITONSERVER_TRACE_CUSTOM_ACTIVITY) { + span_name = + activity_name.substr(0, activity_name.length() - start.length()); + } else if (activity == TRITONSERVER_TRACE_REQUEST_START) { 
+ const char* model_name; + LOG_TRITONSERVER_ERROR( + TRITONSERVER_InferenceTraceModelName(trace, &model_name), + "getting model name"); + span_name = model_name; + } else if (activity == TRITONSERVER_TRACE_COMPUTE_START) { + span_name = "compute"; + } + + StartSpan(trace, activity, timestamp_ns, trace_id, span_name); } - AddEvent( - TRITONSERVER_InferenceTraceActivityString(activity), timestamp_ns, - trace_id); + AddEvent(activity_name, timestamp_ns, trace_id); if (activity == TRITONSERVER_TRACE_REQUEST_END || - activity == TRITONSERVER_TRACE_COMPUTE_END) { + activity == TRITONSERVER_TRACE_COMPUTE_END || + (activity == TRITONSERVER_TRACE_CUSTOM_ACTIVITY && + activity_name.length() > end.length() && + std::equal(end.rbegin(), end.rend(), activity_name.rbegin()))) { EndSpan(timestamp_ns, trace_id); } } @@ -805,7 +843,8 @@ TraceManager::TraceActivity( } *ss << "{\"id\":" << id << ",\"timestamps\":[" - << "{\"name\":\"" << TRITONSERVER_InferenceTraceActivityString(activity) + << "{\"name\":\"" + << ts->RetrieveActivityName(trace, activity, timestamp_ns) << "\",\"ns\":" << timestamp_ns << "}]}"; } diff --git a/src/tracer.h b/src/tracer.h index e33b16dbcb..8cdeb15121 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -231,6 +231,19 @@ class TraceManager { // with this trace. void CaptureTimestamp(const std::string& name, uint64_t timestamp_ns); + /// Returns activity name. For custom activities, retrieves the name from + /// the trace context. For other activities, returns default name. + /// + /// \param trace TRITONSERVER_InferenceTrace instance. + /// \param activity Trace activity. + /// \param timestamp_ns Steady timestamp, which is used to calculate + /// OpenTelemetry SystemTimestamp to display span on a timeline, and + /// OpenTelemetry SteadyTimestamp to calculate the duration on the span + /// with better precision. 
+ std::string RetrieveActivityName( + TRITONSERVER_InferenceTrace* trace, + TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns); + #if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) /// Reports TRITONSERVER_InferenceTraceActivity as event to /// the currently active span. If activity is an instance of @@ -322,10 +335,11 @@ class TraceManager { /// OpenTelemetry SteadyTimestamp to calculate the duration on the span /// with better precision. /// \param trace_id Trace id. + /// \param display_name Span name. void StartSpan( TRITONSERVER_InferenceTrace* trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, - uint64_t trace_id); + uint64_t trace_id, std::string display_name); /// Ends the span on the top of the stack, related to trace with `trace_id`. /// From dfbe63efabf78fd60c5d33c0b7cef6ab41edb3bb Mon Sep 17 00:00:00 2001 From: Kris Hung Date: Mon, 8 Jul 2024 08:54:50 -0700 Subject: [PATCH 05/10] build: Reduce intermediate layers (#7408) --- build.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/build.py b/build.py index b9e68d45cc..24bde0f3a4 100755 --- a/build.py +++ b/build.py @@ -1082,25 +1082,20 @@ def create_dockerfile_linux( """ if "tensorrtllm" in backends: df += """ - -RUN ldconfig -# Remove contents that are not needed in runtime -RUN ARCH="$(uname -i)" \\ - && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\ - && rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\ - && rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples - # Install required packages for TRT-LLM models -RUN python3 -m pip install --upgrade pip \\ - && pip3 install transformers - -# ldconfig for TRT-LLM -RUN find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf -RUN find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf 
- +# Remove contents that are not needed in runtime # Setuptools has breaking changes in version 70.0.0, so fix it to 69.5.1 # The generated code in grpc_service_pb2_grpc.py depends on grpcio>=1.64.0, so fix it to 1.64.0 -RUN pip3 install setuptools==69.5.1 grpcio-tools==1.64.0 +RUN ldconfig && \ + ARCH="$(uname -i)" && \ + rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data && \ + rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python && \ + rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples && \ + python3 -m pip install --upgrade pip && \ + pip3 install --no-cache-dir transformers && \ + find /usr -name libtensorrt_llm.so -exec dirname {} \; > /etc/ld.so.conf.d/tensorrt-llm.conf && \ + find /opt/tritonserver -name libtritonserver.so -exec dirname {} \; > /etc/ld.so.conf.d/triton-tensorrtllm-worker.conf && \ + pip3 install --no-cache-dir setuptools==69.5.1 grpcio-tools==1.64.0 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH """ From e0d80d46966bdf83d5dedf5b76f08e0aa40607ec Mon Sep 17 00:00:00 2001 From: Jacky <18255193+kthui@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:10:58 -0700 Subject: [PATCH 06/10] test: Remove AWS bucket on test failure (#7342) Co-authored-by: Kris Hung --- qa/L0_backend_python/env/test.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qa/L0_backend_python/env/test.sh b/qa/L0_backend_python/env/test.sh index b6963be615..ff9e368e75 100755 --- a/qa/L0_backend_python/env/test.sh +++ b/qa/L0_backend_python/env/test.sh @@ -253,6 +253,7 @@ run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" cat $SERVER_LOG + aws s3 rb "${BUCKET_URL}" --force || true exit 1 fi @@ -286,6 +287,7 @@ run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" cat $SERVER_LOG + aws s3 rb "${BUCKET_URL}" --force || true exit 1 fi From 
ac0d4d6f2f75102143b6c2a64497c91479ed069f Mon Sep 17 00:00:00 2001 From: Kris Hung Date: Wed, 10 Jul 2024 10:37:49 -0700 Subject: [PATCH 07/10] fix: Fix error message for L0_trt_compat (#7432) --- qa/L0_trt_compat/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/L0_trt_compat/test.sh b/qa/L0_trt_compat/test.sh index 6b4f83cbc8..a8161369df 100755 --- a/qa/L0_trt_compat/test.sh +++ b/qa/L0_trt_compat/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -64,7 +64,7 @@ if [ "$SERVER_PID" != "0" ]; then exit 1 fi -EXPECTED_ERR="Internal Error (Cannot deserialize engine with lean runtime" +EXPECTED_ERR="Cannot deserialize engine with lean runtime" if ! grep "$EXPECTED_ERR" $SERVER_LOG; then cat $SERVER_LOG echo -e "\n***\n*** Failed to find expected error: ${EXPECTED_ERR} \n***" From d1780d1fb04d81f04e605383a99432b6f7b26ee4 Mon Sep 17 00:00:00 2001 From: Shreyas Jain Date: Thu, 11 Jul 2024 00:40:48 +0530 Subject: [PATCH 08/10] feat: Support for request id field in generate API (#7392) --- docs/protocol/extension_generate.md | 12 ++++++-- qa/L0_http/generate_endpoint_test.py | 43 ++++++++++++++++++++++++++++ qa/L0_http/test.sh | 2 +- src/http_server.cc | 2 ++ 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/docs/protocol/extension_generate.md b/docs/protocol/extension_generate.md index b54b0caffb..043339eb4a 100644 --- a/docs/protocol/extension_generate.md +++ b/docs/protocol/extension_generate.md @@ -1,5 +1,5 @@