From 990ff16a69ded351244d2345624ffc3360115978 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 23 May 2024 17:21:56 -0700 Subject: [PATCH 1/9] Fix gRPC streaming non-decoupled segfault if sending response and final flag separately --- src/grpc/stream_infer_handler.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/grpc/stream_infer_handler.cc b/src/grpc/stream_infer_handler.cc index 592ce80a2e..2258ae37d3 100644 --- a/src/grpc/stream_infer_handler.cc +++ b/src/grpc/stream_infer_handler.cc @@ -573,17 +573,19 @@ ModelStreamInferHandler::StreamInferResponseComplete( } #endif // TRITON_ENABLE_TRACING - // Log appropriate errors bool is_complete = state->complete_ || (flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0; + + // Log appropriate messages if (!state->is_decoupled_) { if (!is_complete) { - LOG_ERROR << "[INTERNAL] ModelStreamInfer received a response without " - "FINAL flag for a model with one-to-one transaction"; + LOG_VERBOSE(2) + << "[INTERNAL] ModelStreamInfer received a response without FINAL " + "flag for a model with one-to-one transaction"; } if (iresponse == nullptr) { - LOG_ERROR << "[INTERNAL] ModelStreamInfer received a null response for a " - "model with one-to-one transaction"; + LOG_VERBOSE(2) << "[INTERNAL] ModelStreamInfer received a null response " + "for a model with one-to-one transaction"; } } @@ -745,7 +747,9 @@ ModelStreamInferHandler::StreamInferResponseComplete( } } else { state->step_ = Steps::WRITEREADY; - state->context_->WriteResponseIfReady(state); + if (is_complete) { + state->context_->WriteResponseIfReady(state); + } } state->complete_ = is_complete; From 2d2dd367766aa5fcd1efc53d491f1421d3972339 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Wed, 29 May 2024 12:50:39 -0700 Subject: [PATCH 2/9] Enhance logging for non-decoupled on null response and final flag --- src/grpc/stream_infer_handler.cc | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/grpc/stream_infer_handler.cc b/src/grpc/stream_infer_handler.cc index 2258ae37d3..cf20ad1ea5 100644 --- a/src/grpc/stream_infer_handler.cc +++ b/src/grpc/stream_infer_handler.cc @@ -564,7 +564,8 @@ ModelStreamInferHandler::StreamInferResponseComplete( LOG_VERBOSE(1) << "ModelStreamInferHandler::StreamInferComplete, context " << state->context_->unique_id_ << ", " << state->unique_id_ << " step " << state->step_ << ", callback index " - << state->cb_count_ << ", flags " << flags; + << state->cb_count_ << ", flags " << flags + << ", response is nullptr " << (iresponse == nullptr); #ifdef TRITON_ENABLE_TRACING if (state->cb_count_ == 1) { @@ -576,19 +577,6 @@ ModelStreamInferHandler::StreamInferResponseComplete( bool is_complete = state->complete_ || (flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0; - // Log appropriate messages - if (!state->is_decoupled_) { - if (!is_complete) { - LOG_VERBOSE(2) - << "[INTERNAL] ModelStreamInfer received a response without FINAL " - "flag for a model with one-to-one transaction"; - } - if (iresponse == nullptr) { - LOG_VERBOSE(2) << "[INTERNAL] ModelStreamInfer received a null response " - "for a model with one-to-one transaction"; - } - } - // If receiving the final callback then erase the state from the inflight // state data structure to prevent cancellation being called on the request. 
// Also make sure that if this state was sent to gRPC async notification From 63927289b79b257dc7b999c60a163be908c8f06e Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 23 May 2024 17:25:02 -0700 Subject: [PATCH 3/9] Remove special handling for non-decoupled sending flag --- qa/python_models/response_sender/model.py | 2 +- .../response_sender/model_async.py | 2 +- .../response_sender/model_common.py | 20 +------------------ 3 files changed, 3 insertions(+), 21 deletions(-) diff --git a/qa/python_models/response_sender/model.py b/qa/python_models/response_sender/model.py index eb2273b3b7..8749b83ee8 100644 --- a/qa/python_models/response_sender/model.py +++ b/qa/python_models/response_sender/model.py @@ -31,7 +31,7 @@ class TritonPythonModel: def initialize(self, args): - self._common = ResponseSenderModelCommon(pb_utils, args) + self._common = ResponseSenderModelCommon(pb_utils) def execute(self, requests): return self._common.execute(requests, use_async=False) diff --git a/qa/python_models/response_sender/model_async.py b/qa/python_models/response_sender/model_async.py index 6ec7ab69c2..b12eccef06 100644 --- a/qa/python_models/response_sender/model_async.py +++ b/qa/python_models/response_sender/model_async.py @@ -31,7 +31,7 @@ class TritonPythonModel: def initialize(self, args): - self._common = ResponseSenderModelCommon(pb_utils, args) + self._common = ResponseSenderModelCommon(pb_utils) async def execute(self, requests): return self._common.execute(requests, use_async=True) diff --git a/qa/python_models/response_sender/model_common.py b/qa/python_models/response_sender/model_common.py index bd89457bad..547ce8b32d 100644 --- a/qa/python_models/response_sender/model_common.py +++ b/qa/python_models/response_sender/model_common.py @@ -25,7 +25,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import asyncio -import json import threading import time @@ -33,11 +32,8 @@ class ResponseSenderModelCommon: - def __init__(self, pb_utils, args): + def __init__(self, pb_utils): self._pb_utils = pb_utils - self._is_decoupled = pb_utils.using_decoupled_model_transaction_policy( - json.loads(args["model_config"]) - ) self._background_tasks = set() def _get_instructions_from_request(self, request): @@ -123,20 +119,6 @@ def _send_responses(self, processed_requests, response_id_offset): batch_size = request["batch_size"] response_sender = request["response_sender"] send_complete_final_flag = request["send_complete_final_flag"] - - # TODO: gRPC frontend may segfault if non-decoupled model send response - # final flag separately from the response. 
- if ( - not self._is_decoupled - and number_of_response == 1 - and send_complete_final_flag - ): - response_sender.send( - self._create_response(batch_size, response_id=response_id_offset), - flags=self._pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL, - ) - continue - for response_id in range(number_of_response): response_sender.send( self._create_response( From 276cc9829cc5143c0cffb31fdad4e66a84d42229 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 28 May 2024 18:54:48 -0700 Subject: [PATCH 4/9] Add test for improper use of response sending --- .../response_sender/response_sender_test.py | 136 +++++++++++++++++- 1 file changed, 132 insertions(+), 4 deletions(-) diff --git a/qa/L0_backend_python/response_sender/response_sender_test.py b/qa/L0_backend_python/response_sender/response_sender_test.py index 59e0701356..84b13df0db 100644 --- a/qa/L0_backend_python/response_sender/response_sender_test.py +++ b/qa/L0_backend_python/response_sender/response_sender_test.py @@ -195,6 +195,8 @@ def _assert_responses_exception(self, responses, expected_message): self.assertIsNone(response["result"]) self.assertIsInstance(response["error"], InferenceServerException) self.assertIn(expected_message, response["error"].message()) + # There may be more responses, but currently only sees one for all tests. + self.assertEqual(len(responses), 1) def _assert_decoupled_infer_success( self, @@ -236,13 +238,16 @@ def _assert_decoupled_infer_success( number_of_response_after_return, ) - def _assert_non_decoupled_infer_success( + def _assert_non_decoupled_infer_with_expected_response_success( self, number_of_response_before_return, send_complete_final_flag_before_return, return_a_response, number_of_response_after_return, send_complete_final_flag_after_return, + expected_number_of_response_before_return, + expected_return_a_response, + expected_number_of_response_after_return, ): model_name = "response_sender" responses = self._infer( @@ -255,9 +260,9 @@ def _assert_non_decoupled_infer_success( ) self._assert_responses_valid( responses, - number_of_response_before_return, - return_a_response, - number_of_response_after_return, + expected_number_of_response_before_return, + expected_return_a_response, + expected_number_of_response_after_return, ) # Do NOT group into a for-loop as it hides which model failed. model_name = "response_sender_async" @@ -271,9 +276,28 @@ def _assert_non_decoupled_infer_success( ) self._assert_responses_valid( responses, + expected_number_of_response_before_return, + expected_return_a_response, + expected_number_of_response_after_return, + ) + + def _assert_non_decoupled_infer_success( + self, + number_of_response_before_return, + send_complete_final_flag_before_return, + return_a_response, + number_of_response_after_return, + send_complete_final_flag_after_return, + ): + self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return, + send_complete_final_flag_before_return, return_a_response, number_of_response_after_return, + send_complete_final_flag_after_return, + expected_number_of_response_before_return=number_of_response_before_return, + expected_return_a_response=return_a_response, + expected_number_of_response_after_return=number_of_response_after_return, ) # Decoupled model send response final flag before request return. 
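
For context, the pattern that the removed special case worked around (a non-decoupled model sending its single response and the FINAL flag in two separate send() calls) looks roughly like the minimal sketch below. This is an illustrative model, not one of the QA models in this patch series; the tensor name OUTPUT0 and the output shape are assumptions. With the frontend change in PATCH 1/9, the gRPC handler only writes the reply once the FINAL flag has arrived, so the two calls no longer need to be combined, and this separate pattern is among the cases the tests below exercise.

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        for request in requests:
            response_sender = request.get_response_sender()
            # Build a single response; the tensor name and shape are illustrative only.
            out = pb_utils.Tensor("OUTPUT0", np.zeros(1, dtype=np.float32))
            response = pb_utils.InferenceResponse(output_tensors=[out])

            # Previously, non-decoupled models had to combine both into one call:
            #   response_sender.send(
            #       response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
            # Sending the response and the FINAL flag separately is now allowed:
            response_sender.send(response)
            response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        # When the response sender is used, execute() returns None.
        return None
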
@@ -411,6 +435,110 @@ def test_decoupled_one_response_on_return(self): # TODO: Test for async decoupled after fixing 'AsyncEventFutureDoneCallback' # using `py_future.result()` with error hangs on exit. + # Decoupled model send 1 response and return 1 response. + def test_decoupled_one_response_pre_and_on_return(self): + # Note: The before return response will send a valid response and close the + # response sender. Then, returning a response will generate an error, but + # since the response sender is closed, nothing is passed to the client. + responses = self._infer( + model_name="response_sender_decoupled", + number_of_response_before_return=1, + send_complete_final_flag_before_return=True, + return_a_response=True, + number_of_response_after_return=0, + send_complete_final_flag_after_return=False, + ) + self._assert_responses_valid( + responses, + number_of_response_before_return=1, + return_a_response=0, + number_of_response_after_return=0, + ) + # TODO: Test for async decoupled after fixing 'AsyncEventFutureDoneCallback' + # using `py_future.result()` with error hangs on exit. + + # Decoupled model return 1 response and send 1 response. + def test_decoupled_one_response_on_and_post_return(self): + # Note: The returned response will send an error response and complete final + # flag. The response sender is not yet closed due to the model is still + # executing. Then, sending a response and final flag will make their way + # to the frontend, but the request is completed at the frontend, so they + # are not sent to the client. + responses = self._infer( + model_name="response_sender_decoupled", + number_of_response_before_return=0, + send_complete_final_flag_before_return=False, + return_a_response=True, + number_of_response_after_return=1, + send_complete_final_flag_after_return=True, + ) + self._assert_responses_exception( + responses, + expected_message="using the decoupled mode and the execute function must return None", + ) + # TODO: Test for async decoupled after fixing 'AsyncEventFutureDoneCallback' + # using `py_future.result()` with error hangs on exit. + + # Non-decoupled model send 2 response before return. + def test_non_decoupled_two_response_pre_return(self): + # Note: The 2 responses sent will make their way to the frontend, but only the + # response at index 0 will be sent back to the client. + self._assert_non_decoupled_infer_with_expected_response_success( + number_of_response_before_return=2, + send_complete_final_flag_before_return=True, + return_a_response=False, + number_of_response_after_return=0, + send_complete_final_flag_after_return=False, + expected_number_of_response_before_return=1, + expected_return_a_response=False, + expected_number_of_response_after_return=0, + ) + + # Non-decoupled model send 2 response after return. + def test_non_decoupled_two_response_post_return(self): + # Note: The 2 responses sent will make their way to the frontend, but only the + # response at index 0 will be sent back to the client. + self._assert_non_decoupled_infer_with_expected_response_success( + number_of_response_before_return=0, + send_complete_final_flag_before_return=False, + return_a_response=False, + number_of_response_after_return=2, + send_complete_final_flag_after_return=True, + expected_number_of_response_before_return=0, + expected_return_a_response=False, + expected_number_of_response_after_return=1, + ) + + # Non-decoupled model send 1 response and return 1 response. 
+ def test_non_decoupled_one_response_pre_and_on_return(self): + # Note: The 2 responses sent will make their way to the frontend, but only the + # response at index 0 will be sent back to the client. + self._assert_non_decoupled_infer_with_expected_response_success( + number_of_response_before_return=1, + send_complete_final_flag_before_return=True, + return_a_response=True, + number_of_response_after_return=0, + send_complete_final_flag_after_return=False, + expected_number_of_response_before_return=1, + expected_return_a_response=False, + expected_number_of_response_after_return=0, + ) + + # Non-decoupled model return 1 response and send 1 response. + def test_non_decoupled_one_response_on_and_pre_return(self): + # Note: The 2 responses sent will make their way to the frontend, but only the + # response at index 0 will be sent back to the client. + self._assert_non_decoupled_infer_with_expected_response_success( + number_of_response_before_return=0, + send_complete_final_flag_before_return=False, + return_a_response=True, + number_of_response_after_return=1, + send_complete_final_flag_after_return=True, + expected_number_of_response_before_return=0, + expected_return_a_response=True, + expected_number_of_response_after_return=0, + ) + if __name__ == "__main__": unittest.main() From c011c6dcacb3b6fd09bd796f74bc0c18da6a5f5d Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Wed, 29 May 2024 10:18:21 -0700 Subject: [PATCH 5/9] Set BLS size to 256 and turn off restart --- qa/L0_backend_python/bls/test.sh | 3 ++- qa/L0_backend_python/test.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/qa/L0_backend_python/bls/test.sh b/qa/L0_backend_python/bls/test.sh index f4435eacaa..204af7e2ba 100755 --- a/qa/L0_backend_python/bls/test.sh +++ b/qa/L0_backend_python/bls/test.sh @@ -100,7 +100,8 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then echo "instance_group [ { kind: KIND_CPU} ]" >> models/libtorch_cpu/config.pbtxt # Test with different sizes of CUDA memory pool - for CUDA_MEMORY_POOL_SIZE_MB in 64 128 ; do + # TODO: Why 256 worked in place of 128, on decoupled data pipeline? + for CUDA_MEMORY_POOL_SIZE_MB in 64 256 ; do CUDA_MEMORY_POOL_SIZE_BYTES=$((CUDA_MEMORY_POOL_SIZE_MB * 1024 * 1024)) SERVER_ARGS="--model-repository=${MODELDIR}/bls/models --backend-directory=${BACKEND_DIR} --log-verbose=1 --cuda-memory-pool-byte-size=0:${CUDA_MEMORY_POOL_SIZE_BYTES}" for TRIAL in non_decoupled decoupled ; do diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index bbaabbaf10..0e0240cd95 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -448,7 +448,8 @@ SUBTESTS="lifecycle argument_validation logging custom_metrics" # [DLIS-6122] Disable model_control & request_rescheduling tests for Windows since they require load/unload # [DLIS-6123] Disable examples test for Windows since it requires updates to the example clients if [[ ${TEST_WINDOWS} == 0 ]]; then - SUBTESTS+=" restart model_control examples request_rescheduling" + # TODO: Reimplement restart on decoupled data pipeline and enable restart. 
+ SUBTESTS+=" model_control examples request_rescheduling" fi for TEST in ${SUBTESTS}; do # Run each subtest in a separate virtual environment to avoid conflicts From 80d6b4f9016f9d82571e13a8e211031840b828c0 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 30 May 2024 00:14:04 -0700 Subject: [PATCH 6/9] Update improper tests for added checking in response sender --- .../response_sender/response_sender_test.py | 79 +++++++++++++++++-- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/qa/L0_backend_python/response_sender/response_sender_test.py b/qa/L0_backend_python/response_sender/response_sender_test.py index 84b13df0db..fb37d5a470 100644 --- a/qa/L0_backend_python/response_sender/response_sender_test.py +++ b/qa/L0_backend_python/response_sender/response_sender_test.py @@ -479,10 +479,62 @@ def test_decoupled_one_response_on_and_post_return(self): # TODO: Test for async decoupled after fixing 'AsyncEventFutureDoneCallback' # using `py_future.result()` with error hangs on exit. + # Non-decoupled model send response final flag before request return. + def test_non_decoupled_zero_response_pre_return(self): + # Note: The final flag will raise an exception which stops the model. Since the + # exception happens before the model returns, it will be caught by the + # stub process which pass it to the backend and sent an error response + # with final flag. + number_of_response_before_return = 0 + send_complete_final_flag_before_return = True + return_a_response = False + number_of_response_after_return = 0 + send_complete_final_flag_after_return = False + expected_message = ( + "Non-decoupled model cannot send complete final before sending a response" + ) + + model_name = "response_sender" + responses = self._infer( + model_name, + number_of_response_before_return, + send_complete_final_flag_before_return, + return_a_response, + number_of_response_after_return, + send_complete_final_flag_after_return, + ) + self._assert_responses_exception(responses, expected_message) + # Do NOT group into a for-loop as it hides which model failed. + model_name = "response_sender_async" + responses = self._infer( + model_name, + number_of_response_before_return, + send_complete_final_flag_before_return, + return_a_response, + number_of_response_after_return, + send_complete_final_flag_after_return, + ) + self._assert_responses_exception(responses, expected_message) + + # Non-decoupled model send response final flag after request return. + @unittest.skip("Model unload will hang, see the TODO comment.") + def test_non_decoupled_zero_response_post_return(self): + # Note: The final flag will raise an exception which stops the model. Since the + # exception happens after the model returns, it cannot be caught by the + # stub (i.e. in a daemon thread), so nothing will happen. + # TODO: Since the stub does not know if the model failed after returning, the + # complete final flag is not sent and will hang when unloading the model. + # How to detect such event and close the response factory? + raise NotImplementedError("No testing is performed") + # Non-decoupled model send 2 response before return. def test_non_decoupled_two_response_pre_return(self): - # Note: The 2 responses sent will make their way to the frontend, but only the - # response at index 0 will be sent back to the client. + # Note: The 1st response will make its way to the client, but sending the 2nd + # response will raise an exception which stops the model. 
Since the + # exception happens before the model returns, it will be caught by the + # stub process which pass it to the backend and sent an error response + # with final flag. Since this is non-decoupled model using gRPC stream, + # any response after the 1st will be discarded by the frontend. self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return=2, send_complete_final_flag_before_return=True, @@ -495,9 +547,15 @@ def test_non_decoupled_two_response_pre_return(self): ) # Non-decoupled model send 2 response after return. + @unittest.skip("Model unload will hang, see the TODO comment.") def test_non_decoupled_two_response_post_return(self): - # Note: The 2 responses sent will make their way to the frontend, but only the - # response at index 0 will be sent back to the client. + # Note: The 1st response will make its way to the client, but sending the 2nd + # response will raise an exception which stops the model. Since the + # exception happens after the model returns, it cannot be caught by the + # stub (i.e. in a daemon thread), so nothing will happen. + # TODO: Since the stub does not know if the model failed after returning, the + # complete final flag is not sent and will hang when unloading the model. + # How to detect such event and close the response factory? self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return=0, send_complete_final_flag_before_return=False, @@ -511,8 +569,10 @@ def test_non_decoupled_two_response_post_return(self): # Non-decoupled model send 1 response and return 1 response. def test_non_decoupled_one_response_pre_and_on_return(self): - # Note: The 2 responses sent will make their way to the frontend, but only the - # response at index 0 will be sent back to the client. + # Note: The sent response will make its way to the client and complete final. + # The returned response will see the response sender is closed and raise + # an exception. The backend should see the request is closed and do + # nothing upon receiving the error from stub. self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return=1, send_complete_final_flag_before_return=True, @@ -526,8 +586,11 @@ def test_non_decoupled_one_response_pre_and_on_return(self): # Non-decoupled model return 1 response and send 1 response. def test_non_decoupled_one_response_on_and_pre_return(self): - # Note: The 2 responses sent will make their way to the frontend, but only the - # response at index 0 will be sent back to the client. + # Note: The returned response will send the response to the client and complete + # final. The sent response will see the response sender is closed and + # raise an exception. Since the exception happens after the model returns, + # it cannot be caught by the stub (i.e. in a daemon thread), so nothing + # will happen. 
self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return=0, send_complete_final_flag_before_return=False, From e38ecfe9c39b5176c4442f29054356d8730a0e5a Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 30 May 2024 15:39:25 -0700 Subject: [PATCH 7/9] Skip test_decoupled_one_response_on_and_post_return --- qa/L0_backend_python/response_sender/response_sender_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/qa/L0_backend_python/response_sender/response_sender_test.py b/qa/L0_backend_python/response_sender/response_sender_test.py index fb37d5a470..3a417634b4 100644 --- a/qa/L0_backend_python/response_sender/response_sender_test.py +++ b/qa/L0_backend_python/response_sender/response_sender_test.py @@ -458,12 +458,16 @@ def test_decoupled_one_response_pre_and_on_return(self): # using `py_future.result()` with error hangs on exit. # Decoupled model return 1 response and send 1 response. + @unittest.skip("Response factory segmentation fault, see the TODO comment.") def test_decoupled_one_response_on_and_post_return(self): # Note: The returned response will send an error response and complete final # flag. The response sender is not yet closed due to the model is still # executing. Then, sending a response and final flag will make their way # to the frontend, but the request is completed at the frontend, so they # are not sent to the client. + # TODO: The error response will close the response factory, but not the response + # sender, which enables the model to send response and final flag on the + # deleted response factory and causes a segmentation fault. responses = self._infer( model_name="response_sender_decoupled", number_of_response_before_return=0, From d350c682049db2dbfefa979b7e2df0d895ebfb22 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Fri, 31 May 2024 14:39:16 -0700 Subject: [PATCH 8/9] Use grouped input parameters variables --- .../response_sender/response_sender_test.py | 70 ++++++------------- 1 file changed, 23 insertions(+), 47 deletions(-) diff --git a/qa/L0_backend_python/response_sender/response_sender_test.py b/qa/L0_backend_python/response_sender/response_sender_test.py index 3a417634b4..62917356a1 100644 --- a/qa/L0_backend_python/response_sender/response_sender_test.py +++ b/qa/L0_backend_python/response_sender/response_sender_test.py @@ -88,6 +88,20 @@ class ResponseSenderTest(unittest.TestCase): "number_of_response_after_return": 0, "send_complete_final_flag_after_return": False, } + _inputs_parameters_one_response_pre_and_on_return = { + "number_of_response_before_return": 1, + "send_complete_final_flag_before_return": True, + "return_a_response": True, + "number_of_response_after_return": 0, + "send_complete_final_flag_after_return": False, + } + _inputs_parameters_one_response_on_and_post_return = { + "number_of_response_before_return": 0, + "send_complete_final_flag_before_return": False, + "return_a_response": True, + "number_of_response_after_return": 1, + "send_complete_final_flag_after_return": True, + } def _get_inputs( self, @@ -442,11 +456,7 @@ def test_decoupled_one_response_pre_and_on_return(self): # since the response sender is closed, nothing is passed to the client. 
responses = self._infer( model_name="response_sender_decoupled", - number_of_response_before_return=1, - send_complete_final_flag_before_return=True, - return_a_response=True, - number_of_response_after_return=0, - send_complete_final_flag_after_return=False, + **self._inputs_parameters_one_response_pre_and_on_return, ) self._assert_responses_valid( responses, @@ -470,11 +480,7 @@ def test_decoupled_one_response_on_and_post_return(self): # deleted response factory and causes a segmentation fault. responses = self._infer( model_name="response_sender_decoupled", - number_of_response_before_return=0, - send_complete_final_flag_before_return=False, - return_a_response=True, - number_of_response_after_return=1, - send_complete_final_flag_after_return=True, + **self._inputs_parameters_one_response_on_and_post_return, ) self._assert_responses_exception( responses, @@ -489,34 +495,20 @@ def test_non_decoupled_zero_response_pre_return(self): # exception happens before the model returns, it will be caught by the # stub process which pass it to the backend and sent an error response # with final flag. - number_of_response_before_return = 0 - send_complete_final_flag_before_return = True - return_a_response = False - number_of_response_after_return = 0 - send_complete_final_flag_after_return = False expected_message = ( "Non-decoupled model cannot send complete final before sending a response" ) - model_name = "response_sender" responses = self._infer( model_name, - number_of_response_before_return, - send_complete_final_flag_before_return, - return_a_response, - number_of_response_after_return, - send_complete_final_flag_after_return, + **self._inputs_parameters_zero_response_pre_return, ) self._assert_responses_exception(responses, expected_message) # Do NOT group into a for-loop as it hides which model failed. model_name = "response_sender_async" responses = self._infer( model_name, - number_of_response_before_return, - send_complete_final_flag_before_return, - return_a_response, - number_of_response_after_return, - send_complete_final_flag_after_return, + **self._inputs_parameters_zero_response_pre_return, ) self._assert_responses_exception(responses, expected_message) @@ -540,11 +532,7 @@ def test_non_decoupled_two_response_pre_return(self): # with final flag. Since this is non-decoupled model using gRPC stream, # any response after the 1st will be discarded by the frontend. self._assert_non_decoupled_infer_with_expected_response_success( - number_of_response_before_return=2, - send_complete_final_flag_before_return=True, - return_a_response=False, - number_of_response_after_return=0, - send_complete_final_flag_after_return=False, + **self._inputs_parameters_two_response_pre_return, expected_number_of_response_before_return=1, expected_return_a_response=False, expected_number_of_response_after_return=0, @@ -561,11 +549,7 @@ def test_non_decoupled_two_response_post_return(self): # complete final flag is not sent and will hang when unloading the model. # How to detect such event and close the response factory? 
self._assert_non_decoupled_infer_with_expected_response_success( - number_of_response_before_return=0, - send_complete_final_flag_before_return=False, - return_a_response=False, - number_of_response_after_return=2, - send_complete_final_flag_after_return=True, + **self._inputs_parameters_two_response_post_return, expected_number_of_response_before_return=0, expected_return_a_response=False, expected_number_of_response_after_return=1, @@ -578,29 +562,21 @@ def test_non_decoupled_one_response_pre_and_on_return(self): # an exception. The backend should see the request is closed and do # nothing upon receiving the error from stub. self._assert_non_decoupled_infer_with_expected_response_success( - number_of_response_before_return=1, - send_complete_final_flag_before_return=True, - return_a_response=True, - number_of_response_after_return=0, - send_complete_final_flag_after_return=False, + **self._inputs_parameters_one_response_pre_and_on_return, expected_number_of_response_before_return=1, expected_return_a_response=False, expected_number_of_response_after_return=0, ) # Non-decoupled model return 1 response and send 1 response. - def test_non_decoupled_one_response_on_and_pre_return(self): + def test_non_decoupled_one_response_on_and_post_return(self): # Note: The returned response will send the response to the client and complete # final. The sent response will see the response sender is closed and # raise an exception. Since the exception happens after the model returns, # it cannot be caught by the stub (i.e. in a daemon thread), so nothing # will happen. self._assert_non_decoupled_infer_with_expected_response_success( - number_of_response_before_return=0, - send_complete_final_flag_before_return=False, - return_a_response=True, - number_of_response_after_return=1, - send_complete_final_flag_after_return=True, + **self._inputs_parameters_one_response_on_and_post_return, expected_number_of_response_before_return=0, expected_return_a_response=True, expected_number_of_response_after_return=0, From f7a2e56ef1b455d0fe1696950b97a033e87a0bb7 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Fri, 31 May 2024 17:16:26 -0700 Subject: [PATCH 9/9] Enable test_decoupled_one_response_on_and_post_return --- .../response_sender/response_sender_test.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/qa/L0_backend_python/response_sender/response_sender_test.py b/qa/L0_backend_python/response_sender/response_sender_test.py index 62917356a1..81f8c75f2c 100644 --- a/qa/L0_backend_python/response_sender/response_sender_test.py +++ b/qa/L0_backend_python/response_sender/response_sender_test.py @@ -468,16 +468,12 @@ def test_decoupled_one_response_pre_and_on_return(self): # using `py_future.result()` with error hangs on exit. # Decoupled model return 1 response and send 1 response. - @unittest.skip("Response factory segmentation fault, see the TODO comment.") def test_decoupled_one_response_on_and_post_return(self): # Note: The returned response will send an error response and complete final - # flag. The response sender is not yet closed due to the model is still - # executing. Then, sending a response and final flag will make their way - # to the frontend, but the request is completed at the frontend, so they - # are not sent to the client. 
- # TODO: The error response will close the response factory, but not the response - # sender, which enables the model to send response and final flag on the - # deleted response factory and causes a segmentation fault. + # flag, and close the response sender and factory. Then, sending a + # response will raise an exception. Since the exception happens after the + # model returns, it cannot be caught by the stub (i.e. in a daemon + # thread), so nothing will happen. responses = self._infer( model_name="response_sender_decoupled", **self._inputs_parameters_one_response_on_and_post_return,