diff --git a/qa/L0_backend_python/response_sender/response_sender_test.py b/qa/L0_backend_python/response_sender/response_sender_test.py index 8634d435bc..296e762313 100644 --- a/qa/L0_backend_python/response_sender/response_sender_test.py +++ b/qa/L0_backend_python/response_sender/response_sender_test.py @@ -540,8 +540,11 @@ def test_decoupled_one_response_on_and_post_return(self): # Non-decoupled model send 2 response before return. def test_non_decoupled_two_response_pre_return(self): - # Note: The 2 responses sent will make their way to the frontend, but only the - # response at index 0 will be sent back to the client. + # Note: The 1st response will make its way to the client, but sending the 2nd + # response will raise an exception which stops the model. However, since + # the exception happens before the model returns, it will be caught by the + # stub process which pass it to the backend and sent an error response + # with final flag. self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return=2, send_complete_final_flag_before_return=True, @@ -554,9 +557,15 @@ def test_non_decoupled_two_response_pre_return(self): ) # Non-decoupled model send 2 response after return. + @unittest.skip("Model unload will hang, see the TODO comment.") def test_non_decoupled_two_response_post_return(self): - # Note: The 2 responses sent will make their way to the frontend, but only the - # response at index 0 will be sent back to the client. + # Note: The 1st response will make its way to the client, but sending the 2nd + # response will raise an exception which stops the model. Since the + # exception happens after the model returns, it cannot be caught by the + # stub (i.e. in a daemon thread), so nothing will happen. + # TODO: Since the stub does not know if the model failed after returning, the + # complete final flag is not sent and will hang when unloading the model. + # How to detect such event and close the response factory? self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return=0, send_complete_final_flag_before_return=False, @@ -570,8 +579,10 @@ def test_non_decoupled_two_response_post_return(self): # Non-decoupled model send 1 response and return 1 response. def test_non_decoupled_one_response_pre_and_on_return(self): - # Note: The 2 responses sent will make their way to the frontend, but only the - # response at index 0 will be sent back to the client. + # Note: The sent response will make its way to the client and complete final. + # The returned response will see the response sender is closed and raise + # an exception. The backend should see the request is closed and do + # nothing upon receiving the error from stub. self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return=1, send_complete_final_flag_before_return=True, @@ -585,8 +596,11 @@ def test_non_decoupled_one_response_pre_and_on_return(self): # Non-decoupled model return 1 response and send 1 response. def test_non_decoupled_one_response_on_and_pre_return(self): - # Note: The 2 responses sent will make their way to the frontend, but only the - # response at index 0 will be sent back to the client. + # Note: The returned response will send the response to the client and complete + # final. The sent response will see the response sender is closed and + # raise an exception. Since the exception happens after the model returns, + # it cannot be caught by the stub (i.e. in a daemon thread), so nothing + # will happen. self._assert_non_decoupled_infer_with_expected_response_success( number_of_response_before_return=0, send_complete_final_flag_before_return=False,