diff --git a/qa/L0_backend_python/async_execute/concurrency_test.py b/qa/L0_backend_python/async_execute/concurrency_test.py
index b8c3f819cd4..ab9c90dfc11 100644
--- a/qa/L0_backend_python/async_execute/concurrency_test.py
+++ b/qa/L0_backend_python/async_execute/concurrency_test.py
@@ -46,8 +46,7 @@ def callback(result, error):
         return callback, response
 
     # Helper for testing concurrent execution
-    def _concurrent_execute_requests(self, batch_size, number_of_requests):
-        model_name = "async_execute_decouple"
+    def _concurrent_execute_requests(self, model_name, batch_size, number_of_requests):
         delay_secs = 4
         shape = [batch_size, 1]
         inputs = [grpcclient.InferInput("WAIT_SECONDS", shape, "FP32")]
@@ -76,11 +75,27 @@ def _concurrent_execute_requests(self, batch_size, number_of_requests):
 
     # Test batched requests are executed concurrently
     def test_concurrent_execute_single_request(self):
-        self._concurrent_execute_requests(batch_size=4, number_of_requests=1)
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple", batch_size=4, number_of_requests=1
+        )
 
     # Test multiple requests are executed concurrently
     def test_concurrent_execute_multi_request(self):
-        self._concurrent_execute_requests(batch_size=1, number_of_requests=4)
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple", batch_size=1, number_of_requests=4
+        )
+
+    # Test batched requests are executed concurrently via bls
+    def test_concurrent_execute_single_request_bls(self):
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple_bls", batch_size=4, number_of_requests=1
+        )
+
+    # Test multiple requests are executed concurrently via bls
+    def test_concurrent_execute_multi_request_bls(self):
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple_bls", batch_size=1, number_of_requests=4
+        )
 
     # Test model exception handling
     def test_model_raise_exception(self):
diff --git a/qa/L0_backend_python/async_execute/test.sh b/qa/L0_backend_python/async_execute/test.sh
index 449d6dc956a..b52c2bffa53 100755
--- a/qa/L0_backend_python/async_execute/test.sh
+++ b/qa/L0_backend_python/async_execute/test.sh
@@ -36,6 +36,9 @@ rm -rf models && mkdir models
 mkdir -p models/async_execute_decouple/1 && \
     cp ../../python_models/async_execute_decouple/model.py models/async_execute_decouple/1 && \
     cp ../../python_models/async_execute_decouple/config.pbtxt models/async_execute_decouple
+mkdir -p models/async_execute_decouple_bls/1 && \
+    cp ../../python_models/async_execute_decouple_bls/model.py models/async_execute_decouple_bls/1 && \
+    cp ../../python_models/async_execute_decouple_bls/config.pbtxt models/async_execute_decouple_bls
 
 TEST_LOG="concurrency_test.log"
 SERVER_LOG="concurrency_test.server.log"
diff --git a/qa/python_models/async_execute_decouple_bls/config.pbtxt b/qa/python_models/async_execute_decouple_bls/config.pbtxt
new file mode 100644
index 00000000000..847661d1763
--- /dev/null
+++ b/qa/python_models/async_execute_decouple_bls/config.pbtxt
@@ -0,0 +1,46 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+backend: "python"
+max_batch_size: 8
+
+input [
+  {
+    name: "WAIT_SECONDS"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+  }
+]
+output [
+  {
+    name: "DUMMY_OUT"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+  }
+]
+
+instance_group [{ kind: KIND_CPU }]
+model_transaction_policy { decoupled: True }
diff --git a/qa/python_models/async_execute_decouple_bls/model.py b/qa/python_models/async_execute_decouple_bls/model.py
new file mode 100644
index 00000000000..f5114d4a4d1
--- /dev/null
+++ b/qa/python_models/async_execute_decouple_bls/model.py
@@ -0,0 +1,61 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import asyncio
+
+import numpy as np
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    async def _execute_a_request(self, request):
+        input_tensor = pb_utils.get_input_tensor_by_name(
+            request, "WAIT_SECONDS"
+        ).as_numpy()
+        bls_input_tensor = pb_utils.Tensor("WAIT_SECONDS", input_tensor)
+        bls_request = pb_utils.InferenceRequest(
+            model_name="async_execute_decouple",
+            inputs=[bls_input_tensor],
+            requested_output_names=["DUMMY_OUT"],
+        )
+        bls_responses = await bls_request.async_exec(decoupled=True)
+        response_sender = request.get_response_sender()
+        for bls_response in bls_responses:
+            bls_output_tensor = pb_utils.get_output_tensor_by_name(
+                bls_response, "DUMMY_OUT"
+            ).as_numpy()
+            output_tensor = pb_utils.Tensor("DUMMY_OUT", bls_output_tensor)
+            response = pb_utils.InferenceResponse(output_tensors=[output_tensor])
+            response_sender.send(response)
+        response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
+
+    async def execute(self, requests):
+        async_futures = []
+        for request in requests:
+            async_future = self._execute_a_request(request)
+            async_futures.append(async_future)
+        await asyncio.gather(*async_futures)
+        return None
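
Reviewer note: a minimal standalone client sketch showing how the new
async_execute_decouple_bls model can be exercised outside the test harness.
This is illustrative only (not part of the diff); it assumes a Triton server
is already running on localhost:8001 with the model repository laid out as in
test.sh above, and it uses only the same tritonclient.grpc streaming calls
that concurrency_test.py relies on.

    import time

    import numpy as np
    import tritonclient.grpc as grpcclient

    responses = []

    def callback(result, error):
        # Decoupled models stream responses; each one lands here as it arrives.
        responses.append(error if error is not None else result.as_numpy("DUMMY_OUT"))

    client = grpcclient.InferenceServerClient("localhost:8001")
    client.start_stream(callback=callback)

    # WAIT_SECONDS is declared with dims [1] and max_batch_size 8, so the wire
    # shape is [batch_size, 1]; 4.0 matches the delay_secs used by the tests.
    inputs = [grpcclient.InferInput("WAIT_SECONDS", [1, 1], "FP32")]
    inputs[0].set_data_from_numpy(np.full([1, 1], 4.0, dtype=np.float32))
    client.async_stream_infer(model_name="async_execute_decouple_bls", inputs=inputs)

    time.sleep(6)  # crude wait for the ~4s model delay; the real test instead
                   # synchronizes on callback events before asserting
    client.stop_stream()
    print(responses)

The BLS model forwards each request to async_execute_decouple and relays the
streamed DUMMY_OUT tensors back, so the client-visible behavior (one response
per batch element, followed by the final flag) matches the non-BLS model.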