Add decoupled bls async exec test
kthui committed Apr 2, 2024
1 parent fb217a7 commit 110d8eb
Showing 4 changed files with 129 additions and 4 deletions.
23 changes: 19 additions & 4 deletions qa/L0_backend_python/async_execute/concurrency_test.py
@@ -46,8 +46,7 @@ def callback(result, error):
         return callback, response
 
     # Helper for testing concurrent execution
-    def _concurrent_execute_requests(self, batch_size, number_of_requests):
-        model_name = "async_execute_decouple"
+    def _concurrent_execute_requests(self, model_name, batch_size, number_of_requests):
         delay_secs = 4
         shape = [batch_size, 1]
         inputs = [grpcclient.InferInput("WAIT_SECONDS", shape, "FP32")]
@@ -76,11 +75,27 @@ def _concurrent_execute_requests(self, batch_size, number_of_requests):

     # Test batched requests are executed concurrently
     def test_concurrent_execute_single_request(self):
-        self._concurrent_execute_requests(batch_size=4, number_of_requests=1)
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple", batch_size=4, number_of_requests=1
+        )
 
     # Test multiple requests are executed concurrently
     def test_concurrent_execute_multi_request(self):
-        self._concurrent_execute_requests(batch_size=1, number_of_requests=4)
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple", batch_size=1, number_of_requests=4
+        )
+
+    # Test batched requests are executed concurrently via bls
+    def test_concurrent_execute_single_request_bls(self):
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple_bls", batch_size=4, number_of_requests=1
+        )
+
+    # Test multiple requests are executed concurrently via bls
+    def test_concurrent_execute_multi_request_bls(self):
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple_bls", batch_size=1, number_of_requests=4
+        )
 
     # Test model exception handling
     def test_model_raise_exception(self):
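The diff shows only part of the _concurrent_execute_requests helper, so the concurrency mechanics are not fully visible here. The sketch below is a hedged reconstruction of how such a test might drive a decoupled model over a single gRPC stream and assert that the per-request delays overlap; the server URL, the use of streaming, the polling loop, and the 2x time threshold are assumptions rather than code from this commit.

# Hypothetical sketch (not from this commit): fire several requests at a
# decoupled model over one gRPC stream and assert that the total wall time
# is close to one WAIT_SECONDS delay rather than the sum of all delays.
import time

import numpy as np
import tritonclient.grpc as grpcclient


def concurrent_execute_sketch(model_name, batch_size, number_of_requests, delay_secs=4):
    responses = []

    def callback(result, error):
        # Stream callback: collect either the result or the error.
        responses.append(error if error is not None else result)

    client = grpcclient.InferenceServerClient("localhost:8001")
    client.start_stream(callback=callback)

    wait_input = grpcclient.InferInput("WAIT_SECONDS", [batch_size, 1], "FP32")
    wait_input.set_data_from_numpy(np.full([batch_size, 1], delay_secs, dtype=np.float32))

    start = time.time()
    for _ in range(number_of_requests):
        client.async_stream_infer(model_name=model_name, inputs=[wait_input])
    while len(responses) < number_of_requests:
        time.sleep(0.1)
    elapsed = time.time() - start

    client.stop_stream()
    # If the model awaits its per-request delays concurrently, the elapsed
    # time stays near delay_secs instead of number_of_requests * delay_secs.
    assert elapsed < 2 * delay_secs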
3 changes: 3 additions & 0 deletions qa/L0_backend_python/async_execute/test.sh
@@ -36,6 +36,9 @@ rm -rf models && mkdir models
 mkdir -p models/async_execute_decouple/1 && \
     cp ../../python_models/async_execute_decouple/model.py models/async_execute_decouple/1 && \
     cp ../../python_models/async_execute_decouple/config.pbtxt models/async_execute_decouple
+mkdir -p models/async_execute_decouple_bls/1 && \
+    cp ../../python_models/async_execute_decouple_bls/model.py models/async_execute_decouple_bls/1 && \
+    cp ../../python_models/async_execute_decouple_bls/config.pbtxt models/async_execute_decouple_bls
 
 TEST_LOG="concurrency_test.log"
 SERVER_LOG="concurrency_test.server.log"
46 changes: 46 additions & 0 deletions qa/python_models/async_execute_decouple_bls/config.pbtxt
@@ -0,0 +1,46 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "python"
max_batch_size: 8

input [
  {
    name: "WAIT_SECONDS"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "DUMMY_OUT"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

instance_group [{ kind: KIND_CPU }]
model_transaction_policy { decoupled: True }
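
The new config mirrors the interface of async_execute_decouple (one FP32 WAIT_SECONDS input, one FP32 DUMMY_OUT output) so the BLS wrapper can pass tensors through unchanged, and it marks the wrapper itself as decoupled. As a hedged aside, not part of this commit, a quick client-side sanity check of the loaded config could look like the following; the server URL is an assumption.

# Hypothetical sanity check (not from this commit): confirm the BLS wrapper
# loaded with the decoupled transaction policy and batching declared above.
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient("localhost:8001")
model_config = client.get_model_config("async_execute_decouple_bls").config

assert model_config.model_transaction_policy.decoupled
assert model_config.max_batch_size == 8
assert model_config.input[0].name == "WAIT_SECONDS"
assert model_config.output[0].name == "DUMMY_OUT"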
61 changes: 61 additions & 0 deletions qa/python_models/async_execute_decouple_bls/model.py
@@ -0,0 +1,61 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import asyncio

import numpy as np

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    async def _execute_a_request(self, request):
        input_tensor = pb_utils.get_input_tensor_by_name(
            request, "WAIT_SECONDS"
        ).as_numpy()
        bls_input_tensor = pb_utils.Tensor("WAIT_SECONDS", input_tensor)
        bls_request = pb_utils.InferenceRequest(
            model_name="async_execute_decouple",
            inputs=[bls_input_tensor],
            requested_output_names=["DUMMY_OUT"],
        )
        bls_responses = await bls_request.async_exec(decoupled=True)
        response_sender = request.get_response_sender()
        for bls_response in bls_responses:
            bls_output_tensor = pb_utils.get_output_tensor_by_name(
                bls_response, "DUMMY_OUT"
            ).as_numpy()
            output_tensor = pb_utils.Tensor("DUMMY_OUT", bls_output_tensor)
            response = pb_utils.InferenceResponse(output_tensors=[output_tensor])
            response_sender.send(response)
        response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    async def execute(self, requests):
        async_futures = []
        for request in requests:
            async_future = self._execute_a_request(request)
            async_futures.append(async_future)
        await asyncio.gather(*async_futures)
        return None
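
The QA model above deliberately keeps the relay minimal: it awaits async_exec(decoupled=True), which yields the decoupled model's responses, and forwards each one without checking it for errors. As a hedged sketch only, not part of this commit and not required by the QA test, a stricter variant of the same pattern might look like this; the method name _relay_request and the error-propagation choice are assumptions.

# Hypothetical variant (not from this commit): the same decoupled BLS relay,
# with each BLS response checked for an error before it is forwarded.
import asyncio

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    async def _relay_request(self, request):
        wait_secs = pb_utils.get_input_tensor_by_name(request, "WAIT_SECONDS").as_numpy()
        bls_request = pb_utils.InferenceRequest(
            model_name="async_execute_decouple",
            inputs=[pb_utils.Tensor("WAIT_SECONDS", wait_secs)],
            requested_output_names=["DUMMY_OUT"],
        )
        response_sender = request.get_response_sender()
        for bls_response in await bls_request.async_exec(decoupled=True):
            if bls_response.has_error():
                # Forward the BLS error to the client instead of a tensor.
                error = pb_utils.TritonError(bls_response.error().message())
                response_sender.send(pb_utils.InferenceResponse(error=error))
                continue
            dummy_out = pb_utils.get_output_tensor_by_name(bls_response, "DUMMY_OUT").as_numpy()
            response_sender.send(
                pb_utils.InferenceResponse(
                    output_tensors=[pb_utils.Tensor("DUMMY_OUT", dummy_out)]
                )
            )
        # One FINAL flag per request, after all BLS responses are relayed.
        response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    async def execute(self, requests):
        # Gather the per-request coroutines so their awaits overlap.
        await asyncio.gather(*(self._relay_request(request) for request in requests))
        return None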
