Add decoupled bls async exec test
kthui committed Apr 2, 2024
1 parent fb217a7 commit 110d8eb
Showing 4 changed files with 129 additions and 4 deletions.
23 changes: 19 additions & 4 deletions qa/L0_backend_python/async_execute/concurrency_test.py
@@ -46,8 +46,7 @@ def callback(result, error):
         return callback, response
 
     # Helper for testing concurrent execution
-    def _concurrent_execute_requests(self, batch_size, number_of_requests):
-        model_name = "async_execute_decouple"
+    def _concurrent_execute_requests(self, model_name, batch_size, number_of_requests):
         delay_secs = 4
         shape = [batch_size, 1]
         inputs = [grpcclient.InferInput("WAIT_SECONDS", shape, "FP32")]
@@ -76,11 +75,27 @@ def _concurrent_execute_requests(self, batch_size, number_of_requests):

     # Test batched requests are executed concurrently
     def test_concurrent_execute_single_request(self):
-        self._concurrent_execute_requests(batch_size=4, number_of_requests=1)
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple", batch_size=4, number_of_requests=1
+        )
 
     # Test multiple requests are executed concurrently
     def test_concurrent_execute_multi_request(self):
-        self._concurrent_execute_requests(batch_size=1, number_of_requests=4)
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple", batch_size=1, number_of_requests=4
+        )
+
+    # Test batched requests are executed concurrently via bls
+    def test_concurrent_execute_single_request_bls(self):
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple_bls", batch_size=4, number_of_requests=1
+        )
+
+    # Test multiple requests are executed concurrently via bls
+    def test_concurrent_execute_multi_request_bls(self):
+        self._concurrent_execute_requests(
+            model_name="async_execute_decouple_bls", batch_size=1, number_of_requests=4
+        )
 
     # Test model exception handling
     def test_model_raise_exception(self):
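The diff shows only part of the _concurrent_execute_requests helper, so the concurrency mechanics are not fully visible here. The sketch below is a hedged reconstruction of how such a test might drive a decoupled model over a single gRPC stream and assert that the per-request delays overlap; the server URL, the use of streaming, the polling loop, and the 2x time threshold are assumptions rather than code from this commit.

# Hypothetical sketch (not from this commit): fire several requests at a
# decoupled model over one gRPC stream and assert that the total wall time
# is close to one WAIT_SECONDS delay rather than the sum of all delays.
import time

import numpy as np
import tritonclient.grpc as grpcclient


def concurrent_execute_sketch(model_name, batch_size, number_of_requests, delay_secs=4):
    responses = []

    def callback(result, error):
        # Stream callback: collect either the result or the error.
        responses.append(error if error is not None else result)

    client = grpcclient.InferenceServerClient("localhost:8001")
    client.start_stream(callback=callback)

    wait_input = grpcclient.InferInput("WAIT_SECONDS", [batch_size, 1], "FP32")
    wait_input.set_data_from_numpy(np.full([batch_size, 1], delay_secs, dtype=np.float32))

    start = time.time()
    for _ in range(number_of_requests):
        client.async_stream_infer(model_name=model_name, inputs=[wait_input])
    while len(responses) < number_of_requests:
        time.sleep(0.1)
    elapsed = time.time() - start

    client.stop_stream()
    # If the model awaits its per-request delays concurrently, the elapsed
    # time stays near delay_secs instead of number_of_requests * delay_secs.
    assert elapsed < 2 * delay_secs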
3 changes: 3 additions & 0 deletions qa/L0_backend_python/async_execute/test.sh
@@ -36,6 +36,9 @@ rm -rf models && mkdir models
 mkdir -p models/async_execute_decouple/1 && \
     cp ../../python_models/async_execute_decouple/model.py models/async_execute_decouple/1 && \
     cp ../../python_models/async_execute_decouple/config.pbtxt models/async_execute_decouple
+mkdir -p models/async_execute_decouple_bls/1 && \
+    cp ../../python_models/async_execute_decouple_bls/model.py models/async_execute_decouple_bls/1 && \
+    cp ../../python_models/async_execute_decouple_bls/config.pbtxt models/async_execute_decouple_bls
 
 TEST_LOG="concurrency_test.log"
 SERVER_LOG="concurrency_test.server.log"
46 changes: 46 additions & 0 deletions qa/python_models/async_execute_decouple_bls/config.pbtxt
@@ -0,0 +1,46 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

backend: "python"
max_batch_size: 8

input [
  {
    name: "WAIT_SECONDS"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]
output [
  {
    name: "DUMMY_OUT"
    data_type: TYPE_FP32
    dims: [ 1 ]
  }
]

instance_group [{ kind: KIND_CPU }]
model_transaction_policy { decoupled: True }
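
The new config mirrors the interface of async_execute_decouple (one FP32 WAIT_SECONDS input, one FP32 DUMMY_OUT output) so the BLS wrapper can pass tensors through unchanged, and it marks the wrapper itself as decoupled. As a hedged aside, not part of this commit, a quick client-side sanity check of the loaded config could look like the following; the server URL is an assumption.

# Hypothetical sanity check (not from this commit): confirm the BLS wrapper
# loaded with the decoupled transaction policy and batching declared above.
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient("localhost:8001")
model_config = client.get_model_config("async_execute_decouple_bls").config

assert model_config.model_transaction_policy.decoupled
assert model_config.max_batch_size == 8
assert model_config.input[0].name == "WAIT_SECONDS"
assert model_config.output[0].name == "DUMMY_OUT"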
61 changes: 61 additions & 0 deletions qa/python_models/async_execute_decouple_bls/model.py
@@ -0,0 +1,61 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import asyncio

import numpy as np

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    async def _execute_a_request(self, request):
        input_tensor = pb_utils.get_input_tensor_by_name(
            request, "WAIT_SECONDS"
        ).as_numpy()
        bls_input_tensor = pb_utils.Tensor("WAIT_SECONDS", input_tensor)
        bls_request = pb_utils.InferenceRequest(
            model_name="async_execute_decouple",
            inputs=[bls_input_tensor],
            requested_output_names=["DUMMY_OUT"],
        )
        bls_responses = await bls_request.async_exec(decoupled=True)
        response_sender = request.get_response_sender()
        for bls_response in bls_responses:
            bls_output_tensor = pb_utils.get_output_tensor_by_name(
                bls_response, "DUMMY_OUT"
            ).as_numpy()
            output_tensor = pb_utils.Tensor("DUMMY_OUT", bls_output_tensor)
            response = pb_utils.InferenceResponse(output_tensors=[output_tensor])
            response_sender.send(response)
        response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    async def execute(self, requests):
        async_futures = []
        for request in requests:
            async_future = self._execute_a_request(request)
            async_futures.append(async_future)
        await asyncio.gather(*async_futures)
        return None
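
The QA model above deliberately keeps the relay minimal: it awaits async_exec(decoupled=True), which yields the decoupled model's responses, and forwards each one without checking it for errors. As a hedged sketch only, not part of this commit and not required by the QA test, a stricter variant of the same pattern might look like this; the method name _relay_request and the error-propagation choice are assumptions.

# Hypothetical variant (not from this commit): the same decoupled BLS relay,
# with each BLS response checked for an error before it is forwarded.
import asyncio

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    async def _relay_request(self, request):
        wait_secs = pb_utils.get_input_tensor_by_name(request, "WAIT_SECONDS").as_numpy()
        bls_request = pb_utils.InferenceRequest(
            model_name="async_execute_decouple",
            inputs=[pb_utils.Tensor("WAIT_SECONDS", wait_secs)],
            requested_output_names=["DUMMY_OUT"],
        )
        response_sender = request.get_response_sender()
        for bls_response in await bls_request.async_exec(decoupled=True):
            if bls_response.has_error():
                # Forward the BLS error to the client instead of a tensor.
                error = pb_utils.TritonError(bls_response.error().message())
                response_sender.send(pb_utils.InferenceResponse(error=error))
                continue
            dummy_out = pb_utils.get_output_tensor_by_name(bls_response, "DUMMY_OUT").as_numpy()
            response_sender.send(
                pb_utils.InferenceResponse(
                    output_tensors=[pb_utils.Tensor("DUMMY_OUT", dummy_out)]
                )
            )
        # One FINAL flag per request, after all BLS responses are relayed.
        response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

    async def execute(self, requests):
        # Gather the per-request coroutines so their awaits overlap.
        await asyncio.gather(*(self._relay_request(request) for request in requests))
        return None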
