Commit

Add testing for iterative scheduler backlogged requests
Tabrizian committed Mar 29, 2024
1 parent 8c3156e commit a9f9c08
Showing 2 changed files with 25 additions and 11 deletions.
34 changes: 24 additions & 10 deletions qa/L0_iterative_sequence/iterative_sequence_e2e.py
@@ -46,7 +46,7 @@
 MODEL_CONFIG_BASE = """
 {{
 "backend": "iterative_sequence",
-"max_batch_size": 4,
+"max_batch_size": 1,
 "input" : [
   {{
     "name": "INPUT",
Expand Down Expand Up @@ -103,30 +103,44 @@ def test_generate_stream(self):
self.assertEqual(res_count, data["OUTPUT"])
self.assertEqual(0, res_count)

def test_grpc_stream(self, sequence_id=0, sequence_start=False):
def test_grpc_stream(
self, sequence_id=0, sequence_start=False, num_requests=1, validation=True
):
user_data = UserData()
with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
triton_client.start_stream(callback=partial(callback, user_data))
inputs = []
inputs.append(grpcclient.InferInput("INPUT", [1, 1], "INT32"))
inputs[0].set_data_from_numpy(np.array([[2]], dtype=np.int32))

triton_client.async_stream_infer(
model_name="iterative_sequence",
inputs=inputs,
sequence_id=sequence_id,
sequence_start=sequence_start,
)
res_count = 2
for _ in range(num_requests):
triton_client.async_stream_infer(
model_name="iterative_sequence",
inputs=inputs,
sequence_id=sequence_id,
sequence_start=sequence_start,
)
res_count = 2 * num_requests
while res_count > 0:
data_item = user_data._completed_requests.get()
res_count -= 1
if type(data_item) == InferenceServerException:
raise data_item
else:
self.assertEqual(res_count, data_item.as_numpy("OUTPUT")[0][0])
if validation:
self.assertEqual(
res_count % 2, data_item.as_numpy("OUTPUT")[0][0]
)
self.assertEqual(0, res_count)

def test_backlog_fill(self):
config = r'"sequence_batching" : { "iterative_sequence" : true, "max_sequence_idle_microseconds": 8000000, direct: { "max_queue_delay_microseconds" : 10000000 }}'
with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
triton_client.load_model(
"iterative_sequence", config=MODEL_CONFIG_BASE.format(config)
)
self.test_grpc_stream(num_requests=4, validation=False)

def test_reschedule_error(self):
# Use short idle timeout (< backend reschedule delay: 0.5s) so that
# the backend won't be able to reschedule the request as the scheduler
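Note: the test relies on the UserData and callback helpers (and the InferenceServerException import) defined near the top of iterative_sequence_e2e.py, outside this diff's hunks. A minimal sketch of the queue-backed pattern those names follow in Triton client tests (the exact bodies below are an assumption, not part of this commit):

import queue


class UserData:
    def __init__(self):
        # Responses (or errors) delivered by the stream callback collect here.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    # Stream callback: enqueue the error if one occurred, otherwise the result.
    if error:
        user_data._completed_requests.put(error)
    else:
        user_data._completed_requests.put(result)

With these helpers, each streamed request to the iterative_sequence model is expected to produce two responses, which is why the test waits for res_count = 2 * num_requests items before asserting the queue drained to zero.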
1 change: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 backend: "iterative_sequence"
-max_batch_size: 4
+max_batch_size: 1
 input [
   {
     name: "INPUT"
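Dropping max_batch_size to 1 leaves the model a single batch slot, which is presumably what pushes the extra requests streamed by test_backlog_fill (num_requests=4) into the sequence batcher's backlog instead of letting them all run at once. A sketch of invoking just the new case against a running server, assuming the usual unittest layout of the QA script (the class name IterativeSequenceTest is a guess; use the actual TestCase subclass defined in iterative_sequence_e2e.py):

import unittest

# Hypothetical class name -- the real name lives outside this diff's hunks.
suite = unittest.defaultTestLoader.loadTestsFromName(
    "iterative_sequence_e2e.IterativeSequenceTest.test_backlog_fill"
)
unittest.TextTestRunner(verbosity=2).run(suite)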
