Commit
test: Tests for Metrics API enhancement to include error counters (#7423)
indrajit96 authored Jul 12, 2024
1 parent 3dbf09e commit 70a0eee
Showing 4 changed files with 144 additions and 3 deletions.
19 changes: 19 additions & 0 deletions docs/user_guide/metrics.md
@@ -100,6 +100,25 @@ Count*. The count metrics are illustrated by the following examples:
| |Execution Count |`nv_inference_exec_count` |Number of inference batch executions (see [Inference Request Metrics](#inference-request-metrics), does not include cached requests)|Per model|Per request|
| |Pending Request Count |`nv_inference_pending_request_count` |Number of inference requests awaiting execution by a backend. This number is incremented when a request is enqueued to the server (`TRITONSERVER_ServerInferAsync`) and is decremented when a backend is about to start executing the request. More details can be found below. |Per model|Per request|

#### Failure Count Categories

| Failed Request Reason |Description |
|------------|------------|
| REJECTED | Number of inference failures due to request timeout in the scheduler. |
| CANCELED | Number of inference failures due to request cancellation in the core. |
| BACKEND | Number of inference failures during execution of requests in the backend/model. |
| OTHER | Number of inference failures due to other uncategorized reasons in the core. |
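
Each failure reason appears as a `reason` label on the `nv_inference_request_failure` counter. As a minimal sketch (not part of this change), the counter for a given model and reason can be read over HTTP; this assumes the default metrics port 8002 and a hypothetical model name:

```python
import re

import requests

# Default Triton metrics endpoint; adjust host/port for your deployment.
METRICS_URL = "http://localhost:8002/metrics"


def get_failure_count(model: str, reason: str) -> int:
    # Scrape the Prometheus text output and extract the counter value for
    # this (model, reason) pair; treat a missing series as a count of zero.
    text = requests.get(METRICS_URL).text
    pattern = rf'nv_inference_request_failure\{{model="{model}",reason="{reason}",version="1"\}} (\d+)'
    match = re.search(pattern, text)
    return int(match.group(1)) if match else 0


print(get_failure_count("my_model", "REJECTED"))
```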

> **Note**
>
> Ensemble failure metrics will reflect the failure counts of their composing models as well as the parent model, but currently do not capture the same granularity for the "reason" label and will default to the "OTHER" reason.
>
> For example, if EnsembleA contains ModelA, and ModelA experiences a failed request due to a queue/backlog timeout in the scheduler, ModelA will have a failed request metric reflecting `reason=REJECTED` and `count=1`.
> Additionally, EnsembleA will have a failed request metric reflecting `reason=OTHER` and `count=2`.
> The `count=2` reflects 1 from the internally failed request captured by ModelA, as well as 1 from the failed top-level request sent to EnsembleA by the user/client.
> The `reason=OTHER` reflects the fact that the ensemble doesn't currently capture the specific reason why ModelA's request failed at this time.
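
Concretely, the example above would surface on the metrics endpoint as counter lines like these (values illustrative):

```
nv_inference_request_failure{model="ModelA",reason="REJECTED",version="1"} 1
nv_inference_request_failure{model="EnsembleA",reason="OTHER",version="1"} 2
```
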
#### Pending Request Count (Queue Size) Per-Model

The *Pending Request Count* reflects the number of requests that have been
35 changes: 34 additions & 1 deletion qa/L0_backend_python/lifecycle/lifecycle_test.py
@@ -27,8 +27,11 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import re
import sys

import requests

sys.path.append("../../common")

import queue
@@ -63,6 +66,29 @@ class LifecycleTest(unittest.TestCase):
    def setUp(self):
        self._shm_leak_detector = shm_util.ShmLeakDetector()

    def _get_metrics(self):
        metrics_url = "http://localhost:8002/metrics"
        r = requests.get(metrics_url)
        r.raise_for_status()
        return r.text

    def _metrics_before_test(self, model, reason):
        pattern = rf'nv_inference_request_failure\{{model="{model}",reason="{reason}",version="1"\}} (\d+)'
        metrics = self._get_metrics()
        match = re.search(pattern, metrics)
        if match:
            return int(match.group(1))
        else:
            raise Exception(f"Failure metrics for model='{model}' not found")

    def _assert_metrics(
        self, model_name, reason, expected_count_increase, initial_count
    ):
        metrics = self._get_metrics()
        # Add initial count + expected count for the test
        expected_metric = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}} {expected_count_increase + initial_count}'
        self.assertIn(expected_metric, metrics)
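
    # A minimal usage sketch for the helpers above (hypothetical model and
    # reason values): snapshot the counter, run an inference that is expected
    # to fail, then assert the counter grew by the expected amount:
    #
    #   initial = self._metrics_before_test("my_model", "BACKEND")
    #   ...trigger a failing inference...
    #   self._assert_metrics("my_model", "BACKEND", 1, initial)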

    def test_error_code(self):
        model_name = "error_code"
        shape = [1, 1]
@@ -181,7 +207,7 @@ def test_batch_error(self):
    def test_infer_pymodel_error(self):
        model_name = "wrong_model"
        shape = [2, 2]

        initial_metrics_value = self._metrics_before_test(model_name, "BACKEND")
        with self._shm_leak_detector.Probe() as shm_probe:
            with httpclient.InferenceServerClient(
                f"{_tritonserver_ipaddr}:8000"
@@ -207,6 +233,13 @@ def test_infer_pymodel_error(self):
                self.assertTrue(
                    False, "Wrong exception raised or did not raise an exception"
                )
        expected_count_increase = 1
        self._assert_metrics(
            model_name,
            "BACKEND",
            expected_count_increase,
            initial_metrics_value,
        )


if __name__ == "__main__":
50 changes: 49 additions & 1 deletion qa/L0_model_queue/model_queue_test.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -30,6 +30,7 @@

sys.path.append("../common")

import re
import threading
import time
import unittest
@@ -38,6 +39,7 @@

import infer_util as iu
import numpy as np
import requests
import test_util as tu
from tritonclientutils import InferenceServerException

@@ -69,6 +71,29 @@ def check_deferred_exception(self):
            _deferred_exceptions.pop(0)
            raise first_exception

    def _get_metrics(self):
        metrics_url = "http://localhost:8002/metrics"
        r = requests.get(metrics_url)
        r.raise_for_status()
        return r.text

    def _metrics_before_test(self, model, reason):
        pattern = rf'nv_inference_request_failure\{{model="{model}",reason="{reason}",version="1"\}} (\d+)'
        metrics = self._get_metrics()
        match = re.search(pattern, metrics)
        if match:
            return int(match.group(1))
        else:
            raise Exception(f"Failure metrics for model='{model}' not found")

    def _assert_metrics(
        self, model_name, reason, expected_count_increase, initial_count
    ):
        metrics = self._get_metrics()
        # Add initial count + expected count for the test
        expected_metric = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}} {expected_count_increase + initial_count}'
        self.assertIn(expected_metric, metrics)

    def check_response(
        self,
        bs,
@@ -235,6 +260,12 @@ def test_policy_reject(self):
        # requests are sent after 'default_timeout_microseconds'.
        # Expect the first request to be timed out and rejected, which lets
        # the second and third requests be batched together and executed.
        initial_metrics_value_ensemble = self._metrics_before_test(
            "ensemble_zero_1_float32", "OTHER"
        )
        initial_metrics_value_custom = self._metrics_before_test(
            "custom_zero_1_float32", "REJECTED"
        )
        dtype = np.float32
        shapes = ([16],)
        for trial in self.trials_:
@@ -283,6 +314,23 @@ def test_policy_reject(self):
            self.check_deferred_exception()
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))
        expected_count_increase = 4
        # NOTE: Ensemble failure metrics will reflect the failure counts of
        # their composing models as well as the parent model, but currently
        # do not capture the same granularity for the "reason" label and will
        # default to the "OTHER" reason.
        self._assert_metrics(
            "ensemble_zero_1_float32",
            "OTHER",
            expected_count_increase,
            initial_metrics_value_ensemble,
        )
        expected_count_increase = 4
        self._assert_metrics(
            "custom_zero_1_float32",
            "REJECTED",
            expected_count_increase,
            initial_metrics_value_custom,
        )

    def test_timeout_override(self):
        # Send requests with batch sizes 1, 1, 3 where the first request
43 changes: 42 additions & 1 deletion qa/L0_request_cancellation/scheduler_test.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -27,10 +27,12 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import concurrent.futures
import re
import time
import unittest

import numpy as np
import requests
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

@@ -84,6 +86,29 @@ def _assert_streaming_response_is_cancelled(self, response):
                cancelled_count += 1
        self.assertEqual(cancelled_count, 1)

    def _get_metrics(self):
        metrics_url = "http://localhost:8002/metrics"
        r = requests.get(metrics_url)
        r.raise_for_status()
        return r.text

    def _metrics_before_test(self, model, reason):
        pattern = rf'nv_inference_request_failure\{{model="{model}",reason="{reason}",version="1"\}} (\d+)'
        metrics = self._get_metrics()
        match = re.search(pattern, metrics)
        if match:
            return int(match.group(1))
        else:
            raise Exception(f"Failure metrics for model='{model}' not found")

    def _assert_metrics(
        self, model_name, reason, expected_count_increase, initial_count
    ):
        metrics = self._get_metrics()
        # Add initial count + expected count for the test
        expected_metric = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}} {expected_count_increase + initial_count}'
        self.assertIn(expected_metric, metrics)

    # Test queued requests on dynamic batch scheduler can be cancelled
    def test_dynamic_batch_scheduler_request_cancellation(self):
        model_name = "dynamic_batch"
@@ -114,6 +139,7 @@ def test_dynamic_batch_scheduler_request_cancellation(self):
    # Test backlogged requests on sequence batch scheduler can be cancelled
    def test_sequence_batch_scheduler_backlog_request_cancellation(self):
        model_name = "sequence_direct"
        initial_metrics_value = self._metrics_before_test(model_name, "CANCELED")
        with concurrent.futures.ThreadPoolExecutor() as pool:
            # Saturate the single sequence slot
            saturate_thread = pool.submit(
@@ -149,11 +175,26 @@ def test_sequence_batch_scheduler_backlog_request_cancellation(self):
            self._assert_response_is_cancelled(backlog_requests[1]["response"])
            # Join saturating thread
            saturate_thread.result()
        expected_count_increase = 2
        self._assert_metrics(
            model_name,
            "CANCELED",
            expected_count_increase,
            initial_metrics_value,
        )

    # Test queued requests on direct sequence batch scheduler can be cancelled
    def test_direct_sequence_batch_scheduler_request_cancellation(self):
        model_name = "sequence_direct"
        initial_metrics_value = self._metrics_before_test(model_name, "CANCELED")
        self._test_sequence_batch_scheduler_queued_request_cancellation(model_name)
        expected_count_increase = 2
        self._assert_metrics(
            model_name,
            "CANCELED",
            expected_count_increase,
            initial_metrics_value,
        )

    # Test queued requests on oldest sequence batch scheduler can be cancelled
    def test_oldest_sequence_batch_scheduler_request_cancellation(self):
