diff --git a/docs/user_guide/metrics.md b/docs/user_guide/metrics.md index 8a54565222..5f403d7796 100644 --- a/docs/user_guide/metrics.md +++ b/docs/user_guide/metrics.md @@ -239,7 +239,34 @@ nv_inference_first_response_histogram_ms{model="my_model",version="1", le="5000" nv_inference_first_response_histogram_ms{model="my_model",version="1", le="+Inf"} 37 ``` -Triton initializes histograms with default buckets for each, as shown above. Customization of buckets per metric is currently unsupported. +Triton initializes each histogram with default buckets, as shown above. +Buckets can be overridden per family by specifying `model_metrics` in the +model configuration. For example: +``` +// config.pbtxt +model_metrics { + metric_control: [ + { + metric_identifier: { + family: "nv_inference_first_response_histogram_ms" + } + histogram_options: { + buckets: [ 1, 2, 4, 8 ] + } + } + ] +} +``` + +> **Note** +> +> To apply changes to metric options dynamically, the model must be completely +> unloaded and then reloaded for the updates to take effect. + +Currently, the following histogram families support custom buckets:
+``` +nv_inference_first_response_histogram_ms // Time to First Response +``` #### Summaries diff --git a/qa/L0_metrics/histogram_metrics_test.py b/qa/L0_metrics/histogram_metrics_test.py index 7480e2048b..a59aac0478 100755 --- a/qa/L0_metrics/histogram_metrics_test.py +++ b/qa/L0_metrics/histogram_metrics_test.py @@ -40,6 +40,7 @@ import test_util as tu MILLIS_PER_SEC = 1000 +FIRST_RESPONSE_HISTOGRAM = "nv_inference_first_response_histogram_ms" def get_histogram_metric_key( @@ -47,6 +48,8 @@ def get_histogram_metric_key( ): if metric_type in ["count", "sum"]: return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}"}}' + elif metric_type == "bucket": + return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}",le="{le}"}}' else: return None @@ -55,16 +58,20 @@ class TestHistogramMetrics(tu.TestResultCollector): def setUp(self): self.tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") - def get_histogram_metrics(self, metric_family: str): + def get_metrics(self): r = requests.get(f"http://{self.tritonserver_ipaddr}:8002/metrics") r.raise_for_status() + return r.text + def get_histogram_metrics(self, metric_family: str): # Regular expression to match the pattern pattern = f"^{metric_family}.*" histogram_dict = {} + metrics = self.get_metrics() + # Find all matches in the text - matches = re.findall(pattern, r.text, re.MULTILINE) + matches = re.findall(pattern, metrics, re.MULTILINE) for match in matches: key, value = match.rsplit(" ") @@ -135,24 +142,23 @@ def test_ensemble_decoupled(self): ) # Checks metrics output - first_response_family = "nv_inference_first_response_histogram_ms" - histogram_dict = self.get_histogram_metrics(first_response_family) + histogram_dict = self.get_histogram_metrics(FIRST_RESPONSE_HISTOGRAM) def check_existing_metrics(model_name, wait_secs_per_req, delta): metric_count = get_histogram_metric_key( - first_response_family, model_name, "1", "count" + 
FIRST_RESPONSE_HISTOGRAM, model_name, "1", "count" ) - model_sum = get_histogram_metric_key( - first_response_family, model_name, "1", "sum" + metric_sum = get_histogram_metric_key( + FIRST_RESPONSE_HISTOGRAM, model_name, "1", "sum" ) # Test histogram count self.assertIn(metric_count, histogram_dict) self.assertEqual(histogram_dict[metric_count], request_num) # Test histogram sum - self.assertIn(model_sum, histogram_dict) + self.assertIn(metric_sum, histogram_dict) self.assertTrue( wait_secs_per_req * MILLIS_PER_SEC * request_num - <= histogram_dict[model_sum] + <= histogram_dict[metric_sum] < (wait_secs_per_req + delta) * MILLIS_PER_SEC * request_num ) # Prometheus histogram buckets are tested in metrics_api_test.cc::HistogramAPIHelper @@ -165,14 +171,29 @@ def check_existing_metrics(model_name, wait_secs_per_req, delta): # Test non-decoupled model metrics non_decoupled_model_count = get_histogram_metric_key( - first_response_family, non_decoupled_model_name, "1", "count" + FIRST_RESPONSE_HISTOGRAM, non_decoupled_model_name, "1", "count" ) non_decoupled_model_sum = get_histogram_metric_key( - first_response_family, non_decoupled_model_name, "1", "sum" + FIRST_RESPONSE_HISTOGRAM, non_decoupled_model_name, "1", "sum" ) self.assertNotIn(non_decoupled_model_count, histogram_dict) self.assertNotIn(non_decoupled_model_sum, histogram_dict) + def test_buckets_override(self): + model_name = "async_execute_decouple" + metrics = self.get_metrics() + override_buckets = os.environ["OVERRIDE_BUCKETS"].split(",") + + # Check metric output: one bucket series per expected 'le' value + self.assertEqual( + metrics.count(FIRST_RESPONSE_HISTOGRAM + "_bucket"), len(override_buckets) + ) + for le in override_buckets: + bucket_key = get_histogram_metric_key( + FIRST_RESPONSE_HISTOGRAM, model_name, "1", "bucket", le + ) + self.assertIn(bucket_key, metrics) + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_metrics/metrics_config_test.py b/qa/L0_metrics/metrics_config_test.py index 
43e5a79ba1..975d219ef8 100755 --- a/qa/L0_metrics/metrics_config_test.py +++ b/qa/L0_metrics/metrics_config_test.py @@ -102,7 +102,7 @@ def test_cache_counters_missing(self): def test_inf_histograms_decoupled_exist(self): metrics = self._get_metrics() for metric in INF_HISTOGRAM_DECOUPLED_PATTERNS: - for suffix in ["_count", "_sum", ""]: + for suffix in ["_count", "_sum", "_bucket"]: self.assertIn(metric + suffix, metrics) def test_inf_histograms_decoupled_missing(self): diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh index f6802622a3..2d6e85e211 100755 --- a/qa/L0_metrics/test.sh +++ b/qa/L0_metrics/test.sh @@ -46,6 +46,7 @@ BASE_SERVER_ARGS="--model-repository=${MODELDIR}" SERVER_ARGS="${BASE_SERVER_ARGS}" SERVER_LOG="./inference_server.log" PYTHON_TEST="metrics_config_test.py" +HISTOGRAM_PYTEST="histogram_metrics_test.py" source ../common/util.sh CLIENT_LOG="client.log" @@ -301,12 +302,12 @@ check_unit_test kill_server # Check default settings: Histograms should be disabled in decoupled model -decoupled_model_name="async_execute_decouple" -mkdir -p "${MODELDIR}/${decoupled_model_name}/1/" -cp ../python_models/${decoupled_model_name}/model.py ${MODELDIR}/${decoupled_model_name}/1/ -cp ../python_models/${decoupled_model_name}/config.pbtxt ${MODELDIR}/${decoupled_model_name}/ +decoupled_model="async_execute_decouple" +mkdir -p "${MODELDIR}/${decoupled_model}/1/" +cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/ +cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/ -SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model_name}" +SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model}" run_and_check_server python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG} check_unit_test @@ -321,7 +322,7 @@ check_unit_test kill_server # Enable histograms in decoupled model -SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model_name} 
--metrics-config histogram_latencies=true" +SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model} --metrics-config histogram_latencies=true" run_and_check_server python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG} check_unit_test @@ -460,17 +461,59 @@ check_unit_test "${expected_tests}" ### Test histogram data in ensemble decoupled model ### MODELDIR="${PWD}/ensemble_decoupled" +SERVER_LOG="./histogram_ensemble_decoupled_server.log" +CLIENT_LOG="./histogram_ensemble_decoupled_client.log" SERVER_ARGS="--model-repository=${MODELDIR} --metrics-config histogram_latencies=true --log-verbose=1" -PYTHON_TEST="histogram_metrics_test.py" mkdir -p "${MODELDIR}"/ensemble/1 cp -r "${MODELDIR}"/async_execute_decouple "${MODELDIR}"/async_execute sed -i "s/model_transaction_policy { decoupled: True }//" "${MODELDIR}"/async_execute/config.pbtxt run_and_check_server -python3 ${PYTHON_TEST} 2>&1 | tee ${CLIENT_LOG} +python3 ${HISTOGRAM_PYTEST} TestHistogramMetrics.test_ensemble_decoupled 2>&1 | tee ${CLIENT_LOG} kill_server check_unit_test +### Test model metrics configuration +MODELDIR="${PWD}/model_metrics_model" +SERVER_LOG="./model_metric_config_server.log" +CLIENT_LOG="./model_metric_config_client.log" +decoupled_model="async_execute_decouple" +rm -rf "${MODELDIR}/${decoupled_model}" +mkdir -p "${MODELDIR}/${decoupled_model}/1/" +cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/ + +# Test valid model_metrics config +cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/ +cat >> "${MODELDIR}/${decoupled_model}/config.pbtxt" << EOL +model_metrics { + metric_control: [ + { + metric_identifier: { + family: "nv_inference_first_response_histogram_ms" + } + histogram_options: { + buckets: [ -1, 0.0, 1, 2.5 ] + } + } + ] +} +EOL + +SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=true 
--log-verbose=1" +run_and_check_server +export OVERRIDE_BUCKETS="-1,0,1,2.5,+Inf" +python3 ${HISTOGRAM_PYTEST} TestHistogramMetrics.test_buckets_override 2>&1 | tee ${CLIENT_LOG} +check_unit_test +kill_server + +# Test valid model_metrics config with histogram disabled +PYTHON_TEST="metrics_config_test.py" +SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=false --log-verbose=1" +run_and_check_server +python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_histograms_decoupled_missing 2>&1 | tee ${CLIENT_LOG} +check_unit_test +kill_server + if [ $RET -eq 0 ]; then echo -e "\n***\n*** Test Passed\n***" else diff --git a/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/expected b/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/expected new file mode 100644 index 0000000000..0b02a03bf8 --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/expected @@ -0,0 +1 @@ +histogram options must specify non-empty 'buckets' diff --git a/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/partial.pbtxt new file mode 100644 index 0000000000..21ebc3c6bf --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/partial.pbtxt @@ -0,0 +1,37 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +model_metrics { + metric_control: [ + { + metric_identifier: { + family: "nv_inference_first_response_histogram_ms" + } + histogram_options: { + buckets: [] + } + } + ] +} diff --git a/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/expected b/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/expected new file mode 100644 index 0000000000..27e356c734 --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/expected @@ -0,0 +1 @@ +metric identifier must specify non-empty 'family' diff --git a/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/partial.pbtxt new file mode 100644 index 0000000000..80fa3a3d47 --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/partial.pbtxt @@ -0,0 +1,37 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+model_metrics { + metric_control: [ + { + metric_identifier: { + family: "" + } + histogram_options: { + buckets: [ 1, 2, 4, 8 ] + } + } + ] +} diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_buckets/expected b/qa/L0_model_config/model_metrics/invalid_config/no_buckets/expected new file mode 100644 index 0000000000..0b02a03bf8 --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/no_buckets/expected @@ -0,0 +1 @@ +histogram options must specify non-empty 'buckets' diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_buckets/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/no_buckets/partial.pbtxt new file mode 100644 index 0000000000..902e5ae62d --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/no_buckets/partial.pbtxt @@ -0,0 +1,35 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +model_metrics { + metric_control: [ + { + metric_identifier: { + family: "nv_inference_first_response_histogram_ms" + } + histogram_options: {} + } + ] +} diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/expected b/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/expected new file mode 100644 index 0000000000..5df31d97ab --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/expected @@ -0,0 +1 @@ +metric control must specify 'histogram_options' diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/partial.pbtxt new file mode 100644 index 0000000000..68a6b819df --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/partial.pbtxt @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +model_metrics { + metric_control: [ + { + metric_identifier: { + family: "nv_inference_first_response_histogram_ms" + } + } + ] +} diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_metric_family/expected b/qa/L0_model_config/model_metrics/invalid_config/no_metric_family/expected new file mode 100644 index 0000000000..27e356c734 --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/no_metric_family/expected @@ -0,0 +1 @@ +metric identifier must specify non-empty 'family' diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_metric_family/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/no_metric_family/partial.pbtxt new file mode 100644 index 0000000000..11147f5a7e --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/no_metric_family/partial.pbtxt @@ -0,0 +1,35 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+model_metrics { + metric_control: [ + { + metric_identifier: {} + histogram_options: { + buckets: [ 1, 2, 4, 8 ] + } + } + ] +} diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_metric_identifier/expected b/qa/L0_model_config/model_metrics/invalid_config/no_metric_identifier/expected new file mode 100644 index 0000000000..d06f25af43 --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/no_metric_identifier/expected @@ -0,0 +1 @@ +metric control must specify 'metric_identifier' diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_metric_identifier/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/no_metric_identifier/partial.pbtxt new file mode 100644 index 0000000000..916f95eb4d --- /dev/null +++ b/qa/L0_model_config/model_metrics/invalid_config/no_metric_identifier/partial.pbtxt @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +model_metrics { + metric_control: [ + { + histogram_options: { + buckets: [ 1, 2, 4, 8 ] + } + } + ] +} diff --git a/qa/L0_model_config/model_metrics/valid_config/valid_model_metrics/partial.pbtxt b/qa/L0_model_config/model_metrics/valid_config/valid_model_metrics/partial.pbtxt new file mode 100644 index 0000000000..feb879230d --- /dev/null +++ b/qa/L0_model_config/model_metrics/valid_config/valid_model_metrics/partial.pbtxt @@ -0,0 +1,37 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +model_metrics { + metric_control: [ + { + metric_identifier: { + family: "nv_inference_first_response_histogram_ms" + } + histogram_options: { + buckets: [ 1, 2, 4, 8] + } + } + ] +} diff --git a/qa/L0_model_config/model_metrics/valid_config_with_warn/unknown_metric_family/expected b/qa/L0_model_config/model_metrics/valid_config_with_warn/unknown_metric_family/expected new file mode 100644 index 0000000000..57fd06e3f7 --- /dev/null +++ b/qa/L0_model_config/model_metrics/valid_config_with_warn/unknown_metric_family/expected @@ -0,0 +1 @@ +Metric family 'nv_inference_request_success' in 'metric_identifier' is not a customizable metric in Triton core. diff --git a/qa/L0_model_config/model_metrics/valid_config_with_warn/unknown_metric_family/partial.pbtxt b/qa/L0_model_config/model_metrics/valid_config_with_warn/unknown_metric_family/partial.pbtxt new file mode 100644 index 0000000000..54f191feab --- /dev/null +++ b/qa/L0_model_config/model_metrics/valid_config_with_warn/unknown_metric_family/partial.pbtxt @@ -0,0 +1,37 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +model_metrics { + metric_control: [ + { + metric_identifier: { + family: "nv_inference_request_success" + } + histogram_options: { + buckets: [ 1, 2, 4, 8] + } + } + ] +} diff --git a/qa/L0_model_config/test.sh b/qa/L0_model_config/test.sh index 55133e69d9..6071547de1 100755 --- a/qa/L0_model_config/test.sh +++ b/qa/L0_model_config/test.sh @@ -39,7 +39,6 @@ if [ ! 
-z "$TEST_REPO_ARCH" ]; then fi CLIENT_LOG="./client.log" -CLIENT=model_config_test.py SERVER=/opt/tritonserver/bin/tritonserver SERVER_TIMEOUT=20 @@ -311,6 +310,14 @@ cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/openvino_int8_in cp /data/inferenceserver/${REPO_VERSION}/qa_model_repository/openvino_int8_int8_int8/output0_labels.txt \ autofill_noplatform_success/openvino/partial_config +# Copy decoupled model into the model_metrics test repository. +for modelpath in `ls -d model_metrics/*/*`; do + src_dir="/opt/tritonserver/qa/python_models/async_execute_decouple" + mkdir -p $modelpath/1 + cp $src_dir/model.py $modelpath/1/. + cat $src_dir/config.pbtxt $modelpath/partial.pbtxt > $modelpath/config.pbtxt +done + rm -f $SERVER_LOG_BASE* $CLIENT_LOG RET=0 @@ -630,6 +637,103 @@ for TARGET_DIR in `ls -d autofill_noplatform_success/*/*`; do fi done +# Run all model_metrics tests that are expected to be successful. +for TARGET_DIR in `ls -d model_metrics/valid_config/*`; do + TARGET_DIR_DOT=`echo $TARGET_DIR | tr / .` + + SERVER_ARGS="--model-repository=`pwd`/models --metrics-config histogram_latencies=true" + SERVER_LOG=$SERVER_LOG_BASE.${TARGET_DIR_DOT}.log + + rm -fr models && mkdir models + cp -r ${TARGET_DIR} models/. + + echo -e "Test $TARGET_DIR" >> $CLIENT_LOG + + # We expect all tests to succeed + run_server + if [ "$SERVER_PID" == "0" ]; then + echo -e "*** FAILED: unable to start $SERVER" >> $CLIENT_LOG + RET=1 + else + kill $SERVER_PID + wait $SERVER_PID + fi +done + +# Run all model_metrics tests that are expected to be successful but with warnings. +for TARGET_DIR in `ls -d model_metrics/valid_config_with_warn/*`; do + TARGET_DIR_DOT=`echo $TARGET_DIR | tr / .` + TARGET=`basename ${TARGET_DIR}` + + SERVER_ARGS="--model-repository=`pwd`/models --metrics-config histogram_latencies=true" + SERVER_LOG=$SERVER_LOG_BASE.${TARGET_DIR_DOT}.log + + rm -fr models && mkdir models + cp -r ${TARGET_DIR} models/. 
+ + EXPECTED=models/$TARGET/expected + echo -e "Test $TARGET_DIR" >> $CLIENT_LOG + + # We expect all tests to succeed with the expected warning message + run_server + if [ "$SERVER_PID" == "0" ]; then + echo -e "*** FAILED: unable to start $SERVER" >> $CLIENT_LOG + RET=1 + else + EXFOUND=0 + EX=`cat $EXPECTED` + if grep ^W[0-9][0-9][0-9][0-9].*"$EX" $SERVER_LOG; then + echo -e "Found \"$EX\"" >> $CLIENT_LOG + EXFOUND=1 + else + echo -e "Not found \"$EX\"" >> $CLIENT_LOG + fi + if [ "$EXFOUND" == "0" ]; then + echo -e "*** FAILED: model_metrics/$TARGET" >> $CLIENT_LOG + RET=1 + fi + kill $SERVER_PID + wait $SERVER_PID + fi +done + +# Run all model_metrics tests that are missing required fields. +for TARGET_DIR in `ls -d model_metrics/invalid_config/*`; do + TARGET_DIR_DOT=`echo $TARGET_DIR | tr / .` + TARGET=`basename ${TARGET_DIR}` + + SERVER_ARGS="--model-repository=`pwd`/models --metrics-config histogram_latencies=true" + SERVER_LOG=$SERVER_LOG_BASE.${TARGET_DIR_DOT}.log + + rm -fr models && mkdir models + cp -r ${TARGET_DIR} models/. + + EXPECTED=models/$TARGET/expected + echo -e "Test $TARGET_DIR" >> $CLIENT_LOG + + # We expect all tests to fail with the expected error message + run_server + if [ "$SERVER_PID" != "0" ]; then + echo -e "*** FAILED: unexpected success starting $SERVER" >> $CLIENT_LOG + RET=1 + kill $SERVER_PID + wait $SERVER_PID + else + EXFOUND=0 + EX=`cat $EXPECTED` + if grep ^E[0-9][0-9][0-9][0-9].*"$EX" $SERVER_LOG; then + echo -e "Found \"$EX\"" >> $CLIENT_LOG + EXFOUND=1 + else + echo -e "Not found \"$EX\"" >> $CLIENT_LOG + fi + if [ "$EXFOUND" == "0" ]; then + echo -e "*** FAILED: model_metrics/$TARGET" >> $CLIENT_LOG + RET=1 + fi + fi +done + if [ $RET -eq 0 ]; then echo -e "\n***\n*** Test Passed\n***" else