triton-inference-server · yinggeh · Nov 6, 2024 · Oct 29, 2024 · Oct 31, 2024 · Nov 4, 2024
diff --git a/docs/user_guide/metrics.md b/docs/user_guide/metrics.md
@@ -239,7 +239,34 @@ nv_inference_first_response_histogram_ms{model="my_model",version="1", le="5000"
 nv_inference_first_response_histogram_ms{model="my_model",version="1", le="+Inf"} 37
 ```
 
-Triton initializes histograms with default buckets for each, as shown above. Customization of buckets per metric is currently unsupported.
+Triton initializes each histogram with default buckets, as shown above.
+Buckets can be overridden per metric family by specifying `model_metrics` in
+the model configuration. For example:
+```
+// config.pbtxt
+model_metrics {
+  metric_control: [
+    {
+      metric_identifier: {
+        family: "nv_inference_first_response_histogram_ms"
+      }
+      histogram_options: {
+        buckets: [ 1, 2, 4, 8 ]
+      }
+    }
+  ]
+}
+```
+
+> **Note**
+>
+> To apply changes to metric options dynamically, the model must be completely
+> unloaded and then reloaded for the updates to take effect.
+
+Currently, the following histogram families support custom buckets:
+```
+nv_inference_first_response_histogram_ms  // Time to First Response
+```
 
 #### Summaries
 

diff --git a/qa/L0_metrics/histogram_metrics_test.py b/qa/L0_metrics/histogram_metrics_test.py
@@ -40,13 +40,16 @@
 import test_util as tu
 
 MILLIS_PER_SEC = 1000
+FIRST_RESPONSE_HISTOGRAM = "nv_inference_first_response_histogram_ms"
 
 
 def get_histogram_metric_key(
     metric_family, model_name, model_version, metric_type, le=""
 ):
+    """Build the metrics-text key for one histogram sample of a model.
+
+    For a metric_type of "count" or "sum" the key carries the model and
+    version labels; for "bucket" it additionally carries the `le` label.
+    Any other metric_type yields None.
+    """
     if metric_type in ["count", "sum"]:
         return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}"}}'
+    elif metric_type == "bucket":
+        return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}",le="{le}"}}'
     else:
         return None
 
@@ -55,16 +58,20 @@ class TestHistogramMetrics(tu.TestResultCollector):
     def setUp(self):
         self.tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")
 
-    def get_histogram_metrics(self, metric_family: str):
+    def get_metrics(self):
+        """Return the raw text served by the metrics endpoint on port 8002."""
         r = requests.get(f"http://{self.tritonserver_ipaddr}:8002/metrics")
+        # Fail the test immediately on a 4xx/5xx response.
         r.raise_for_status()
+        return r.text
 
+    def get_histogram_metrics(self, metric_family: str):
         # Regular expression to match the pattern
         pattern = f"^{metric_family}.*"
         histogram_dict = {}
 
+        metrics = self.get_metrics()
+
         # Find all matches in the text
-        matches = re.findall(pattern, r.text, re.MULTILINE)
+        matches = re.findall(pattern, metrics, re.MULTILINE)
 
         for match in matches:
             key, value = match.rsplit(" ")
@@ -135,24 +142,23 @@ def test_ensemble_decoupled(self):
             )
 
             # Checks metrics output
-            first_response_family = "nv_inference_first_response_histogram_ms"
-            histogram_dict = self.get_histogram_metrics(first_response_family)
+            histogram_dict = self.get_histogram_metrics(FIRST_RESPONSE_HISTOGRAM)
 
             def check_existing_metrics(model_name, wait_secs_per_req, delta):
                 metric_count = get_histogram_metric_key(
-                    first_response_family, model_name, "1", "count"
+                    FIRST_RESPONSE_HISTOGRAM, model_name, "1", "count"
                 )
-                model_sum = get_histogram_metric_key(
-                    first_response_family, model_name, "1", "sum"
+                metric_sum = get_histogram_metric_key(
+                    FIRST_RESPONSE_HISTOGRAM, model_name, "1", "sum"
                 )
                 # Test histogram count
                 self.assertIn(metric_count, histogram_dict)
                 self.assertEqual(histogram_dict[metric_count], request_num)
                 # Test histogram sum
-                self.assertIn(model_sum, histogram_dict)
+                self.assertIn(metric_sum, histogram_dict)
                 self.assertTrue(
                     wait_secs_per_req * MILLIS_PER_SEC * request_num
-                    <= histogram_dict[model_sum]
+                    <= histogram_dict[metric_sum]
                     < (wait_secs_per_req + delta) * MILLIS_PER_SEC * request_num
                 )
                 # Prometheus histogram buckets are tested in metrics_api_test.cc::HistogramAPIHelper
@@ -165,14 +171,29 @@ def check_existing_metrics(model_name, wait_secs_per_req, delta):
 
             # Test non-decoupled model metrics
             non_decoupled_model_count = get_histogram_metric_key(
-                first_response_family, non_decoupled_model_name, "1", "count"
+                FIRST_RESPONSE_HISTOGRAM, non_decoupled_model_name, "1", "count"
             )
             non_decoupled_model_sum = get_histogram_metric_key(
-                first_response_family, non_decoupled_model_name, "1", "sum"
+                FIRST_RESPONSE_HISTOGRAM, non_decoupled_model_name, "1", "sum"
             )
             self.assertNotIn(non_decoupled_model_count, histogram_dict)
             self.assertNotIn(non_decoupled_model_sum, histogram_dict)
 
+    def test_buckets_override(self):
+        model_name = "async_execute_decouple"
+        metrics = self.get_metrics()
+        override_buckets = [x for x in os.environ.get("OVERRIDE_BUCKETS").split(",")]
+
+        # Check metric output
+        self.assertEqual(
+            metrics.count(FIRST_RESPONSE_HISTOGRAM + "_bucket"), len(override_buckets)
+        )
+        for le in override_buckets:
+            bucket_key = get_histogram_metric_key(
+                FIRST_RESPONSE_HISTOGRAM, model_name, "1", "bucket", le
+            )
+            self.assertIn(bucket_key, metrics)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/qa/L0_metrics/metrics_config_test.py b/qa/L0_metrics/metrics_config_test.py
@@ -102,7 +102,7 @@ def test_cache_counters_missing(self):
     def test_inf_histograms_decoupled_exist(self):
+        # Every decoupled histogram pattern must appear in the metrics text
+        # with each of its _count, _sum and _bucket suffixes.
         metrics = self._get_metrics()
         for metric in INF_HISTOGRAM_DECOUPLED_PATTERNS:
-            for suffix in ["_count", "_sum", ""]:
+            for suffix in ["_count", "_sum", "_bucket"]:
                 self.assertIn(metric + suffix, metrics)
 
     def test_inf_histograms_decoupled_missing(self):

diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh
@@ -46,6 +46,7 @@ BASE_SERVER_ARGS="--model-repository=${MODELDIR}"
 SERVER_ARGS="${BASE_SERVER_ARGS}"
 SERVER_LOG="./inference_server.log"
 PYTHON_TEST="metrics_config_test.py"
+HISTOGRAM_PYTEST="histogram_metrics_test.py"
 source ../common/util.sh
 
 CLIENT_LOG="client.log"
@@ -301,12 +302,12 @@ check_unit_test
 kill_server
 
 # Check default settings: Histograms should be disabled in decoupled model
-decoupled_model_name="async_execute_decouple"
-mkdir -p "${MODELDIR}/${decoupled_model_name}/1/"
-cp ../python_models/${decoupled_model_name}/model.py ${MODELDIR}/${decoupled_model_name}/1/
-cp ../python_models/${decoupled_model_name}/config.pbtxt ${MODELDIR}/${decoupled_model_name}/
+decoupled_model="async_execute_decouple"
+mkdir -p "${MODELDIR}/${decoupled_model}/1/"
+cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/
+cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/
 
-SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model_name}"
+SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model}"
 run_and_check_server
 python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
 check_unit_test
@@ -321,7 +322,7 @@ check_unit_test
 kill_server
 
 # Enable histograms in decoupled model
-SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model_name} --metrics-config histogram_latencies=true"
+SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model} --metrics-config histogram_latencies=true"
 run_and_check_server
 python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
 check_unit_test
@@ -460,17 +461,59 @@ check_unit_test "${expected_tests}"
 
 ### Test histogram data in ensemble decoupled model ###
 MODELDIR="${PWD}/ensemble_decoupled"
+SERVER_LOG="./histogram_ensemble_decoupled_server.log"
+CLIENT_LOG="./histogram_ensemble_decoupled_client.log"
 SERVER_ARGS="--model-repository=${MODELDIR} --metrics-config histogram_latencies=true --log-verbose=1"
-PYTHON_TEST="histogram_metrics_test.py"
 mkdir -p "${MODELDIR}"/ensemble/1
 cp -r "${MODELDIR}"/async_execute_decouple "${MODELDIR}"/async_execute
 sed -i "s/model_transaction_policy { decoupled: True }//" "${MODELDIR}"/async_execute/config.pbtxt
 
 run_and_check_server
-python3 ${PYTHON_TEST} 2>&1 | tee ${CLIENT_LOG}
+python3 ${HISTOGRAM_PYTEST} TestHistogramMetrics.test_ensemble_decoupled 2>&1 | tee ${CLIENT_LOG}
 kill_server
 check_unit_test
 
+### Test model metrics configuration ###
+MODELDIR="${PWD}/model_metrics_model"
+SERVER_LOG="./model_metric_config_server.log"
+CLIENT_LOG="./model_metric_config_client.log"
+decoupled_model="async_execute_decouple"
+rm -rf "${MODELDIR}/${decoupled_model}"
+mkdir -p "${MODELDIR}/${decoupled_model}/1/"
+cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/
+
+# Test valid model_metrics config
+cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/
+cat >> "${MODELDIR}/${decoupled_model}/config.pbtxt" << EOL
+model_metrics {
+  metric_control: [
+    {
+      metric_identifier: {
+        family: "nv_inference_first_response_histogram_ms"
+      }
+      histogram_options: {
+        buckets: [ -1, 0.0, 1, 2.5 ]
+      }
+    }
+  ]
+}
+EOL
+
+SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=true --log-verbose=1"
+run_and_check_server
+export OVERRIDE_BUCKETS="-1,0,1,2.5,+Inf"
+python3 ${HISTOGRAM_PYTEST} TestHistogramMetrics.test_buckets_override 2>&1 | tee ${CLIENT_LOG}
+check_unit_test
+kill_server
+
+# Test valid model_metrics config with histograms disabled
+PYTHON_TEST="metrics_config_test.py"
+SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=false --log-verbose=1"
+run_and_check_server
+python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_histograms_decoupled_missing 2>&1 | tee ${CLIENT_LOG}
+check_unit_test
+kill_server
+
 if [ $RET -eq 0 ]; then
   echo -e "\n***\n*** Test Passed\n***"
 else

diff --git a/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/expected b/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/expected
@@ -0,0 +1 @@
+histogram options must specify non-empty 'buckets'
diff --git a/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/empty_buckets/partial.pbtxt
@@ -0,0 +1,37 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+model_metrics {
+  metric_control: [
+    {
+      metric_identifier: {
+        family: "nv_inference_first_response_histogram_ms"
+      }
+      histogram_options: {
+        buckets: []
+      }
+    }
+  ]
+}
diff --git a/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/expected b/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/expected
@@ -0,0 +1 @@
+metric identifier must specify non-empty 'family'
diff --git a/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/empty_metric_family/partial.pbtxt
@@ -0,0 +1,37 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+model_metrics {
+  metric_control: [
+    {
+      metric_identifier: {
+        family: ""
+      }
+      histogram_options: {
+        buckets: [ 1, 2, 4, 8 ]
+      }
+    }
+  ]
+}
diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_buckets/expected b/qa/L0_model_config/model_metrics/invalid_config/no_buckets/expected
@@ -0,0 +1 @@
+histogram options must specify non-empty 'buckets'
diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_buckets/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/no_buckets/partial.pbtxt
@@ -0,0 +1,35 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+model_metrics {
+  metric_control: [
+    {
+      metric_identifier: {
+        family: "nv_inference_first_response_histogram_ms"
+      }
+      histogram_options: {}
+    }
+  ]
+}
diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/expected b/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/expected
@@ -0,0 +1 @@
+metric control must specify 'histogram_options'
diff --git a/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/partial.pbtxt b/qa/L0_model_config/model_metrics/invalid_config/no_histogram_options/partial.pbtxt
@@ -0,0 +1,34 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+model_metrics {
+  metric_control: [
+    {
+      metric_identifier: {
+        family: "nv_inference_first_response_histogram_ms"
+      }
+    }
+  ]
+}