Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: Test model_metrics config and document histogram buckets override #7752

Merged
merged 7 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion docs/user_guide/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,34 @@ nv_inference_first_response_histogram_ms{model="my_model",version="1", le="5000"
nv_inference_first_response_histogram_ms{model="my_model",version="1", le="+Inf"} 37
```

Triton initializes histograms with default buckets for each, as shown above. Customization of buckets per metric is currently unsupported.
Triton initializes each histogram with default buckets, as shown above.
Buckets can be overridden per metric family by specifying `model_metrics` in the
model configuration. For example:
```
// config.pbtxt
model_metrics {
metric_control: [
{
metric_identifier: {
family: "nv_inference_first_response_histogram_ms"
}
histogram_options: {
buckets: [ 1, 2, 4, 8 ]
}
}
]
}
```

> **Note**
>
> To apply changes to metric options dynamically, the model must be completely
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
> unloaded and then reloaded for the updates to take effect.

Currently, the following histogram families support custom buckets:
```
nv_inference_first_response_histogram_ms // Time to First Response
```

#### Summaries

Expand Down
43 changes: 32 additions & 11 deletions qa/L0_metrics/histogram_metrics_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,16 @@
import test_util as tu

MILLIS_PER_SEC = 1000
FIRST_RESPONSE_HISTOGRAM = "nv_inference_first_response_histogram_ms"


def get_histogram_metric_key(
    metric_family, model_name, model_version, metric_type, le=""
):
    """Build the metrics-output key for one sample of a histogram family.

    Args:
        metric_family: Histogram family name used as the key prefix.
        model_name: Value of the ``model`` label.
        model_version: Value of the ``version`` label.
        metric_type: One of "count", "sum" or "bucket".
        le: Value of the ``le`` label; only used when metric_type is "bucket".

    Returns:
        The key string ``<family>_<type>{model="...",version="..."}`` (with an
        extra ``le="..."`` label for buckets), or None when metric_type is
        not one of the supported kinds.
    """
    # Bucket samples are the only ones that carry the upper-bound label.
    if metric_type == "bucket":
        return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}",le="{le}"}}'

    if metric_type in ("count", "sum"):
        return f'{metric_family}_{metric_type}{{model="{model_name}",version="{model_version}"}}'

    return None

Expand All @@ -55,16 +58,20 @@ class TestHistogramMetrics(tu.TestResultCollector):
def setUp(self):
self.tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

def get_histogram_metrics(self, metric_family: str):
def get_metrics(self, timeout=10):
    """Fetch the raw text served by the metrics endpoint on port 8002.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so an unresponsive
            server would hang the test run instead of failing it.

    Returns:
        The response body as a string.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (via ``raise_for_status``).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    r = requests.get(
        f"http://{self.tritonserver_ipaddr}:8002/metrics", timeout=timeout
    )
    r.raise_for_status()
    return r.text

def get_histogram_metrics(self, metric_family: str):
# Regular expression to match the pattern
pattern = f"^{metric_family}.*"
histogram_dict = {}

metrics = self.get_metrics()

# Find all matches in the text
matches = re.findall(pattern, r.text, re.MULTILINE)
matches = re.findall(pattern, metrics, re.MULTILINE)

for match in matches:
key, value = match.rsplit(" ")
Expand Down Expand Up @@ -135,24 +142,23 @@ def test_ensemble_decoupled(self):
)

# Checks metrics output
first_response_family = "nv_inference_first_response_histogram_ms"
histogram_dict = self.get_histogram_metrics(first_response_family)
histogram_dict = self.get_histogram_metrics(FIRST_RESPONSE_HISTOGRAM)

def check_existing_metrics(model_name, wait_secs_per_req, delta):
metric_count = get_histogram_metric_key(
first_response_family, model_name, "1", "count"
FIRST_RESPONSE_HISTOGRAM, model_name, "1", "count"
)
model_sum = get_histogram_metric_key(
first_response_family, model_name, "1", "sum"
metric_sum = get_histogram_metric_key(
FIRST_RESPONSE_HISTOGRAM, model_name, "1", "sum"
)
# Test histogram count
self.assertIn(metric_count, histogram_dict)
self.assertEqual(histogram_dict[metric_count], request_num)
# Test histogram sum
self.assertIn(model_sum, histogram_dict)
self.assertIn(metric_sum, histogram_dict)
self.assertTrue(
wait_secs_per_req * MILLIS_PER_SEC * request_num
<= histogram_dict[model_sum]
<= histogram_dict[metric_sum]
< (wait_secs_per_req + delta) * MILLIS_PER_SEC * request_num
)
# Prometheus histogram buckets are tested in metrics_api_test.cc::HistogramAPIHelper
Expand All @@ -165,14 +171,29 @@ def check_existing_metrics(model_name, wait_secs_per_req, delta):

# Test non-decoupled model metrics
non_decoupled_model_count = get_histogram_metric_key(
first_response_family, non_decoupled_model_name, "1", "count"
FIRST_RESPONSE_HISTOGRAM, non_decoupled_model_name, "1", "count"
)
non_decoupled_model_sum = get_histogram_metric_key(
first_response_family, non_decoupled_model_name, "1", "sum"
FIRST_RESPONSE_HISTOGRAM, non_decoupled_model_name, "1", "sum"
)
self.assertNotIn(non_decoupled_model_count, histogram_dict)
self.assertNotIn(non_decoupled_model_sum, histogram_dict)

def test_buckets_override(self):
    """Verify that the overridden histogram buckets appear in the metrics output.

    The expected ``le`` label values are read from the ``OVERRIDE_BUCKETS``
    environment variable as a comma-separated list. The test checks that the
    metrics text contains exactly that many first-response bucket samples and
    that a bucket key exists for every expected ``le`` value.
    """
    model_name = "async_execute_decouple"

    # Fail with a clear message instead of an AttributeError on None when the
    # caller forgot to export the expected buckets.
    raw_buckets = os.environ.get("OVERRIDE_BUCKETS")
    self.assertTrue(
        raw_buckets,
        "OVERRIDE_BUCKETS must be set to a comma-separated list of expected "
        "bucket 'le' values",
    )
    override_buckets = raw_buckets.split(",")

    metrics = self.get_metrics()

    # Check metric output
    self.assertEqual(
        metrics.count(FIRST_RESPONSE_HISTOGRAM + "_bucket"), len(override_buckets)
    )
    for le in override_buckets:
        bucket_key = get_histogram_metric_key(
            FIRST_RESPONSE_HISTOGRAM, model_name, "1", "bucket", le
        )
        self.assertIn(bucket_key, metrics)


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion qa/L0_metrics/metrics_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def test_cache_counters_missing(self):
def test_inf_histograms_decoupled_exist(self):
metrics = self._get_metrics()
for metric in INF_HISTOGRAM_DECOUPLED_PATTERNS:
for suffix in ["_count", "_sum", ""]:
for suffix in ["_count", "_sum", "_bucket"]:
self.assertIn(metric + suffix, metrics)

def test_inf_histograms_decoupled_missing(self):
Expand Down
59 changes: 51 additions & 8 deletions qa/L0_metrics/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ BASE_SERVER_ARGS="--model-repository=${MODELDIR}"
SERVER_ARGS="${BASE_SERVER_ARGS}"
SERVER_LOG="./inference_server.log"
PYTHON_TEST="metrics_config_test.py"
HISTOGRAM_PYTEST="histogram_metrics_test.py"
source ../common/util.sh

CLIENT_LOG="client.log"
Expand Down Expand Up @@ -301,12 +302,12 @@ check_unit_test
kill_server

# Check default settings: Histograms should be disabled in decoupled model
decoupled_model_name="async_execute_decouple"
mkdir -p "${MODELDIR}/${decoupled_model_name}/1/"
cp ../python_models/${decoupled_model_name}/model.py ${MODELDIR}/${decoupled_model_name}/1/
cp ../python_models/${decoupled_model_name}/config.pbtxt ${MODELDIR}/${decoupled_model_name}/
decoupled_model="async_execute_decouple"
mkdir -p "${MODELDIR}/${decoupled_model}/1/"
cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/
cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/

SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model_name}"
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model}"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
Expand All @@ -321,7 +322,7 @@ check_unit_test
kill_server

# Enable histograms in decoupled model
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model_name} --metrics-config histogram_latencies=true"
SERVER_ARGS="${BASE_SERVER_ARGS} --load-model=${decoupled_model} --metrics-config histogram_latencies=true"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_counters_exist 2>&1 | tee ${CLIENT_LOG}
check_unit_test
Expand Down Expand Up @@ -460,17 +461,59 @@ check_unit_test "${expected_tests}"

### Test histogram data in ensemble decoupled model ###
MODELDIR="${PWD}/ensemble_decoupled"
SERVER_LOG="./histogram_ensemble_decoupled_server.log"
CLIENT_LOG="./histogram_ensemble_decoupled_client.log"
SERVER_ARGS="--model-repository=${MODELDIR} --metrics-config histogram_latencies=true --log-verbose=1"
PYTHON_TEST="histogram_metrics_test.py"
mkdir -p "${MODELDIR}"/ensemble/1
cp -r "${MODELDIR}"/async_execute_decouple "${MODELDIR}"/async_execute
sed -i "s/model_transaction_policy { decoupled: True }//" "${MODELDIR}"/async_execute/config.pbtxt

run_and_check_server
python3 ${PYTHON_TEST} 2>&1 | tee ${CLIENT_LOG}
python3 ${HISTOGRAM_PYTEST} TestHistogramMetrics.test_ensemble_decoupled 2>&1 | tee ${CLIENT_LOG}
kill_server
check_unit_test

### Test model metrics configuration
# Uses a dedicated model repository and log files for this section.
MODELDIR="${PWD}/model_metrics_model"
SERVER_LOG="./model_metric_config_server.log"
CLIENT_LOG="./model_metric_config_client.log"
decoupled_model="async_execute_decouple"
# Remove any leftover copy of the model before recreating it.
rm -rf "${MODELDIR}/${decoupled_model}"
mkdir -p "${MODELDIR}/${decoupled_model}/1/"
cp ../python_models/${decoupled_model}/model.py ${MODELDIR}/${decoupled_model}/1/

# Test valid model_metrics config
cp ../python_models/${decoupled_model}/config.pbtxt ${MODELDIR}/${decoupled_model}/
# Append a model_metrics section overriding the buckets of the
# first-response histogram family to the copied config.
cat >> "${MODELDIR}/${decoupled_model}/config.pbtxt" << EOL
model_metrics {
metric_control: [
{
metric_identifier: {
family: "nv_inference_first_response_histogram_ms"
}
histogram_options: {
buckets: [ -1, 0.0, 1, 2.5 ]
}
}
]
}
EOL

SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=true --log-verbose=1"
run_and_check_server
# Expected bucket "le" label values, comma-separated, read by
# TestHistogramMetrics.test_buckets_override. They are written as they are
# expected to appear in the metrics output (e.g. 0.0 above is listed as "0"),
# and include the "+Inf" bucket that is not part of the configured list.
export OVERRIDE_BUCKETS="-1,0,1,2.5,+Inf"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are still hard-coding the buckets value..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard-coded here because it is non-trivial to convert a double given as a string into the bucket "le" value that Prometheus displays.

0.00 -> le="0"
1.0100 -> le="1.01"
100 -> le="100"
000 -> le="0"
1. -> le="1"

python3 ${HISTOGRAM_PYTEST} TestHistogramMetrics.test_buckets_override 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

# Test valid model_metrics config with histogram disabled
PYTHON_TEST="metrics_config_test.py"
SERVER_ARGS="--model-repository=${MODELDIR} --model-control-mode=explicit --load-model=${decoupled_model} --metrics-config histogram_latencies=false --log-verbose=1"
run_and_check_server
python3 ${PYTHON_TEST} MetricsConfigTest.test_inf_histograms_decoupled_missing 2>&1 | tee ${CLIENT_LOG}
check_unit_test
kill_server

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
else
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
histogram options must specify non-empty 'buckets'
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
metric_control: [
{
metric_identifier: {
family: "nv_inference_first_response_histogram_ms"
}
histogram_options: {
buckets: []
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
metric identifier must specify non-empty 'family'
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
metric_control: [
{
metric_identifier: {
family: ""
}
histogram_options: {
buckets: [ 1, 2, 4, 8 ]
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
histogram options must specify non-empty 'buckets'
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
metric_control: [
{
metric_identifier: {
family: "nv_inference_first_response_histogram_ms"
}
histogram_options: {}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
metric control must specify 'histogram_options'
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
model_metrics {
metric_control: [
{
metric_identifier: {
family: "nv_inference_first_response_histogram_ms"
}
}
]
}
Loading
Loading