Skip to content

Commit

Permalink
fix: each metric should have an expire time (#11838)
Browse files Browse the repository at this point in the history
  • Loading branch information
Revolyssup authored Dec 17, 2024
1 parent e3c904a commit de7bd0b
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 105 deletions.
20 changes: 12 additions & 8 deletions apisix/plugins/prometheus/exporter.lua
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,14 @@ function _M.http_init(prometheus_enabled_in_stream)
metric_prefix = attr.metric_prefix
end

local exptime
if attr and attr.expire then
exptime = attr.expire
end
local status_metrics_exptime = core.table.try_read_attr(attr, "metrics",
"http_status", "expire")
local latency_metrics_exptime = core.table.try_read_attr(attr, "metrics",
"http_latency", "expire")
local bandwidth_metrics_exptime = core.table.try_read_attr(attr, "metrics",
"bandwidth", "expire")
local upstream_status_exptime = core.table.try_read_attr(attr, "metrics",
"upstream_status", "expire")

prometheus = base_prometheus.init("prometheus-metrics", metric_prefix)

Expand Down Expand Up @@ -172,7 +176,7 @@ function _M.http_init(prometheus_enabled_in_stream)
metrics.upstream_status = prometheus:gauge("upstream_status",
"Upstream status from health check",
{"name", "ip", "port"},
exptime)
upstream_status_exptime)

-- per service

Expand All @@ -183,7 +187,7 @@ function _M.http_init(prometheus_enabled_in_stream)
"HTTP status codes per service in APISIX",
{"code", "route", "matched_uri", "matched_host", "service", "consumer", "node",
unpack(extra_labels("http_status"))},
exptime)
status_metrics_exptime)

local buckets = DEFAULT_BUCKETS
if attr and attr.default_buckets then
Expand All @@ -193,12 +197,12 @@ function _M.http_init(prometheus_enabled_in_stream)
metrics.latency = prometheus:histogram("http_latency",
"HTTP request latency in milliseconds per service in APISIX",
{"type", "route", "service", "consumer", "node", unpack(extra_labels("http_latency"))},
buckets, exptime)
buckets, latency_metrics_exptime)

metrics.bandwidth = prometheus:counter("bandwidth",
"Total bandwidth in bytes consumed per service in APISIX",
{"type", "route", "service", "consumer", "node", unpack(extra_labels("bandwidth"))},
exptime)
bandwidth_metrics_exptime)

if prometheus_enabled_in_stream then
init_stream_metrics()
Expand Down
12 changes: 8 additions & 4 deletions conf/config.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -596,25 +596,29 @@ plugin_attr: # Plugin attributes
port: 9091 # Set the port.
# metrics: # Per-metric settings: expiration time, and extra labels created from nginx variables: https://nginx.org/en/docs/varindex.html
# http_status:
# expire: 0 # The expiration time after which metrics are removed. unit: second.
# # 0 means the metrics will not expire
# extra_labels:
# - upstream_addr: $upstream_addr
# - status: $upstream_status # The label name does not need to be the same as the variable name.
# http_latency:
# expire: 0 # The expiration time after which metrics are removed. unit: second.
# # 0 means the metrics will not expire
# extra_labels:
# - upstream_addr: $upstream_addr
# bandwidth:
# expire: 0 # The expiration time after which metrics are removed. unit: second.
# # 0 means the metrics will not expire
# extra_labels:
# - upstream_addr: $upstream_addr
# upstream_status:
# expire: 0 # The expiration time after which metrics are removed. unit: second.
# # 0 means the metrics will not expire
# default_buckets:
# - 10
# - 50
# - 100
# - 200
# - 500
# expire: 0 # The expiration time of metrics in seconds.
# 0 means the metrics will not expire.
# Only affect apisix_http_status, apisix_bandwidth, and apisix_http_latency.
# If you need to set the expiration time, it is recommended to use 600, which is 10 minutes.
server-info: # Plugin: server-info
report_ttl: 60 # Set the TTL in seconds for server info in etcd.
# Maximum: 86400. Minimum: 3.
Expand Down
132 changes: 132 additions & 0 deletions t/plugin/prometheus-metric-expire.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Compile-time environment setup; runs before `use t::APISIX` is loaded.
# NOTE(review): presumably TEST_NGINX_USE_HUP makes Test::Nginx reload nginx
# with SIGHUP between test blocks -- confirm against the Test::Nginx docs.
BEGIN {
if ($ENV{TEST_NGINX_CHECK_LEAK}) {
# Leak-check runs cannot be combined with the HUP mode used by this file,
# so record a skip reason instead of enabling it.
$SkipReason = "unavailable for the hup tests";

} else {
# Enable HUP mode and make sure the systemtap mode variable is unset.
$ENV{TEST_NGINX_USE_HUP} = 1;
undef $ENV{TEST_NGINX_USE_STAP};
}
}

use t::APISIX 'no_plan';

# Preprocess every test block before it runs: a block that does not declare
# its own `--- request` section gets the default request "GET /t".
add_block_preprocessor(sub {
my ($block) = @_;

# Only fill in the default; an explicit request section is left untouched.
if (!defined $block->request) {
$block->set_value("request", "GET /t");
}
});

run_tests;

__DATA__
=== TEST 1: set route with prometheus ttl
--- yaml_config
plugin_attr:
prometheus:
default_buckets:
- 15
- 55
- 105
- 205
- 505
metrics:
http_status:
expire: 1
http_latency:
expire: 1
bandwidth:
expire: 1
--- config
location /t {
    content_by_lua_block {
        -- Scenario: with a 1 second expire configured for http_status,
        -- http_latency and bandwidth, send one request through a route that
        -- has the prometheus plugin, wait 2 seconds, then verify that none of
        -- those metric families is still exported.
        local t = require("lib.test_admin").test

        -- expose the metrics endpoint through the public-api plugin
        local code = t('/apisix/admin/routes/metrics',
            ngx.HTTP_PUT,
            [[{
                "plugins": {
                    "public-api": {}
                },
                "uri": "/apisix/prometheus/metrics"
            }]]
        )
        if code >= 300 then
            ngx.status = code
            return
        end

        -- route whose traffic is recorded by the prometheus plugin
        local code, body = t('/apisix/admin/routes/1',
            ngx.HTTP_PUT,
            [[{
                "plugins": {
                    "prometheus": {}
                },
                "upstream": {
                    "nodes": {
                        "127.0.0.1:1980": 1
                    },
                    "type": "roundrobin"
                },
                "uri": "/hello1"
            }]]
        )
        if code >= 300 then
            ngx.status = code
            ngx.say(body)
            return
        end

        -- generate one sample for each of the metrics under test
        local code, body = t('/hello1',
            ngx.HTTP_GET,
            "",
            nil,
            nil
        )
        if code >= 300 then
            ngx.status = code
            ngx.say(body)
            return
        end

        -- sleep longer than the configured expire time of 1 second
        ngx.sleep(2)

        local code, _, body = t('/apisix/prometheus/metrics',
            ngx.HTTP_GET,
            "",
            nil,
            nil
        )
        -- a failed fetch must not be mistaken for "metrics are gone"
        if code >= 300 then
            ngx.status = code
            ngx.say(body)
            return
        end

        -- Use the full exported names, including the default apisix_ prefix,
        -- and a plain substring search. A word-boundary regex on the bare
        -- name can never match here: the underscore is a word character, so
        -- there is no boundary inside apisix_http_latency_bucket or
        -- apisix_http_status and the check would silently pass.
        local metrics_to_check = {
            "apisix_bandwidth",
            "apisix_http_latency",
            "apisix_http_status",
        }
        for _, name in ipairs(metrics_to_check) do
            if body:find(name, 1, true) then
                ngx.status = 500
                ngx.say("error found " .. name .. " in metrics")
                return
            end
        end

        ngx.say("passed")
    }
}
--- request
GET /t
--- response_body
passed
98 changes: 5 additions & 93 deletions t/plugin/prometheus4.t
Original file line number Diff line number Diff line change
Expand Up @@ -192,95 +192,7 @@ apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",nod
=== TEST 9: set route with prometheus ttl
--- yaml_config
plugin_attr:
prometheus:
default_buckets:
- 15
- 55
- 105
- 205
- 505
expire: 1
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code = t('/apisix/admin/routes/metrics',
ngx.HTTP_PUT,
[[{
"plugins": {
"public-api": {}
},
"uri": "/apisix/prometheus/metrics"
}]]
)
if code >= 300 then
ngx.status = code
return
end
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"plugins": {
"prometheus": {}
},
"upstream": {
"nodes": {
"127.0.0.1:1980": 1
},
"type": "roundrobin"
},
"uri": "/hello1"
}]]
)
if code >= 300 then
ngx.status = code
ngx.say(body)
return
end
local code, body = t('/hello1',
ngx.HTTP_GET,
"",
nil,
nil
)
if code >= 300 then
ngx.status = code
ngx.say(body)
return
end
ngx.sleep(2)
local code, pass, body = t('/apisix/prometheus/metrics',
ngx.HTTP_GET,
"",
nil,
nil
)
ngx.status = code
ngx.say(body)
}
}
--- request
GET /t
--- response_body_unlike eval
qr/apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="15"\} \d+
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="55"\} \d+
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="105"\} \d+
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="205"\} \d+
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="505"\} \d+/
=== TEST 10: set sys plugins
=== TEST 9: set sys plugins
--- config
location /t {
content_by_lua_block {
Expand Down Expand Up @@ -332,7 +244,7 @@ passed
=== TEST 11: remove prometheus -> reload -> send batch request -> add prometheus for next tests
=== TEST 10: remove prometheus -> reload -> send batch request -> add prometheus for next tests
--- yaml_config
deployment:
role: traditional
Expand Down Expand Up @@ -366,7 +278,7 @@ qr/404 Not Found/
=== TEST 12: fetch prometheus metrics -> batch_process_entries metrics should not be present
=== TEST 11: fetch prometheus metrics -> batch_process_entries metrics should not be present
--- yaml_config
deployment:
role: traditional
Expand All @@ -387,14 +299,14 @@ qr/apisix_batch_process_entries\{name="sys-logger",route_id="9",server_addr="127
=== TEST 13: hit batch-process-metrics with prometheus enabled from TEST 11
=== TEST 12: hit batch-process-metrics with prometheus enabled from TEST 11
--- request
GET /batch-process-metrics
--- error_code: 404
=== TEST 14: batch_process_entries metrics should be present now
=== TEST 13: batch_process_entries metrics should be present now
--- request
GET /apisix/prometheus/metrics
--- error_code: 200
Expand Down

0 comments on commit de7bd0b

Please sign in to comment.