From e0ccb04f4976935969a7889bb6160029ea17bf5d Mon Sep 17 00:00:00 2001
From: Will Kahn-Greene
Date: Mon, 24 Jun 2024 13:52:32 -0400
Subject: [PATCH] Implement RegisteredMetricsFilter (#15)

This implements a filter that makes it possible to define a set of
metrics in an external file and enforce that only those metrics are
emitted. This helps reduce typos and improves documentation of metrics.

This takes a structure rather than a path to an external file. In this
way, it stays generic, allowing users to define metrics in whatever way
they find most convenient, e.g. JSON, YAML, Python, etc.
---
 src/markus/filters.py | 140 ++++++++++++++++++++++++++++++-
 tests/test_filters.py | 187 ++++++++++++++++++++++++++++++++++++++++++
 tests/test_metrics.py |  11 ---
 3 files changed, 324 insertions(+), 14 deletions(-)
 create mode 100644 tests/test_filters.py

diff --git a/src/markus/filters.py b/src/markus/filters.py
index b1a9cb4..07c92c4 100644
--- a/src/markus/filters.py
+++ b/src/markus/filters.py
@@ -2,7 +2,141 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-from markus.main import MetricsFilter
+import logging
+from typing import Dict
+
+from markus.main import MetricsFilter, MetricsRecord
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+class MetricsException(Exception):
+    pass
+
+
+class MetricsInvalidSchema(MetricsException):
+    pass
+
+
+class MetricsUnknownKey(MetricsException):
+    pass
+
+
+class MetricsWrongType(MetricsException):
+    pass
+
+
+RegisteredMetricsType = Dict[str, Dict[str, str]]
+
+
+def _validate_registered_metrics(registered_metrics: RegisteredMetricsType):
+    if not isinstance(registered_metrics, dict):
+        raise MetricsInvalidSchema("registered_metrics is not a dict")
+
+    for key, val in registered_metrics.items():
+        if not isinstance(key, str):
+            raise MetricsInvalidSchema(f"key {key!r} is not a str")
+
+        if not isinstance(val, dict):
+            raise MetricsInvalidSchema(f"key {key!r} has a non-dict value")
+
+        if "type" not in val.keys() or "description" not in val.keys():
+            raise MetricsInvalidSchema(
+                f"key {key!r} has value missing type or description"
+            )
+
+        if val["type"] not in ["incr", "gauge", "timing", "histogram"]:
+            raise MetricsInvalidSchema(
+                f"key {key!r} type is {val['type']}; "
+                + "not one of incr, gauge, timing, histogram"
+            )
+
+        if not isinstance(val["description"], str):
+            raise MetricsInvalidSchema(f"key {key!r} description is not a str")
+
+
+class RegisteredMetricsFilter(MetricsFilter):
+    """Filter that validates emitted metrics against a set of registered metrics.
+
+    This is a Markus metrics filter. It complains about any emitted metric
+    that it doesn't know about.
+
+    Registered metrics should be a dict structured like this::
+
+        {
+            KEY -> {
+                "type": str,  # one of "incr" | "gauge" | "timing" | "histogram"
+                "description": str,  # can use markdown
+            },
+            ...
+        }
+
+    For example::
+
+        {
+            "eliot.symbolicate_api": {
+                "type": "timing",
+                "description": "Timer for how long a symbolication API request takes.",
+            },
+            "eliot.symbolicate.proxied": {
+                "type": "incr",
+                "description": "Counter for symbolication requests.",
+            },
+            ...
+        }
+
+    You can include additional information to suit your needs::
+
+        {
+            "eliot.symbolicate_api": {
+                "type": "timing",
+                "description": "Timer for how long a symbolication API request takes.",
+                "data_sensitivity": "technical",
+                "bugs": [
+                    "https://example.com/bugid=12345",
+                ],
+            },
+            ...
+        }
+
+    You can define your metrics in JSON or YAML, read them in, and pass them
+    to ``RegisteredMetricsFilter`` for easier management of metrics.
+
+    """
+
+    def __init__(
+        self, registered_metrics: RegisteredMetricsType, raise_error: bool = False
+    ):
+        _validate_registered_metrics(registered_metrics)
+        self.registered_metrics = registered_metrics
+        self.raise_error = raise_error
+
+    def __repr__(self):
+        return f"<RegisteredMetricsFilter raise_error={self.raise_error}>"
+
+    def filter(self, record: MetricsRecord) -> MetricsRecord:
+        metric = self.registered_metrics.get(record.key)
+        if metric is None:
+            if self.raise_error:
+                raise MetricsUnknownKey(f"metrics key {record.key!r} is unknown")
+            LOGGER.warning("metrics key %r is unknown.", record.key)
+
+        elif record.stat_type != metric["type"]:
+            if self.raise_error:
+                raise MetricsWrongType(
+                    f"metrics key {record.key!r} has wrong type; {record.stat_type} vs. "
+                    + f"{metric['type']}"
+                )
+
+            LOGGER.warning(
+                "metrics key %r has wrong type; got %s expecting %s",
+                record.key,
+                record.stat_type,
+                metric["type"],
+            )
+
+        return record
 
 
 class AddTagFilter(MetricsFilter):
@@ -23,12 +157,12 @@ class AddTagFilter(MetricsFilter):
 
     """
 
-    def __init__(self, tag):
+    def __init__(self, tag: str):
         self.tag = tag
 
     def __repr__(self):
         return f"<AddTagFilter {self.tag}>"
 
-    def filter(self, record):
+    def filter(self, record: MetricsRecord) -> MetricsRecord:
         record.tags.append(self.tag)
         return record
diff --git a/tests/test_filters.py b/tests/test_filters.py
new file mode 100644
index 0000000..ae1fb63
--- /dev/null
+++ b/tests/test_filters.py
@@ -0,0 +1,187 @@
+import logging
+import pytest
+
+from markus import get_metrics
+from markus.filters import (
+    AddTagFilter,
+    MetricsInvalidSchema,
+    MetricsUnknownKey,
+    MetricsWrongType,
+    RegisteredMetricsFilter,
+    _validate_registered_metrics,
+)
+from markus.main import MetricsRecord
+
+
+logging.basicConfig()
+
+
+def test_tag_filter(metricsmock):
+    metrics = get_metrics("thing", filters=[AddTagFilter("foo:bar")])
+
+    with metricsmock as mm:
+        metrics.incr("foo", value=5)
+
+    assert mm.get_records() == [
+        MetricsRecord("incr", "thing.foo", 5, ["foo:bar"]),
+    ]
+
+
+@pytest.mark.parametrize(
+    "schema",
+    [
+        pytest.param({}, id="empty"),
+        pytest.param({"testkey": {"type": "incr", "description": "abcde"}}, id="basic"),
+        pytest.param(
+            {
+                "testkey_incr": {"type": "incr", "description": "abcde"},
+                "testkey_gauge": {"type": "gauge", "description": "abcde"},
+                "testkey_timing": {"type": "timing", "description": "abcde"},
+                "testkey_histogram": {"type": "histogram", "description": "abcde"},
+            },
+            id="cover_stats",
+        ),
+        pytest.param(
+            {
+                "testkey": {
+                    "type": "incr",
+                    "description": "abcde",
+                    "labels": [],
+                    "bugs": [],
+                }
+            },
+            id="addtl_info",
+        ),
+    ],
+)
+def test_validate_registered_metrics(schema):
+    _validate_registered_metrics(schema)
+
+
+@pytest.mark.parametrize(
+    "schema, error_msg",
+    [
+        pytest.param([], "registered_metrics is not a dict", id="not_dict"),
+        pytest.param({1: {}}, "key 1 is not a str", id="key_not_str"),
+        pytest.param(
+            {"key": []}, "key 'key' has a non-dict value", id="non_dict_value"
+        ),
+        pytest.param(
+            {"key": {"type": "incr"}},
+            "key 'key' has value missing type or description",
+            id="missing_description",
+        ),
+        pytest.param(
+            {"key": {"description": "foo"}},
+            "key 'key' has value missing type or description",
+            id="missing_type",
+        ),
+        pytest.param(
+            {"key": {"type": "foo", "description": "foo"}},
+            "key 'key' type is foo; not one of incr, gauge, timing, histogram",
+            id="invalid_type",
+        ),
+        pytest.param(
"incr", "description": 5}}, + "key 'key' description is not a str", + id="bad_description_type", + ), + ], +) +def test_validate_registered_metrics_invalid(schema, error_msg): + with pytest.raises(MetricsInvalidSchema) as excinfo: + _validate_registered_metrics(schema) + + assert str(excinfo.value) == error_msg + + +ALLOWED_METRICS = { + "thing.key_incr": { + "type": "incr", + "description": "--", + }, + "thing.key_gauge": { + "type": "gauge", + "description": "--", + }, + "thing.key_timing": { + "type": "timing", + "description": "--", + }, + "thing.key_histogram": { + "type": "histogram", + "description": "--", + }, +} + + +def test_registered_metrics_filter(caplog, metricsmock): + caplog.set_level(logging.INFO) + + metrics = get_metrics( + "thing", filters=[RegisteredMetricsFilter(ALLOWED_METRICS, raise_error=False)] + ) + + with metricsmock as mm: + # Emit allowed metrics + metrics.incr("key_incr", value=1) + metrics.gauge("key_gauge", value=10) + metrics.timing("key_timing", value=1.0) + metrics.histogram("key_histogram", value=10.0) + + assert mm.get_records() == [ + MetricsRecord("incr", "thing.key_incr", 1, []), + MetricsRecord("gauge", "thing.key_gauge", 10, []), + MetricsRecord("timing", "thing.key_timing", 1.0, []), + MetricsRecord("histogram", "thing.key_histogram", 10.0, []), + ] + + assert caplog.records == [] + + +def test_registered_metrics_filter_missing(caplog, metricsmock): + caplog.set_level(logging.INFO) + + metrics = get_metrics( + "thing", filters=[RegisteredMetricsFilter(ALLOWED_METRICS, raise_error=False)] + ) + + with metricsmock as mm: + # Emit unknown metric + metrics.incr("unknown_key", value=1) + + assert mm.get_records() == [ + MetricsRecord("incr", "thing.unknown_key", 1, []), + ] + + assert caplog.records[0].levelname == "WARNING" + assert caplog.records[0].message == "metrics key 'thing.unknown_key' is unknown." + + +def test_registered_metrics_filter_missing_error(metricsmock): + with pytest.raises(MetricsUnknownKey) as excinfo: + metrics = get_metrics( + "thing", + filters=[RegisteredMetricsFilter(ALLOWED_METRICS, raise_error=True)], + ) + with metricsmock: + # Emit unknown metric + metrics.incr("unknown_key", value=1) + + assert str(excinfo.value) == "metrics key 'thing.unknown_key' is unknown" + + +def test_registered_metrics_filter_bad_type(metricsmock): + with pytest.raises(MetricsWrongType) as excinfo: + metrics = get_metrics( + "thing", + filters=[RegisteredMetricsFilter(ALLOWED_METRICS, raise_error=True)], + ) + with metricsmock: + # Emit unknown metric + metrics.incr("key_gauge", value=1) + + assert ( + str(excinfo.value) + == "metrics key 'thing.key_gauge' has wrong type; incr vs. gauge" + ) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 0dda62e..5cf44a6 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -128,14 +128,3 @@ def something(): something() assert mm.has_record(fun_name="timing", stat="thing.long_fun") - - -def test_tag_filter(metricsmock): - metrics = get_metrics("thing", filters=[AddTagFilter("foo:bar")]) - - with metricsmock as mm: - metrics.incr("foo", value=5) - - assert mm.get_records() == [ - MetricsRecord("incr", "thing.foo", 5, ["foo:bar"]), - ]