From e0ccb04f4976935969a7889bb6160029ea17bf5d Mon Sep 17 00:00:00 2001
From: Will Kahn-Greene
Date: Mon, 24 Jun 2024 13:52:32 -0400
Subject: [PATCH] Implement RegisteredMetricsFilter (#15)

This implements a filter that makes it possible to define a set of
metrics in an external file and enforce that only those metrics are
emitted. This helps reduce typos and improves documentation of metrics.

This takes a structure rather than a path to an external file. In this
way, it stays generic, allowing users to define metrics in whatever way
they find most convenient, e.g. JSON, YAML, Python, etc.
---
 src/markus/filters.py | 140 ++++++++++++++++++++++++++++++-
 tests/test_filters.py | 187 ++++++++++++++++++++++++++++++++++++++++++
 tests/test_metrics.py |  11 ---
 3 files changed, 324 insertions(+), 14 deletions(-)
 create mode 100644 tests/test_filters.py

diff --git a/src/markus/filters.py b/src/markus/filters.py
index b1a9cb4..07c92c4 100644
--- a/src/markus/filters.py
+++ b/src/markus/filters.py
@@ -2,7 +2,141 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-from markus.main import MetricsFilter
+import logging
+from typing import Dict
+
+from markus.main import MetricsFilter, MetricsRecord
+
+
+LOGGER = logging.getLogger(__name__)
+
+
+class MetricsException(Exception):
+    pass
+
+
+class MetricsInvalidSchema(MetricsException):
+    pass
+
+
+class MetricsUnknownKey(MetricsException):
+    pass
+
+
+class MetricsWrongType(MetricsException):
+    pass
+
+
+RegisteredMetricsType = Dict[str, Dict[str, str]]
+
+
+def _validate_registered_metrics(registered_metrics: RegisteredMetricsType):
+    if not isinstance(registered_metrics, dict):
+        raise MetricsInvalidSchema("registered_metrics is not a dict")
+
+    for key, val in registered_metrics.items():
+        if not isinstance(key, str):
+            raise MetricsInvalidSchema(f"key {key!r} is not a str")
+
+        if not isinstance(val, dict):
+            raise MetricsInvalidSchema(f"key {key!r} has a non-dict value")
+
+        if "type" not in val.keys() or "description" not in val.keys():
+            raise MetricsInvalidSchema(
+                f"key {key!r} has value missing type or description"
+            )
+
+        if val["type"] not in ["incr", "gauge", "timing", "histogram"]:
+            raise MetricsInvalidSchema(
+                f"key {key!r} type is {val['type']}; "
+                + "not one of incr, gauge, timing, histogram"
+            )
+
+        if not isinstance(val["description"], str):
+            raise MetricsInvalidSchema(f"key {key!r} description is not a str")
+
+
+class RegisteredMetricsFilter(MetricsFilter):
+    """Filter that validates emitted metrics against a set of registered metrics.
+
+    This is a Markus metrics filter. It complains about any emitted metric
+    that it doesn't know about.
+
+    Registered metrics should be a dict structured like this::
+
+        {
+            KEY -> {
+                "type": str,  # one of "incr" | "gauge" | "timing" | "histogram"
+                "description": str,  # can use markdown
+            },
+            ...
+        }
+
+    For example::
+
+        {
+            "eliot.symbolicate_api": {
+                "type": "timing",
+                "description": "Timer for how long a symbolication API request takes.",
+            },
+            "eliot.symbolicate.proxied": {
+                "type": "incr",
+                "description": "Counter for symbolication requests.",
+            },
+            ...
+        }
+
+    You can include additional information to suit your needs::
+
+        {
+            "eliot.symbolicate_api": {
+                "type": "timing",
+                "description": "Timer for how long a symbolication API request takes.",
+                "data_sensitivity": "technical",
+                "bugs": [
+                    "https://example.com/bugid=12345",
+                ],
+            },
+            ...
+        }
+
+    You can define your metrics in JSON or YAML, read them in, and pass them
+    to ``RegisteredMetricsFilter`` for easier management of metrics.
+
+    """
+
+    def __init__(
+        self, registered_metrics: RegisteredMetricsType, raise_error: bool = False
+    ):
+        _validate_registered_metrics(registered_metrics)
+        self.registered_metrics = registered_metrics
+        self.raise_error = raise_error
+
+    def __repr__(self):
+        return f"<RegisteredMetricsFilter raise_error={self.raise_error}>"
+
+    def filter(self, record: MetricsRecord) -> MetricsRecord:
+        metric = self.registered_metrics.get(record.key)
+        if metric is None:
+            if self.raise_error:
+                raise MetricsUnknownKey(f"metrics key {record.key!r} is unknown")
+            LOGGER.warning("metrics key %r is unknown.", record.key)
+
+        elif record.stat_type != metric["type"]:
+            if self.raise_error:
+                raise MetricsWrongType(
+                    f"metrics key {record.key!r} has wrong type; {record.stat_type} vs. "
+                    + f"{metric['type']}"
+                )
+
+            LOGGER.warning(
+                "metrics key %r has wrong type; got %s expecting %s",
+                record.key,
+                record.stat_type,
+                metric["type"],
+            )
+
+        return record
 
 
 class AddTagFilter(MetricsFilter):
@@ -23,12 +157,12 @@ class AddTagFilter(MetricsFilter):
 
     """
 
-    def __init__(self, tag):
+    def __init__(self, tag: str):
         self.tag = tag
 
     def __repr__(self):
         return f"<AddTagFilter {self.tag}>"
 
-    def filter(self, record):
+    def filter(self, record: MetricsRecord) -> MetricsRecord:
         record.tags.append(self.tag)
         return record
diff --git a/tests/test_filters.py b/tests/test_filters.py
new file mode 100644
index 0000000..ae1fb63
--- /dev/null
+++ b/tests/test_filters.py
@@ -0,0 +1,187 @@
+import logging
+import pytest
+
+from markus import get_metrics
+from markus.filters import (
+    AddTagFilter,
+    MetricsInvalidSchema,
+    MetricsUnknownKey,
+    MetricsWrongType,
+    RegisteredMetricsFilter,
+    _validate_registered_metrics,
+)
+from markus.main import MetricsRecord
+
+
+logging.basicConfig()
+
+
+def test_tag_filter(metricsmock):
+    metrics = get_metrics("thing", filters=[AddTagFilter("foo:bar")])
+
+    with metricsmock as mm:
+        metrics.incr("foo", value=5)
+
+    assert mm.get_records() == [
+        MetricsRecord("incr", "thing.foo", 5, ["foo:bar"]),
+    ]
+
+
+@pytest.mark.parametrize(
+    "schema",
+    [
+        pytest.param({}, id="empty"),
+        pytest.param({"testkey": {"type": "incr", "description": "abcde"}}, id="basic"),
+        pytest.param(
+            {
+                "testkey_incr": {"type": "incr", "description": "abcde"},
+                "testkey_gauge": {"type": "gauge", "description": "abcde"},
+                "testkey_timing": {"type": "timing", "description": "abcde"},
+                "testkey_histogram": {"type": "histogram", "description": "abcde"},
+            },
+            id="cover_stats",
+        ),
+        pytest.param(
+            {
+                "testkey": {
+                    "type": "incr",
+                    "description": "abcde",
+                    "labels": [],
+                    "bugs": [],
+                }
+            },
+            id="addtl_info",
+        ),
+    ],
+)
+def test_validate_registered_metrics(schema):
+    _validate_registered_metrics(schema)
+
+
+@pytest.mark.parametrize(
+    "schema, error_msg",
+    [
+        pytest.param([], "registered_metrics is not a dict", id="not_dict"),
+        pytest.param({1: {}}, "key 1 is not a str", id="key_not_str"),
+        pytest.param(
+            {"key": []}, "key 'key' has a non-dict value", id="non_dict_value"
+        ),
+        pytest.param(
+            {"key": {"type": "incr"}},
+            "key 'key' has value missing type or description",
+            id="missing_description",
+        ),
+        pytest.param(
+            {"key": {"description": "foo"}},
+            "key 'key' has value missing type or description",
+            id="missing_type",
+        ),
+        pytest.param(
+            {"key": {"type": "foo", "description": "foo"}},
+            "key 'key' type is foo; not one of incr, gauge, timing, histogram",
+            id="invalid_type",
+        ),
+        pytest.param(
"incr", "description": 5}}, + "key 'key' description is not a str", + id="bad_description_type", + ), + ], +) +def test_validate_registered_metrics_invalid(schema, error_msg): + with pytest.raises(MetricsInvalidSchema) as excinfo: + _validate_registered_metrics(schema) + + assert str(excinfo.value) == error_msg + + +ALLOWED_METRICS = { + "thing.key_incr": { + "type": "incr", + "description": "--", + }, + "thing.key_gauge": { + "type": "gauge", + "description": "--", + }, + "thing.key_timing": { + "type": "timing", + "description": "--", + }, + "thing.key_histogram": { + "type": "histogram", + "description": "--", + }, +} + + +def test_registered_metrics_filter(caplog, metricsmock): + caplog.set_level(logging.INFO) + + metrics = get_metrics( + "thing", filters=[RegisteredMetricsFilter(ALLOWED_METRICS, raise_error=False)] + ) + + with metricsmock as mm: + # Emit allowed metrics + metrics.incr("key_incr", value=1) + metrics.gauge("key_gauge", value=10) + metrics.timing("key_timing", value=1.0) + metrics.histogram("key_histogram", value=10.0) + + assert mm.get_records() == [ + MetricsRecord("incr", "thing.key_incr", 1, []), + MetricsRecord("gauge", "thing.key_gauge", 10, []), + MetricsRecord("timing", "thing.key_timing", 1.0, []), + MetricsRecord("histogram", "thing.key_histogram", 10.0, []), + ] + + assert caplog.records == [] + + +def test_registered_metrics_filter_missing(caplog, metricsmock): + caplog.set_level(logging.INFO) + + metrics = get_metrics( + "thing", filters=[RegisteredMetricsFilter(ALLOWED_METRICS, raise_error=False)] + ) + + with metricsmock as mm: + # Emit unknown metric + metrics.incr("unknown_key", value=1) + + assert mm.get_records() == [ + MetricsRecord("incr", "thing.unknown_key", 1, []), + ] + + assert caplog.records[0].levelname == "WARNING" + assert caplog.records[0].message == "metrics key 'thing.unknown_key' is unknown." + + +def test_registered_metrics_filter_missing_error(metricsmock): + with pytest.raises(MetricsUnknownKey) as excinfo: + metrics = get_metrics( + "thing", + filters=[RegisteredMetricsFilter(ALLOWED_METRICS, raise_error=True)], + ) + with metricsmock: + # Emit unknown metric + metrics.incr("unknown_key", value=1) + + assert str(excinfo.value) == "metrics key 'thing.unknown_key' is unknown" + + +def test_registered_metrics_filter_bad_type(metricsmock): + with pytest.raises(MetricsWrongType) as excinfo: + metrics = get_metrics( + "thing", + filters=[RegisteredMetricsFilter(ALLOWED_METRICS, raise_error=True)], + ) + with metricsmock: + # Emit unknown metric + metrics.incr("key_gauge", value=1) + + assert ( + str(excinfo.value) + == "metrics key 'thing.key_gauge' has wrong type; incr vs. gauge" + ) diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 0dda62e..5cf44a6 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -128,14 +128,3 @@ def something(): something() assert mm.has_record(fun_name="timing", stat="thing.long_fun") - - -def test_tag_filter(metricsmock): - metrics = get_metrics("thing", filters=[AddTagFilter("foo:bar")]) - - with metricsmock as mm: - metrics.incr("foo", value=5) - - assert mm.get_records() == [ - MetricsRecord("incr", "thing.foo", 5, ["foo:bar"]), - ]