Fixed #1690 -- fixed trac metrics involving time

This also introduces the fix_trac_metrics management command to help fix collected data.
django · Nov 7, 2024 · 95f2767 · 95f2767
1 parent ba9446c
commit 95f2767
Show file tree

Hide file tree

Showing 7 changed files with 394 additions and 35 deletions.
diff --git a/dashboard/management/commands/fix_trac_metrics.py b/dashboard/management/commands/fix_trac_metrics.py
@@ -0,0 +1,93 @@
+from datetime import date, timedelta
+
+import time_machine
+from django.core.management.base import CommandError, LabelCommand
+from django.db.models import Case, Max, Min, When
+
+from ...models import TracTicketMetric
+
+
+def _get_data(metric, options):
+    """
+    Build the queryset of the metric's Datum rows, narrowed to the dates
+    selected by options["from_date"] and options["to_date"]; a falsy bound
+    leaves that side of the range open.
+    """
+    bounds = {
+        "from_date": "timestamp__date__gte",
+        "to_date": "timestamp__date__lte",
+    }
+    rows = metric.data.all()
+    for option_name, lookup in bounds.items():
+        bound = options[option_name]
+        if bound:
+            rows = rows.filter(**{lookup: bound})
+    return rows
+
+
+def _daterange(queryset):
+    """
+    Yield every date (as a date object), in ascending order, from the day of
+    the earliest Datum in the queryset to the day of the latest one, both
+    included.
+
+    Being a generator, the ValueError for an empty queryset is only raised
+    once iteration starts.
+    """
+    bounds = queryset.aggregate(
+        start=Min("timestamp__date"), end=Max("timestamp__date")
+    )
+    first, last = bounds["start"], bounds["end"]
+    if first is None or last is None:
+        raise ValueError("queryset cannot be empty")
+
+    for offset in range((last - first).days + 1):
+        yield first + timedelta(days=offset)
+
+
+def _refetched_case_when(dates, metric):
+    """
+    Build a CASE database expression holding one WHEN per given date, whose
+    value is the metric fetched again while time is travelled to that date.
+    """
+
+    def refetch_on(day):
+        # metric.fetch() runs with the clock moved to `day`.
+        with time_machine.travel(day):
+            return metric.fetch()
+
+    return Case(*[When(timestamp__date=day, then=refetch_on(day)) for day in dates])
+
+
+class Command(LabelCommand):
+    """
+    Recompute stored measurements of Trac ticket metrics, one slug per label.
+
+    For every date between the earliest and latest selected data points the
+    metric is fetched again with the clock moved to that date, and the
+    selected rows are updated with the result. Without --yes nothing is
+    written; the command only reports how many rows would be updated.
+    """
+
+    help = "Retroactively refetch measurements for Trac metrics."
+    label = "slug"
+
+    def add_arguments(self, parser):
+        """Register --yes, --from-date and --to-date next to the slug labels."""
+        super().add_arguments(parser)
+        parser.add_argument(
+            "--yes", action="store_true", help="Commit the changes to the database"
+        )
+        parser.add_argument(
+            "--from-date",
+            type=date.fromisoformat,
+            help="Only fix data points on or after this date (ISO format)",
+        )
+        parser.add_argument(
+            "--to-date",
+            type=date.fromisoformat,
+            help="Only fix data points on or before this date (ISO format)",
+        )
+
+    def handle_label(self, label, **options):
+        """
+        Fix (or, without --yes, only count) the data points of the metric
+        whose slug is ``label``.
+
+        Raises CommandError if no TracTicketMetric has that slug.
+        """
+        try:
+            metric = TracTicketMetric.objects.get(slug=label)
+        except TracTicketMetric.DoesNotExist as e:
+            # A bare CommandError has an empty message; name the offending slug.
+            raise CommandError(f"No Trac metric found with slug {label!r}") from e
+
+        verbose = int(options["verbosity"]) > 0
+
+        if verbose:
+            self.stdout.write(f"Fixing metric {label}...")
+        dataset = _get_data(metric, options)
+
+        if options["yes"]:
+            if dataset.exists():
+                dates = _daterange(dataset)
+                updated_measurement_expression = _refetched_case_when(dates, metric)
+                updated = dataset.update(measurement=updated_measurement_expression)
+            else:
+                # _daterange() raises ValueError for an empty queryset; with
+                # nothing selected there is simply nothing to update.
+                updated = 0
+            if verbose:
+                self.stdout.write(self.style.SUCCESS(f"{updated} rows updated"))
+        else:
+            if verbose:
+                self.stdout.write(f"{dataset.count()} rows will be updated.")
+                self.stdout.write("Re-run the command with --yes to apply the change")
diff --git a/dashboard/tests.py b/dashboard/tests.py
@@ -1,5 +1,6 @@
 import datetime
 import json
+from operator import attrgetter
 from unittest import mock
 
 import requests_mock
@@ -10,6 +11,7 @@
 
 from tracdb.models import Ticket
 from tracdb.testutils import TracDBCreateDatabaseMixin
+from tracdb.tractime import datetime_to_timestamp
 
 from .models import (
     METRIC_PERIOD_DAILY,
@@ -178,3 +180,115 @@ def test_update_metric(self, mocker, mock_reset_generation_key):
         self.assertTrue(mock_reset_generation_key.called)
         data = GithubItemCountMetric.objects.last().data.last()
         self.assertEqual(data.measurement, 10)
+
+
+class FixTracMetricsCommandTestCase(TracDBCreateDatabaseMixin, TestCase):
+    """
+    Tests for the fix_trac_metrics command, run against seven tickets created
+    one per day from 2024-01-01 to 2024-01-07 (UTC).
+    """
+
+    databases = {"default", "trac"}
+    # Timestamps of the stale data points shared by the multi-row tests.
+    _first_three_days = (
+        "2024-01-01T00:00:00",
+        "2024-01-02T00:00:00",
+        "2024-01-03T00:00:00",
+    )
+
+    @classmethod
+    def setUpTestData(cls):
+        super().setUpTestData()
+
+        for day_of_month in range(1, 8):
+            created = datetime.datetime(2024, 1, day_of_month, tzinfo=datetime.UTC)
+            Ticket.objects.create(_time=datetime_to_timestamp(created))
+
+        cls.metric_today = TracTicketMetric.objects.create(
+            slug="today", query="time=today.."
+        )
+        cls.metric_week = TracTicketMetric.objects.create(
+            slug="week", query="time=thisweek.."
+        )
+
+    def _stale_data(self, metric, *timestamps):
+        """Create one datum with measurement=0 per timestamp and return them."""
+        return [
+            metric.data.create(measurement=0, timestamp=timestamp)
+            for timestamp in timestamps
+        ]
+
+    def _fix(self, slug, **options):
+        """Run fix_trac_metrics for the given slug with verbosity=0."""
+        management.call_command("fix_trac_metrics", slug, verbosity=0, **options)
+
+    def _assert_today_measurements(self, expected):
+        """Compare the "today" metric's measurements, ordered by timestamp."""
+        self.assertQuerySetEqual(
+            self.metric_today.data.order_by("timestamp"),
+            expected,
+            transform=attrgetter("measurement"),
+        )
+
+    def test_command_today(self):
+        (datum,) = self._stale_data(self.metric_today, "2024-01-01T00:00:00")
+        self._fix("today", yes=True)
+        datum.refresh_from_db()
+        self.assertEqual(datum.measurement, 1)
+
+    def test_command_week(self):
+        (datum,) = self._stale_data(self.metric_week, "2024-01-07T00:00:00")
+        self._fix("week", yes=True)
+        datum.refresh_from_db()
+        self.assertEqual(datum.measurement, 7)
+
+    def test_command_safe_by_default(self):
+        (datum,) = self._stale_data(self.metric_today, "2024-01-01T00:00:00")
+        # No --yes: the command must leave the stored measurement alone.
+        self._fix("today")
+        datum.refresh_from_db()
+        self.assertEqual(datum.measurement, 0)
+
+    def test_multiple_measurements(self):
+        self._stale_data(self.metric_today, *self._first_three_days)
+        self._fix("today", yes=True)
+        self._assert_today_measurements([1, 1, 1])
+
+    def test_option_from_date(self):
+        self._stale_data(self.metric_today, *self._first_three_days)
+        self._fix("today", yes=True, from_date=datetime.date(2024, 1, 2))
+        self._assert_today_measurements([0, 1, 1])
+
+    def test_option_to_date(self):
+        self._stale_data(self.metric_today, *self._first_three_days)
+        self._fix("today", yes=True, to_date=datetime.date(2024, 1, 2))
+        self._assert_today_measurements([1, 1, 0])
+
+    def test_option_both_to_and_from_date(self):
+        self._stale_data(self.metric_today, *self._first_three_days)
+        self._fix(
+            "today",
+            yes=True,
+            from_date=datetime.date(2024, 1, 2),
+            to_date=datetime.date(2024, 1, 2),
+        )
+        self._assert_today_measurements([0, 1, 0])
diff --git a/requirements/common.txt b/requirements/common.txt
@@ -21,3 +21,4 @@ requests==2.32.3
 sorl-thumbnail==12.11.0
 Sphinx==8.1.3
 stripe==3.1.0
+time-machine==2.15.0
diff --git a/tracdb/models.py b/tracdb/models.py
@@ -43,37 +43,14 @@
 
 """
 
-import datetime
+from datetime import date
 from functools import reduce
 from operator import and_, or_
 from urllib.parse import parse_qs
 
 from django.db import models
 
-_epoc = datetime.datetime(1970, 1, 1, tzinfo=datetime.UTC)
-
-
-class time_property:
-    """
-    Convert Trac timestamps into UTC datetimes.
-
-    See http://trac.edgewall.org/browser//branches/0.12-stable/trac/util/datefmt.py
-    for Trac's version of all this. Mine's something of a simplification.
-
-    Like the rest of this module this is far from perfect -- no setters, for
-    example! That's good enough for now.
-    """
-
-    def __init__(self, fieldname):
-        self.fieldname = fieldname
-
-    def __get__(self, instance, owner):
-        if instance is None:
-            return self
-        timestamp = getattr(instance, self.fieldname)
-        if timestamp is None:
-            return None
-        return _epoc + datetime.timedelta(microseconds=timestamp)
+from .tractime import dayrange, time_property
 
 
 class JSONBObjectAgg(models.Aggregate):
@@ -97,7 +74,17 @@ def from_querystring(self, querystring):
         filter_kwargs, exclude_kwargs = {}, {}
 
         for field, (value,) in parsed.items():
-            if field not in model_fields:
+            if field == "time":
+                if value == "today..":
+                    timestamp_range = dayrange(date.today(), 1)
+                elif value == "thisweek..":
+                    timestamp_range = dayrange(date.today(), 7)
+                else:
+                    raise ValueError(f"Unsupported time value {value}")
+
+                filter_kwargs["_time__range"] = timestamp_range
+                continue
+            elif field not in model_fields:
                 custom_lookup_required = True
                 field = f"custom__{field}"
             if value.startswith("!"):