Skip to content

Commit

Permalink
Fixed #1690 -- fixed trac metrics involving time
Browse files Browse the repository at this point in the history
This also introduces the fix_trac_metrics management
command to help fix collected data.
  • Loading branch information
bmispelon committed Nov 7, 2024
1 parent ba9446c commit 95f2767
Show file tree
Hide file tree
Showing 7 changed files with 394 additions and 35 deletions.
93 changes: 93 additions & 0 deletions dashboard/management/commands/fix_trac_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from datetime import date, timedelta

import time_machine
from django.core.management.base import CommandError, LabelCommand
from django.db.models import Case, Max, Min, When

from ...models import TracTicketMetric


def _get_data(metric, options):
    """
    Build the queryset of data points of ``metric`` that the command acts on.

    ``options`` must contain the ``from_date`` and ``to_date`` keys. Each one
    that is truthy narrows the queryset on the date part of ``timestamp``
    (inclusive lower bound for ``from_date``, inclusive upper bound for
    ``to_date``); a falsy value leaves that side unbounded.
    """
    rows = metric.data.all()
    # (option key, queryset lookup) pairs, applied in this order.
    bounds = (
        ("from_date", "timestamp__date__gte"),
        ("to_date", "timestamp__date__lte"),
    )
    for option_key, lookup in bounds:
        limit = options[option_key]
        if limit:
            rows = rows.filter(**{lookup: limit})
    return rows


def _daterange(queryset):
    """
    Yield every date from the earliest to the latest data point (inclusive).

    The bounds are the minimum and maximum of ``timestamp__date`` over the
    given queryset, fetched with a single aggregate query. Dates that have no
    data point of their own are still yielded.

    Raises ValueError if the aggregate returns no bounds (empty queryset).
    Because this is a generator, the query runs and the error is raised when
    iteration starts, not when the function is called.
    """
    bounds = queryset.aggregate(
        start=Min("timestamp__date"), end=Max("timestamp__date")
    )
    first, last = bounds["start"], bounds["end"]
    if first is None or last is None:
        raise ValueError("queryset cannot be empty")

    # +1 so that the last day is part of the range.
    for offset in range((last - first).days + 1):
        yield first + timedelta(days=offset)


def _refetched_case_when(dates, metric):
    """
    Build a CASE expression mapping each date to a freshly fetched measurement.

    For every date in ``dates``, ``metric.fetch()`` is called while the clock
    is moved to that date with ``time_machine.travel()``. The result becomes
    the ``then`` value of a WHEN clause matching ``timestamp__date``. The
    returned Case has no default.
    """

    def measurement_on(day):
        # Anything reading the current date during fetch() sees ``day``.
        with time_machine.travel(day):
            return metric.fetch()

    clauses = (When(timestamp__date=day, then=measurement_on(day)) for day in dates)
    return Case(*clauses)


class Command(LabelCommand):
    """
    Refetch and overwrite stored measurements of Trac ticket metrics.

    Each label is the slug of a TracTicketMetric. Without ``--yes`` the
    command only reports how many rows would be touched; with ``--yes`` it
    refetches the metric for every date in the selected range and updates the
    matching rows in a single UPDATE query.
    """

    help = "Retroactively refetch measurements for Trac metrics."
    label = "slug"

    def add_arguments(self, parser):
        """Add ``--yes``, ``--from-date`` and ``--to-date`` to the parser."""
        super().add_arguments(parser)
        parser.add_argument(
            "--yes", action="store_true", help="Commit the changes to the database"
        )
        parser.add_argument(
            "--from-date",
            type=date.fromisoformat,
            help="Only fix data recorded on or after this date (ISO format)",
        )
        parser.add_argument(
            "--to-date",
            type=date.fromisoformat,
            help="Only fix data recorded on or before this date (ISO format)",
        )

    def handle_label(self, label, **options):
        """
        Fix the stored data of the metric whose slug is ``label``.

        Raises CommandError if no TracTicketMetric has that slug.
        """
        try:
            metric = TracTicketMetric.objects.get(slug=label)
        except TracTicketMetric.DoesNotExist as e:
            # Give the user an actionable message instead of an empty error.
            raise CommandError(f"No Trac metric found with slug {label!r}") from e

        verbose = int(options["verbosity"]) > 0

        if verbose:
            self.stdout.write(f"Fixing metric {label}...")
        dataset = _get_data(metric, options)

        if not options["yes"]:
            # Dry run: report only, never touch the database.
            if verbose:
                self.stdout.write(f"{dataset.count()} rows will be updated.")
                self.stdout.write("Re-run the command with --yes to apply the change")
            return

        if dataset.exists():
            dates = _daterange(dataset)
            updated_measurement_expression = _refetched_case_when(dates, metric)
            updated = dataset.update(measurement=updated_measurement_expression)
        else:
            # _daterange() raises ValueError on an empty queryset. Nothing
            # matching the filters is not an error for the user, and raising
            # would abort the remaining labels.
            updated = 0

        if verbose:
            self.stdout.write(self.style.SUCCESS(f"{updated} rows updated"))
114 changes: 114 additions & 0 deletions dashboard/tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import json
from operator import attrgetter
from unittest import mock

import requests_mock
Expand All @@ -10,6 +11,7 @@

from tracdb.models import Ticket
from tracdb.testutils import TracDBCreateDatabaseMixin
from tracdb.tractime import datetime_to_timestamp

from .models import (
METRIC_PERIOD_DAILY,
Expand Down Expand Up @@ -178,3 +180,115 @@ def test_update_metric(self, mocker, mock_reset_generation_key):
self.assertTrue(mock_reset_generation_key.called)
data = GithubItemCountMetric.objects.last().data.last()
self.assertEqual(data.measurement, 10)


class FixTracMetricsCommandTestCase(TracDBCreateDatabaseMixin, TestCase):
    """
    Tests for the ``fix_trac_metrics`` management command.

    Fixtures: one Trac ticket per day from 2024-01-01 to 2024-01-07 (created
    at midnight UTC) and two metrics, with queries ``time=today..`` and
    ``time=thisweek..``. Every test stores data points with a measurement of 0
    and checks which of them the command rewrote.
    """

    databases = {"default", "trac"}

    @classmethod
    def setUpTestData(cls):
        super().setUpTestData()

        for day_of_month in range(1, 8):
            created = datetime.datetime(2024, 1, day_of_month, tzinfo=datetime.UTC)
            Ticket.objects.create(_time=datetime_to_timestamp(created))

        cls.metric_today = TracTicketMetric.objects.create(
            slug="today", query="time=today.."
        )
        cls.metric_week = TracTicketMetric.objects.create(
            slug="week", query="time=thisweek.."
        )

    def _fix(self, slug, **options):
        """Run the command silently for ``slug`` with extra ``options``."""
        management.call_command("fix_trac_metrics", slug, verbosity=0, **options)

    def _seed_three_days(self):
        """Store a zero measurement of the "today" metric for Jan 1st-3rd."""
        for stamp in (
            "2024-01-01T00:00:00",
            "2024-01-02T00:00:00",
            "2024-01-03T00:00:00",
        ):
            self.metric_today.data.create(measurement=0, timestamp=stamp)

    def _assert_today_measurements(self, expected):
        """Compare the "today" measurements, oldest first, to ``expected``."""
        self.assertQuerySetEqual(
            self.metric_today.data.order_by("timestamp"),
            expected,
            transform=attrgetter("measurement"),
        )

    def test_command_today(self):
        datum = self.metric_today.data.create(
            measurement=0, timestamp="2024-01-01T00:00:00"
        )
        self._fix("today", yes=True)
        datum.refresh_from_db()
        self.assertEqual(datum.measurement, 1)

    def test_command_week(self):
        datum = self.metric_week.data.create(
            measurement=0, timestamp="2024-01-07T00:00:00"
        )
        self._fix("week", yes=True)
        datum.refresh_from_db()
        # All seven fixture tickets are expected to count on Jan 7th.
        self.assertEqual(datum.measurement, 7)

    def test_command_safe_by_default(self):
        datum = self.metric_today.data.create(
            measurement=0, timestamp="2024-01-01T00:00:00"
        )
        # No ``yes`` option: the stored value must stay untouched.
        self._fix("today")
        datum.refresh_from_db()
        self.assertEqual(datum.measurement, 0)

    def test_multiple_measurements(self):
        self._seed_three_days()
        self._fix("today", yes=True)
        self._assert_today_measurements([1, 1, 1])

    def test_option_from_date(self):
        self._seed_three_days()
        self._fix("today", yes=True, from_date=datetime.date(2024, 1, 2))
        self._assert_today_measurements([0, 1, 1])

    def test_option_to_date(self):
        self._seed_three_days()
        self._fix("today", yes=True, to_date=datetime.date(2024, 1, 2))
        self._assert_today_measurements([1, 1, 0])

    def test_option_both_to_and_from_date(self):
        self._seed_three_days()
        self._fix(
            "today",
            yes=True,
            from_date=datetime.date(2024, 1, 2),
            to_date=datetime.date(2024, 1, 2),
        )
        self._assert_today_measurements([0, 1, 0])
1 change: 1 addition & 0 deletions requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ requests==2.32.3
sorl-thumbnail==12.11.0
Sphinx==8.1.3
stripe==3.1.0
time-machine==2.15.0
39 changes: 13 additions & 26 deletions tracdb/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,37 +43,14 @@
"""

import datetime
from datetime import date
from functools import reduce
from operator import and_, or_
from urllib.parse import parse_qs

from django.db import models

_epoc = datetime.datetime(1970, 1, 1, tzinfo=datetime.UTC)


class time_property:
"""
Convert Trac timestamps into UTC datetimes.
See http://trac.edgewall.org/browser//branches/0.12-stable/trac/util/datefmt.py
for Trac's version of all this. Mine's something of a simplification.
Like the rest of this module this is far from perfect -- no setters, for
example! That's good enough for now.
"""

def __init__(self, fieldname):
self.fieldname = fieldname

def __get__(self, instance, owner):
if instance is None:
return self
timestamp = getattr(instance, self.fieldname)
if timestamp is None:
return None
return _epoc + datetime.timedelta(microseconds=timestamp)
from .tractime import dayrange, time_property


class JSONBObjectAgg(models.Aggregate):
Expand All @@ -97,7 +74,17 @@ def from_querystring(self, querystring):
filter_kwargs, exclude_kwargs = {}, {}

for field, (value,) in parsed.items():
if field not in model_fields:
if field == "time":
if value == "today..":
timestamp_range = dayrange(date.today(), 1)
elif value == "thisweek..":
timestamp_range = dayrange(date.today(), 7)
else:
raise ValueError(f"Unsupported time value {value}")

filter_kwargs["_time__range"] = timestamp_range
continue
elif field not in model_fields:
custom_lookup_required = True
field = f"custom__{field}"
if value.startswith("!"):
Expand Down
Loading

0 comments on commit 95f2767

Please sign in to comment.