From ad3ce0e962c85c97fdce1641f3e842ab1c2cfb03 Mon Sep 17 00:00:00 2001 From: Zach Aysan Date: Mon, 12 Feb 2024 09:02:09 -0500 Subject: [PATCH] feat: Create split testing for multivariate (#3235) Co-authored-by: Matthew Elwell --- api/api/urls/v1.py | 37 ++++- api/api/urls/v2.py | 8 + api/app/settings/common.py | 5 + api/app/urls.py | 1 + ...luationraw_identifier_and_index_feature.py | 41 ++++++ api/app_analytics/models.py | 6 +- api/app_analytics/serializers.py | 11 ++ api/app_analytics/tasks.py | 23 ++- api/app_analytics/track.py | 30 +++- api/app_analytics/views.py | 86 ++++++++++- api/integrations/sentry/samplers.py | 1 + api/tests/unit/app_analytics/test_models.py | 2 +- .../test_unit_app_analytics_views.py | 139 +++++++++++++++++- 13 files changed, 377 insertions(+), 13 deletions(-) create mode 100644 api/api/urls/v2.py create mode 100644 api/app_analytics/migrations/0002_featureevaluationraw_identifier_and_index_feature.py diff --git a/api/api/urls/v1.py b/api/api/urls/v1.py index 702195d88be1..275c3b3c4302 100644 --- a/api/api/urls/v1.py +++ b/api/api/urls/v1.py @@ -1,6 +1,7 @@ from app_analytics.views import SDKAnalyticsFlags, SelfHostedTelemetryAPIView +from django.conf import settings from django.conf.urls import url -from django.urls import include +from django.urls import include, path from drf_yasg import openapi from drf_yasg.views import get_schema_view from rest_framework import authentication, permissions, routers @@ -47,8 +48,12 @@ url(r"^flags/$", SDKFeatureStates.as_view(), name="flags"), url(r"^identities/$", SDKIdentities.as_view(), name="sdk-identities"), url(r"^traits/", include(traits_router.urls), name="traits"), - url(r"^analytics/flags/$", SDKAnalyticsFlags.as_view()), - url(r"^analytics/telemetry/$", SelfHostedTelemetryAPIView.as_view()), + url(r"^analytics/flags/$", SDKAnalyticsFlags.as_view(), name="analytics-flags"), + url( + r"^analytics/telemetry/$", + SelfHostedTelemetryAPIView.as_view(), + name="analytics-telemetry", + ), url( r"^environment-document/$", SDKEnvironmentAPIView.as_view(), @@ -67,3 +72,29 @@ name="schema-swagger-ui", ), ] + +if settings.SPLIT_TESTING_INSTALLED: + from split_testing.views import ( + ConversionEventTypeView, + CreateConversionEventView, + SplitTestViewSet, + ) + + split_testing_router = routers.DefaultRouter() + split_testing_router.register(r"", SplitTestViewSet, basename="split-tests") + + urlpatterns += [ + url( + r"^split-testing/", include(split_testing_router.urls), name="split-testing" + ), + url( + r"^split-testing/conversion-events/", + CreateConversionEventView.as_view(), + name="conversion-events", + ), + path( + "conversion_event_types/", + ConversionEventTypeView.as_view(), + name="conversion-event-types", + ), + ] diff --git a/api/api/urls/v2.py b/api/api/urls/v2.py new file mode 100644 index 000000000000..d50b38416279 --- /dev/null +++ b/api/api/urls/v2.py @@ -0,0 +1,8 @@ +from app_analytics.views import SDKAnalyticsFlagsV2 +from django.conf.urls import url + +app_name = "v2" + +urlpatterns = [ + url(r"^analytics/flags/$", SDKAnalyticsFlagsV2.as_view(), name="analytics-flags") +] diff --git a/api/app/settings/common.py b/api/app/settings/common.py index 570c027a2596..64a2205ae67f 100644 --- a/api/app/settings/common.py +++ b/api/app/settings/common.py @@ -1152,3 +1152,8 @@ WEBHOOK_BACKOFF_BASE = env.int("WEBHOOK_BACKOFF_BASE", default=2) WEBHOOK_BACKOFF_RETRIES = env.int("WEBHOOK_BACKOFF_RETRIES", default=3) + +# Split Testing settings +SPLIT_TESTING_INSTALLED = importlib.util.find_spec("split_testing") +if 
SPLIT_TESTING_INSTALLED:
+    INSTALLED_APPS += ("split_testing",)
diff --git a/api/app/urls.py b/api/app/urls.py
index fa38b8d7603a..58dee23ede54 100644
--- a/api/app/urls.py
+++ b/api/app/urls.py
@@ -13,6 +13,7 @@
 urlpatterns = [
     url(r"^api/v1/", include("api.urls.deprecated", namespace="api-deprecated")),
     url(r"^api/v1/", include("api.urls.v1", namespace="api-v1")),
+    url(r"^api/v2/", include("api.urls.v2", namespace="api-v2")),
     url(r"^admin/", admin.site.urls),
     url(r"^health", include("health_check.urls", namespace="health")),
     url(r"^version", views.version_info, name="version-info"),
diff --git a/api/app_analytics/migrations/0002_featureevaluationraw_identifier_and_index_feature.py b/api/app_analytics/migrations/0002_featureevaluationraw_identifier_and_index_feature.py
new file mode 100644
index 000000000000..bf00993dcee2
--- /dev/null
+++ b/api/app_analytics/migrations/0002_featureevaluationraw_identifier_and_index_feature.py
@@ -0,0 +1,41 @@
+# Generated by Django 3.2.23 on 2024-01-02 16:35
+
+from django.db import migrations, models
+from core.migration_helpers import PostgresOnlyRunSQL
+
+
+class Migration(migrations.Migration):
+
+    atomic = False
+
+    dependencies = [
+        ('app_analytics', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='featureevaluationraw',
+            name='identity_identifier',
+            field=models.CharField(default=None, max_length=2000, null=True),
+        ),
+        migrations.AddField(
+            model_name='featureevaluationraw',
+            name='enabled_when_evaluated',
+            field=models.BooleanField(null=True, default=None),
+        ),
+        migrations.SeparateDatabaseAndState(
+            state_operations=[
+                migrations.AlterField(
+                    model_name='featureevaluationraw',
+                    name='feature_name',
+                    field=models.CharField(db_index=True, max_length=2000),
+                ),
+            ],
+            database_operations=[
+                PostgresOnlyRunSQL(
+                    'CREATE INDEX CONCURRENTLY "app_analytics_featureevaluationraw_feature_name_idx" ON "app_analytics_featureevaluationraw" ("feature_name");',
+                    reverse_sql='DROP INDEX CONCURRENTLY "app_analytics_featureevaluationraw_feature_name_idx";',
+                )
+            ],
+        ),
+    ]
diff --git a/api/app_analytics/models.py b/api/app_analytics/models.py
index c40baa7c08f2..eada57ebd3c8 100644
--- a/api/app_analytics/models.py
+++ b/api/app_analytics/models.py
@@ -73,11 +73,15 @@ def check_overlapping_buckets(self):
 
 
 class FeatureEvaluationRaw(models.Model):
-    feature_name = models.CharField(max_length=2000)
+    feature_name = models.CharField(db_index=True, max_length=2000)
     environment_id = models.PositiveIntegerField()
    evaluation_count = models.IntegerField(default=0)
     created_at = models.DateTimeField(auto_now_add=True)
 
+    # Both fields are stored for tracking multivariate split testing.
+ identity_identifier = models.CharField(max_length=2000, null=True, default=None) + enabled_when_evaluated = models.BooleanField(null=True, default=None) + class FeatureEvaluationBucket(AbstractBucket): feature_name = models.CharField(max_length=2000) diff --git a/api/app_analytics/serializers.py b/api/app_analytics/serializers.py index caf61b235407..cca9534fb460 100644 --- a/api/app_analytics/serializers.py +++ b/api/app_analytics/serializers.py @@ -16,3 +16,14 @@ class UsageDataQuerySerializer(serializers.Serializer): class UsageTotalCountSerializer(serializers.Serializer): count = serializers.IntegerField() + + +class SDKAnalyticsFlagsSerializerDetail(serializers.Serializer): + feature_name = serializers.CharField() + identity_identifier = serializers.CharField(required=False, default=None) + enabled_when_evaluated = serializers.BooleanField() + count = serializers.IntegerField() + + +class SDKAnalyticsFlagsSerializer(serializers.Serializer): + evaluations = SDKAnalyticsFlagsSerializerDetail(many=True) diff --git a/api/app_analytics/tasks.py b/api/app_analytics/tasks.py index 90ae17bf5bfb..4fbc203dd1bd 100644 --- a/api/app_analytics/tasks.py +++ b/api/app_analytics/tasks.py @@ -62,7 +62,28 @@ def clean_up_old_analytics_data(): @register_task_handler() -def track_feature_evaluation(environment_id, feature_evaluations): +def track_feature_evaluation_v2( + environment_id: int, feature_evaluations: list[dict[str, int | str | bool]] +) -> None: + feature_evaluation_objects = [] + for feature_evaluation in feature_evaluations: + feature_evaluation_objects.append( + FeatureEvaluationRaw( + environment_id=environment_id, + feature_name=feature_evaluation["feature_name"], + evaluation_count=feature_evaluation["count"], + identity_identifier=feature_evaluation["identity_identifier"], + enabled_when_evaluated=feature_evaluation["enabled_when_evaluated"], + ) + ) + FeatureEvaluationRaw.objects.bulk_create(feature_evaluation_objects) + + +@register_task_handler() +def track_feature_evaluation( + environment_id: int, + feature_evaluations: dict[str, int], +) -> None: feature_evaluation_objects = [] for feature_name, evaluation_count in feature_evaluations.items(): feature_evaluation_objects.append( diff --git a/api/app_analytics/track.py b/api/app_analytics/track.py index 01e2c6dc1f23..fc5b697a7bfd 100644 --- a/api/app_analytics/track.py +++ b/api/app_analytics/track.py @@ -8,6 +8,7 @@ from six.moves.urllib.parse import quote # python 2/3 compatible urllib import from environments.models import Environment +from task_processor.decorators import register_task_handler from util.util import postpone logger = logging.getLogger(__name__) @@ -125,7 +126,10 @@ def track_request_influxdb(request): influxdb.write() -def track_feature_evaluation_influxdb(environment_id, feature_evaluations): +@register_task_handler() +def track_feature_evaluation_influxdb( + environment_id: int, feature_evaluations: dict[str, int] +) -> None: """ Sends Feature analytics event data to InfluxDB @@ -139,3 +143,27 @@ def track_feature_evaluation_influxdb(environment_id, feature_evaluations): influxdb.add_data_point("request_count", evaluation_count, tags=tags) influxdb.write() + + +@register_task_handler() +def track_feature_evaluation_influxdb_v2( + environment_id: int, feature_evaluations: list[dict[str, int | str | bool]] +) -> None: + """ + Sends Feature analytics event data to InfluxDB + + :param environment_id: (int) the id of the environment the feature is being evaluated within + :param feature_evaluations: (list) A 
collection of feature evaluations, each including a feature name and evaluation count.
+    """
+    influxdb = InfluxDBWrapper("feature_evaluation")
+
+    for feature_evaluation in feature_evaluations:
+        feature_name = feature_evaluation["feature_name"]
+        evaluation_count = feature_evaluation["count"]
+
+        # Note that "feature_id" is misnamed: it actually holds the name
+        # of the feature. This was done to match existing behavior.
+        tags = {"feature_id": feature_name, "environment_id": environment_id}
+        influxdb.add_data_point("request_count", evaluation_count, tags=tags)
+
+    influxdb.write()
diff --git a/api/app_analytics/views.py b/api/app_analytics/views.py
index 1831334f1d80..b619c89729ff 100644
--- a/api/app_analytics/views.py
+++ b/api/app_analytics/views.py
@@ -4,8 +4,14 @@
     get_total_events_count,
     get_usage_data,
 )
-from app_analytics.tasks import track_feature_evaluation
-from app_analytics.track import track_feature_evaluation_influxdb
+from app_analytics.tasks import (
+    track_feature_evaluation,
+    track_feature_evaluation_v2,
+)
+from app_analytics.track import (
+    track_feature_evaluation_influxdb,
+    track_feature_evaluation_influxdb_v2,
+)
 from django.conf import settings
 from drf_yasg.utils import swagger_auto_schema
 from rest_framework import status
@@ -13,6 +19,7 @@
 from rest_framework.fields import IntegerField
 from rest_framework.generics import CreateAPIView, GenericAPIView
 from rest_framework.permissions import IsAuthenticated
+from rest_framework.request import Request
 from rest_framework.response import Response
 from rest_framework.serializers import Serializer
 from telemetry.serializers import TelemetrySerializer
@@ -24,6 +31,7 @@
 
 from .permissions import UsageDataPermission
 from .serializers import (
+    SDKAnalyticsFlagsSerializer,
     UsageDataQuerySerializer,
     UsageDataSerializer,
     UsageTotalCountSerializer,
@@ -32,6 +40,61 @@
 logger = logging.getLogger(__name__)
 
 
+class SDKAnalyticsFlagsV2(CreateAPIView):
+    permission_classes = (EnvironmentKeyPermissions,)
+    authentication_classes = (EnvironmentKeyAuthentication,)
+    serializer_class = SDKAnalyticsFlagsSerializer
+    throttle_classes = []
+
+    def create(self, request: Request, *args, **kwargs) -> Response:
+        serializer = self.get_serializer(data=request.data)
+        serializer.is_valid(raise_exception=True)
+
+        self.evaluations = serializer.validated_data["evaluations"]
+        if not self._is_data_valid():
+            return Response(
+                {"detail": "Invalid feature names associated with the project."},
+                content_type="application/json",
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+        if settings.USE_POSTGRES_FOR_ANALYTICS:
+            track_feature_evaluation_v2.delay(
+                args=(
+                    request.environment.id,
+                    self.evaluations,
+                )
+            )
+        elif settings.INFLUXDB_TOKEN:
+            track_feature_evaluation_influxdb_v2.delay(
+                args=(
+                    request.environment.id,
+                    self.evaluations,
+                )
+            )
+
+        return Response(status=status.HTTP_204_NO_CONTENT)
+
+    def _is_data_valid(self) -> bool:
+        environment_feature_names = set(
+            FeatureState.objects.filter(
+                environment=self.request.environment,
+                feature_segment=None,
+                identity=None,
+            ).values_list("feature__name", flat=True)
+        )
+
+        valid = True
+        for evaluation in self.evaluations:
+            if evaluation["feature_name"] in environment_feature_names:
+                continue
+            logger.warning(
+                f"Feature {evaluation['feature_name']} does not belong to project"
+            )
+            valid = False
+
+        return valid
+
+
 class SDKAnalyticsFlags(GenericAPIView):
     """
     Class to handle flag analytics events
@@ -65,6 +128,10 @@ def get_fields(self):
     def post(self, request, *args, **kwargs):
         """
         Send 
flag evaluation events from the SDK back to the API for reporting. + + + TODO: Eventually replace this with the v2 version of + this endpoint once SDKs have been updated. """ is_valid = self._is_data_valid() if not is_valid: @@ -74,10 +141,21 @@ def post(self, request, *args, **kwargs): content_type="application/json", status=status.HTTP_200_OK, ) + if settings.USE_POSTGRES_FOR_ANALYTICS: - track_feature_evaluation.delay(args=(request.environment.id, request.data)) + track_feature_evaluation.delay( + args=( + request.environment.id, + request.data, + ) + ) elif settings.INFLUXDB_TOKEN: - track_feature_evaluation_influxdb(request.environment.id, request.data) + track_feature_evaluation_influxdb.delay( + args=( + request.environment.id, + request.data, + ) + ) return Response(status=status.HTTP_200_OK) diff --git a/api/integrations/sentry/samplers.py b/api/integrations/sentry/samplers.py index f2512af62486..b3c85538a7df 100644 --- a/api/integrations/sentry/samplers.py +++ b/api/integrations/sentry/samplers.py @@ -10,6 +10,7 @@ "/api/v1/traits/bulk", "/api/v1/environment-document", "/api/v1/analytics/flags", + "/api/v2/analytics/flags", } diff --git a/api/tests/unit/app_analytics/test_models.py b/api/tests/unit/app_analytics/test_models.py index fa17851c85fe..d5f060abf6dc 100644 --- a/api/tests/unit/app_analytics/test_models.py +++ b/api/tests/unit/app_analytics/test_models.py @@ -10,7 +10,7 @@ if "analytics" not in settings.DATABASES: pytest.skip( - "Skip test if analytics database is configured", allow_module_level=True + "Skip test if analytics database is not configured", allow_module_level=True ) diff --git a/api/tests/unit/app_analytics/test_unit_app_analytics_views.py b/api/tests/unit/app_analytics/test_unit_app_analytics_views.py index 090ea43fc070..9137584dc6b5 100644 --- a/api/tests/unit/app_analytics/test_unit_app_analytics_views.py +++ b/api/tests/unit/app_analytics/test_unit_app_analytics_views.py @@ -1,9 +1,20 @@ +import json from datetime import date, timedelta +import pytest from app_analytics.dataclasses import UsageData +from app_analytics.models import FeatureEvaluationRaw from app_analytics.views import SDKAnalyticsFlags +from django.conf import settings from django.urls import reverse +from pytest_django.fixtures import SettingsWrapper +from pytest_mock import MockerFixture from rest_framework import status +from rest_framework.test import APIClient + +from environments.identities.models import Identity +from environments.models import Environment +from features.models import Feature def test_sdk_analytics_does_not_allow_bad_data(mocker, settings, environment): @@ -32,7 +43,11 @@ def test_sdk_analytics_allows_valid_data(mocker, settings, environment, feature) settings.INFLUXDB_TOKEN = "some-token" data = {feature.name: 12} - request = mocker.MagicMock(data=data, environment=environment) + request = mocker.MagicMock( + data=data, + environment=environment, + query_params={}, + ) view = SDKAnalyticsFlags(request=request) @@ -45,7 +60,7 @@ def test_sdk_analytics_allows_valid_data(mocker, settings, environment, feature) # Then assert response.status_code == status.HTTP_200_OK - mocked_track_feature_eval.assert_called_once_with(environment.id, data) + mocked_track_feature_eval.delay.assert_called_once_with(args=(environment.id, data)) def test_get_usage_data(mocker, admin_client, organisation): @@ -131,3 +146,123 @@ def test_get_total_usage_count_for_non_admin_user_returns_403( # Then assert response.status_code == status.HTTP_403_FORBIDDEN + + +@pytest.mark.skipif( + 
"analytics" not in settings.DATABASES, + reason="Skip test if analytics DB is not configured", +) +@pytest.mark.django_db(databases=["default", "analytics"]) +def test_set_sdk_analytics_flags_with_identifier( + api_client: APIClient, + environment: Environment, + feature: Feature, + identity: Identity, + settings: SettingsWrapper, +) -> None: + # Given + settings.USE_POSTGRES_FOR_ANALYTICS = True + url = reverse("api-v2:analytics-flags") + api_client.credentials(HTTP_X_ENVIRONMENT_KEY=environment.api_key) + feature_request_count = 2 + + data = { + "evaluations": [ + { + "feature_name": feature.name, + "identity_identifier": identity.identifier, + "count": feature_request_count, + "enabled_when_evaluated": True, + } + ] + } + + # When + response = api_client.post( + url, data=json.dumps(data), content_type="application/json" + ) + + # Then + assert response.status_code == status.HTTP_204_NO_CONTENT + + assert FeatureEvaluationRaw.objects.count() == 1 + feature_evaluation_raw = FeatureEvaluationRaw.objects.first() + assert feature_evaluation_raw.identity_identifier == identity.identifier + assert feature_evaluation_raw.feature_name == feature.name + assert feature_evaluation_raw.environment_id == environment.id + assert feature_evaluation_raw.evaluation_count is feature_request_count + + +@pytest.mark.skipif( + "analytics" not in settings.DATABASES, + reason="Skip test if analytics DB is not configured", +) +@pytest.mark.django_db(databases=["default", "analytics"]) +def test_set_sdk_analytics_flags_without_identifier( + api_client: APIClient, + environment: Environment, + feature: Feature, + identity: Identity, + settings: SettingsWrapper, +) -> None: + # Given + settings.USE_POSTGRES_FOR_ANALYTICS = True + url = reverse("api-v2:analytics-flags") + api_client.credentials(HTTP_X_ENVIRONMENT_KEY=environment.api_key) + feature_request_count = 2 + data = { + "evaluations": [ + { + "feature_name": feature.name, + "count": feature_request_count, + "enabled_when_evaluated": True, + } + ] + } + + # When + response = api_client.post( + url, data=json.dumps(data), content_type="application/json" + ) + + # Then + assert response.status_code == status.HTTP_204_NO_CONTENT + + assert FeatureEvaluationRaw.objects.count() == 1 + feature_evaluation_raw = FeatureEvaluationRaw.objects.first() + assert feature_evaluation_raw.identity_identifier is None + assert feature_evaluation_raw.feature_name == feature.name + assert feature_evaluation_raw.environment_id == environment.id + assert feature_evaluation_raw.evaluation_count is feature_request_count + + +def test_set_sdk_analytics_flags_v1_to_influxdb( + api_client: APIClient, + environment: Environment, + feature: Feature, + identity: Identity, + settings: SettingsWrapper, + mocker: MockerFixture, +) -> None: + # Given + settings.INFLUXDB_TOKEN = "some-token" + + url = reverse("api-v1:analytics-flags") + api_client.credentials(HTTP_X_ENVIRONMENT_KEY=environment.api_key) + feature_request_count = 2 + data = {feature.name: feature_request_count} + mock = mocker.patch("app_analytics.track.InfluxDBWrapper") + add_data_point_mock = mock.return_value.add_data_point + + # When + response = api_client.post( + url, data=json.dumps(data), content_type="application/json" + ) + + # Then + assert response.status_code == status.HTTP_200_OK + add_data_point_mock.assert_called_with( + "request_count", + feature_request_count, + tags={"feature_id": feature.name, "environment_id": environment.id}, + )