diff --git a/snuba/datasets/configuration/discover/entities/discover.yaml b/snuba/datasets/configuration/discover/entities/discover.yaml index 4e7fddc6df..8a49f6bf47 100644 --- a/snuba/datasets/configuration/discover/entities/discover.yaml +++ b/snuba/datasets/configuration/discover/entities/discover.yaml @@ -3,10 +3,11 @@ kind: entity name: discover schema: [ - { name: event_id, type: FixedString, args: { length: 32 }}, + { name: event_id, type: FixedString, args: { length: 32 } }, { name: project_id, type: UInt, args: { size: 64 } }, { name: type, type: String, args: { schema_modifiers: [nullable] } }, { name: timestamp, type: DateTime }, + { name: time, type: DateTime }, { name: platform, type: String, args: { schema_modifiers: [nullable] } }, { name: environment, type: String, args: { schema_modifiers: [nullable] } }, { name: release, type: String, args: { schema_modifiers: [nullable] } }, @@ -254,11 +255,7 @@ schema: type: String, args: { schema_modifiers: [nullable] }, }, - { - name: profile_id, - type: UUID, - args: { schema_modifiers: [nullable] }, - }, + { name: profile_id, type: UUID, args: { schema_modifiers: [nullable] } }, { name: replay_id, type: UUID, @@ -267,12 +264,12 @@ schema: { name: trace_sampled, type: UInt, - args: { schema_modifiers: [ nullable ], size: 8 }, + args: { schema_modifiers: [nullable], size: 8 }, }, { name: num_processing_errors, type: UInt, - args: { schema_modifiers: [ nullable ], size: 64 }, + args: { schema_modifiers: [nullable], size: 64 }, }, ] required_time_column: timestamp diff --git a/snuba/datasets/configuration/discover/entities/discover_events.yaml b/snuba/datasets/configuration/discover/entities/discover_events.yaml index e9de90210b..15d920f03c 100644 --- a/snuba/datasets/configuration/discover/entities/discover_events.yaml +++ b/snuba/datasets/configuration/discover/entities/discover_events.yaml @@ -5,6 +5,8 @@ schema: [ { name: project_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, + { name: time, type: DateTime }, + { name: rtime, type: DateTime }, { name: event_id, type: UUID }, { name: platform, type: String }, { name: environment, type: String, args: { schema_modifiers: [nullable] } }, @@ -195,12 +197,12 @@ schema: { name: trace_sampled, type: UInt, - args: { schema_modifiers: [ nullable ], size: 8 }, + args: { schema_modifiers: [nullable], size: 8 }, }, { name: num_processing_errors, type: UInt, - args: { schema_modifiers: [ nullable ], size: 64 }, + args: { schema_modifiers: [nullable], size: 64 }, }, { name: replay_id, type: UUID, args: { schema_modifiers: [nullable] } }, ] @@ -538,7 +540,6 @@ subscription_validators: - orderby required_time_column: timestamp - join_relationships: grouped: rhs_entity: groupedmessage diff --git a/snuba/datasets/configuration/discover/entities/discover_transactions.yaml b/snuba/datasets/configuration/discover/entities/discover_transactions.yaml index 416c037e95..da3fbcbf69 100644 --- a/snuba/datasets/configuration/discover/entities/discover_transactions.yaml +++ b/snuba/datasets/configuration/discover/entities/discover_transactions.yaml @@ -118,14 +118,15 @@ schema: { name: title, type: String, args: { schema_modifiers: [readonly] } }, { name: transaction_source, type: String }, { name: timestamp, type: DateTime, args: { schema_modifiers: [readonly] } }, + { name: time, type: DateTime, args: { schema_modifiers: [readonly] } }, { name: group_ids, type: Array, args: { inner_type: { type: UInt, args: { size: 64 } } }, }, { name: app_start_type, type: String }, - { name: profile_id, type: UUID, args: {schema_modifiers: [nullable]}}, - { name: replay_id, type: UUID, args: {schema_modifiers: [nullable]}} + { name: profile_id, type: UUID, args: { schema_modifiers: [nullable] } }, + { name: replay_id, type: UUID, args: { schema_modifiers: [nullable] } } ] required_time_column: finish_ts storages: @@ -354,7 +355,7 @@ subscription_validators: { max_allowed_aggregations: 1, disallowed_aggregations: [groupby, having, orderby], - required_time_column: finish_ts + required_time_column: finish_ts, }, }, ] diff --git a/snuba/datasets/configuration/entity_builder.py b/snuba/datasets/configuration/entity_builder.py index de3f572473..f0bd4c0fd6 100644 --- a/snuba/datasets/configuration/entity_builder.py +++ b/snuba/datasets/configuration/entity_builder.py @@ -186,11 +186,13 @@ def _build_join_relationships(config: dict[str, Any]) -> dict[str, JoinRelations return relationships -def _build_validation_mode(mode: str | None) -> ColumnValidationMode: +def _build_validation_mode(mode: str | None) -> ColumnValidationMode | None: if not mode: - return ColumnValidationMode.DO_NOTHING + return None - if mode == "warn": + if mode == "do_nothing": + return ColumnValidationMode.DO_NOTHING + elif mode == "warn": return ColumnValidationMode.WARN elif mode == "error": return ColumnValidationMode.ERROR diff --git a/snuba/datasets/configuration/events/entities/events.yaml b/snuba/datasets/configuration/events/entities/events.yaml index b3894b2e29..24111944c6 100644 --- a/snuba/datasets/configuration/events/entities/events.yaml +++ b/snuba/datasets/configuration/events/entities/events.yaml @@ -5,6 +5,8 @@ schema: [ { name: project_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, + { name: time, type: DateTime }, + { name: rtime, type: DateTime }, { name: event_id, type: UUID }, { name: platform, type: String }, { name: environment, type: String, args: { schema_modifiers: [nullable] } }, @@ -195,12 +197,12 @@ schema: { name: trace_sampled, type: UInt, - args: { schema_modifiers: [ nullable ], size: 8 }, + args: { schema_modifiers: [nullable], size: 8 }, }, { name: num_processing_errors, type: UInt, - args: { schema_modifiers: [ nullable ], size: 64 }, + args: { schema_modifiers: [nullable], size: 64 }, }, { name: replay_id, type: UUID, args: { schema_modifiers: [nullable] } }, ] @@ -473,7 +475,6 @@ subscription_validators: - orderby required_time_column: timestamp - join_relationships: grouped: rhs_entity: groupedmessage diff --git a/snuba/datasets/configuration/generic_metrics/entities/counters.yaml b/snuba/datasets/configuration/generic_metrics/entities/counters.yaml index 8387d596a9..fc77e9546e 100644 --- a/snuba/datasets/configuration/generic_metrics/entities/counters.yaml +++ b/snuba/datasets/configuration/generic_metrics/entities/counters.yaml @@ -9,6 +9,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 8 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/generic_metrics/entities/distributions.yaml b/snuba/datasets/configuration/generic_metrics/entities/distributions.yaml index 3326fe1a09..e757efb8fc 100644 --- a/snuba/datasets/configuration/generic_metrics/entities/distributions.yaml +++ b/snuba/datasets/configuration/generic_metrics/entities/distributions.yaml @@ -9,6 +9,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 8 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/generic_metrics/entities/gauges.yaml b/snuba/datasets/configuration/generic_metrics/entities/gauges.yaml index d046392ba0..9cf9129ec6 100644 --- a/snuba/datasets/configuration/generic_metrics/entities/gauges.yaml +++ b/snuba/datasets/configuration/generic_metrics/entities/gauges.yaml @@ -9,6 +9,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 8 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/generic_metrics/entities/org_counters.yaml b/snuba/datasets/configuration/generic_metrics/entities/org_counters.yaml index 32b2a8a11e..1b7631bfe8 100644 --- a/snuba/datasets/configuration/generic_metrics/entities/org_counters.yaml +++ b/snuba/datasets/configuration/generic_metrics/entities/org_counters.yaml @@ -8,6 +8,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 8 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, @@ -23,7 +25,7 @@ schema: { name: value, type: AggregateFunction, - args: { func: sum, arg_types: [ { type: Float, args: { size: 64 } } ] }, + args: { func: sum, arg_types: [{ type: Float, args: { size: 64 } }] }, }, ] required_time_column: timestamp diff --git a/snuba/datasets/configuration/generic_metrics/entities/org_distributions.yaml b/snuba/datasets/configuration/generic_metrics/entities/org_distributions.yaml index a03f44f5aa..fa2f441304 100644 --- a/snuba/datasets/configuration/generic_metrics/entities/org_distributions.yaml +++ b/snuba/datasets/configuration/generic_metrics/entities/org_distributions.yaml @@ -9,6 +9,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 8 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/generic_metrics/entities/org_sets.yaml b/snuba/datasets/configuration/generic_metrics/entities/org_sets.yaml index a8153ee4c3..f5d5e8fd98 100644 --- a/snuba/datasets/configuration/generic_metrics/entities/org_sets.yaml +++ b/snuba/datasets/configuration/generic_metrics/entities/org_sets.yaml @@ -9,6 +9,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 8 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/generic_metrics/entities/sets.yaml b/snuba/datasets/configuration/generic_metrics/entities/sets.yaml index 4c4b230d11..4b0e0d1f14 100644 --- a/snuba/datasets/configuration/generic_metrics/entities/sets.yaml +++ b/snuba/datasets/configuration/generic_metrics/entities/sets.yaml @@ -9,6 +9,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 8 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/metrics/entities/metrics_counters.yaml b/snuba/datasets/configuration/metrics/entities/metrics_counters.yaml index 19341c3436..c5a6a6e24f 100644 --- a/snuba/datasets/configuration/metrics/entities/metrics_counters.yaml +++ b/snuba/datasets/configuration/metrics/entities/metrics_counters.yaml @@ -8,6 +8,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 32 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/metrics/entities/metrics_distributions.yaml b/snuba/datasets/configuration/metrics/entities/metrics_distributions.yaml index ae13f5efac..aa3fceda88 100644 --- a/snuba/datasets/configuration/metrics/entities/metrics_distributions.yaml +++ b/snuba/datasets/configuration/metrics/entities/metrics_distributions.yaml @@ -8,6 +8,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 32 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/metrics/entities/metrics_sets.yaml b/snuba/datasets/configuration/metrics/entities/metrics_sets.yaml index 0ee7033a14..36a404b894 100644 --- a/snuba/datasets/configuration/metrics/entities/metrics_sets.yaml +++ b/snuba/datasets/configuration/metrics/entities/metrics_sets.yaml @@ -8,6 +8,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 32 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/metrics/entities/org_counters.yaml b/snuba/datasets/configuration/metrics/entities/org_counters.yaml index 4826b245ba..599c8fb62c 100644 --- a/snuba/datasets/configuration/metrics/entities/org_counters.yaml +++ b/snuba/datasets/configuration/metrics/entities/org_counters.yaml @@ -8,6 +8,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 8 } }, + { name: use_case_id, type: String }, ] required_time_column: timestamp diff --git a/snuba/datasets/configuration/metrics/entities/org_distributions.yaml b/snuba/datasets/configuration/metrics/entities/org_distributions.yaml index c913443e08..c95104aac7 100644 --- a/snuba/datasets/configuration/metrics/entities/org_distributions.yaml +++ b/snuba/datasets/configuration/metrics/entities/org_distributions.yaml @@ -8,6 +8,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 32 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/metrics/entities/org_sets.yaml b/snuba/datasets/configuration/metrics/entities/org_sets.yaml index 977c2f8dc3..a637a3b4f5 100644 --- a/snuba/datasets/configuration/metrics/entities/org_sets.yaml +++ b/snuba/datasets/configuration/metrics/entities/org_sets.yaml @@ -8,6 +8,8 @@ schema: { name: metric_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, { name: bucketed_time, type: DateTime }, + { name: granularity, type: UInt, args: { size: 32 } }, + { name: use_case_id, type: String }, { name: tags, type: Nested, diff --git a/snuba/datasets/configuration/outcomes/entities/outcomes.yaml b/snuba/datasets/configuration/outcomes/entities/outcomes.yaml index dab0b7d975..489d55b6e8 100644 --- a/snuba/datasets/configuration/outcomes/entities/outcomes.yaml +++ b/snuba/datasets/configuration/outcomes/entities/outcomes.yaml @@ -7,38 +7,38 @@ schema: { name: project_id, type: UInt, args: { size: 64 } }, { name: key_id, type: UInt, args: { size: 64 } }, { name: timestamp, type: DateTime }, + { name: time, type: DateTime }, { name: outcome, type: UInt, args: { size: 8 } }, { name: reason, type: String }, { name: quantity, type: UInt, args: { size: 64 } }, { name: category, type: UInt, args: { size: 8 } }, { name: times_seen, type: UInt, args: { size: 64 } }, - { name: time, type: DateTime }, ] required_time_column: timestamp storages: -- storage: outcomes_hourly - is_writable: false -- storage: outcomes_raw - is_writable: true + - storage: outcomes_hourly + is_writable: false + - storage: outcomes_raw + is_writable: true storage_selector: selector: SimpleQueryStorageSelector args: storage: outcomes_hourly query_processors: -- processor: BasicFunctionsProcessor -- processor: TimeSeriesProcessor - args: - time_group_columns: - time: timestamp - time_parse_columns: - - timestamp -- processor: ReferrerRateLimiterProcessor -- processor: OrganizationRateLimiterProcessor - args: - org_column: org_id + - processor: BasicFunctionsProcessor + - processor: TimeSeriesProcessor + args: + time_group_columns: + time: timestamp + time_parse_columns: + - timestamp + - processor: ReferrerRateLimiterProcessor + - processor: OrganizationRateLimiterProcessor + args: + org_column: org_id validators: -- validator: EntityRequiredColumnValidator - args: - required_filter_columns: - - org_id + - validator: EntityRequiredColumnValidator + args: + required_filter_columns: + - org_id diff --git a/snuba/datasets/configuration/outcomes_raw/entities/outcomes_raw.yaml b/snuba/datasets/configuration/outcomes_raw/entities/outcomes_raw.yaml index f4a80a0b0b..c843275b9d 100644 --- a/snuba/datasets/configuration/outcomes_raw/entities/outcomes_raw.yaml +++ b/snuba/datasets/configuration/outcomes_raw/entities/outcomes_raw.yaml @@ -5,42 +5,46 @@ schema: [ { name: org_id, type: UInt, args: { size: 64 } }, { name: project_id, type: UInt, args: { size: 64 } }, - { name: key_id, type: UInt, args: { size: 64, schema_modifiers: [nullable] } }, + { + name: key_id, + type: UInt, + args: { size: 64, schema_modifiers: [nullable] }, + }, { name: timestamp, type: DateTime }, + { name: time, type: DateTime }, { name: outcome, type: UInt, args: { size: 8 } }, { name: reason, type: String, args: { schema_modifiers: [nullable] } }, { name: event_id, type: UUID, args: { schema_modifiers: [nullable] } }, { name: quantity, type: UInt, args: { size: 32 } }, { name: category, type: UInt, args: { size: 8 } }, - { name: time, type: DateTime }, ] required_time_column: timestamp storages: -- storage: outcomes_raw - is_writable: false + - storage: outcomes_raw + is_writable: false storage_selector: selector: DefaultQueryStorageSelector query_processors: -- processor: BasicFunctionsProcessor -- processor: TimeSeriesProcessor - args: - time_group_columns: - time: timestamp - time_parse_columns: - - timestamp -- processor: ReferrerRateLimiterProcessor -- processor: OrganizationRateLimiterProcessor - args: - org_column: org_id -- processor: ProjectReferrerRateLimiter - args: - project_column: project_id -- processor: ResourceQuotaProcessor - args: - project_field: project_id + - processor: BasicFunctionsProcessor + - processor: TimeSeriesProcessor + args: + time_group_columns: + time: timestamp + time_parse_columns: + - timestamp + - processor: ReferrerRateLimiterProcessor + - processor: OrganizationRateLimiterProcessor + args: + org_column: org_id + - processor: ProjectReferrerRateLimiter + args: + project_column: project_id + - processor: ResourceQuotaProcessor + args: + project_field: project_id validators: -- validator: EntityRequiredColumnValidator - args: - required_filter_columns: - - org_id + - validator: EntityRequiredColumnValidator + args: + required_filter_columns: + - org_id diff --git a/snuba/datasets/configuration/profiles/entities/profiles.yaml b/snuba/datasets/configuration/profiles/entities/profiles.yaml index 2ad5e930aa..b1fbadc514 100644 --- a/snuba/datasets/configuration/profiles/entities/profiles.yaml +++ b/snuba/datasets/configuration/profiles/entities/profiles.yaml @@ -8,12 +8,21 @@ schema: { name: transaction_id, type: UUID }, { name: profile_id, type: UUID }, { name: received, type: DateTime }, - { name: android_api_level, type: UInt, args: { schema_modifiers: [nullable], size: 32 } }, + { name: time, type: DateTime }, + { + name: android_api_level, + type: UInt, + args: { schema_modifiers: [nullable], size: 32 }, + }, { name: device_classification, type: String }, { name: device_locale, type: String }, { name: device_manufacturer, type: String }, { name: device_model, type: String }, - { name: device_os_build_number, type: String, args: { schema_modifiers: [nullable] } }, + { + name: device_os_build_number, + type: String, + args: { schema_modifiers: [nullable] }, + }, { name: device_os_name, type: String }, { name: device_os_version, type: String }, { name: architecture, type: String }, @@ -21,9 +30,9 @@ schema: { name: environment, type: String, args: { schema_modifiers: [nullable] } }, { name: platform, type: String }, { name: trace_id, type: UUID }, - { name: transaction_name, type: String}, + { name: transaction_name, type: String }, { name: version_name, type: String }, - { name: version_code, type: String}, + { name: version_code, type: String }, ] required_time_column: received @@ -53,7 +62,7 @@ query_processors: time_group_columns: time: received time_parse_columns: - - received + - received validators: - validator: EntityRequiredColumnValidator diff --git a/snuba/datasets/configuration/replays/entities/replays.yaml b/snuba/datasets/configuration/replays/entities/replays.yaml index 30224fe25c..69d16f4cd3 100644 --- a/snuba/datasets/configuration/replays/entities/replays.yaml +++ b/snuba/datasets/configuration/replays/entities/replays.yaml @@ -22,6 +22,7 @@ schema: args: { schema_modifiers: [nullable], size: 16 }, }, { name: timestamp, type: DateTime }, + { name: time, type: DateTime }, { name: replay_start_timestamp, type: DateTime, diff --git a/snuba/datasets/configuration/sessions/entities/sessions.yaml b/snuba/datasets/configuration/sessions/entities/sessions.yaml index 86176c8710..b6f6d57574 100644 --- a/snuba/datasets/configuration/sessions/entities/sessions.yaml +++ b/snuba/datasets/configuration/sessions/entities/sessions.yaml @@ -164,96 +164,96 @@ schema: ] required_time_column: started storages: -- storage: sessions_hourly - is_writable: false - translation_mappers: - columns: - - mapper: DurationQuantilesHourlyMapper - - mapper: FunctionColumn - args: - col_name: duration_avg - function_name: avgIfMerge - - mapper: PlusFunctionColumns - args: - col_name: sessions - op1_col: sessions - op1_func: countIfMerge - op2_col: sessions_preaggr - op2_func: sumIfMerge - - mapper: PlusFunctionColumns - args: - col_name: sessions_crashed - op1_col: sessions_crashed - op1_func: countIfMerge - op2_col: sessions_crashed_preaggr - op2_func: sumIfMerge - - mapper: PlusFunctionColumns - args: - col_name: sessions_abnormal - op1_col: sessions_abnormal - op1_func: countIfMerge - op2_col: sessions_abnormal_preaggr - op2_func: sumIfMerge - - mapper: PlusFunctionColumns - args: - col_name: sessions_errored - op1_col: sessions_errored - op1_func: uniqIfMerge - op2_col: sessions_errored_preaggr - op2_func: sumIfMerge - - mapper: FunctionColumn - args: - col_name: users - function_name: uniqIfMerge - - mapper: FunctionColumn - args: - col_name: users_crashed - function_name: uniqIfMerge - - mapper: FunctionColumn - args: - col_name: users_abnormal - function_name: uniqIfMerge - - mapper: FunctionColumn - args: - col_name: users_errored - function_name: uniqIfMerge -- storage: sessions_raw - is_writable: true - translation_mappers: - columns: - - mapper: DurationQuantilesRawMapper - - mapper: DurationAvgRawMapper - - mapper: SessionsRawNumSessionsMapper - - mapper: SessionsRawCrashedMapper - - mapper: SessionsRawSessionsAbnormalMapper - - mapper: SessionsRawErroredMapper - - mapper: SessionsRawUsersMapper - - mapper: SessionsRawUsersCrashedMapper - - mapper: SessionsRawUsersAbnormalMapper - - mapper: SessionsRawUsersErroredMapper + - storage: sessions_hourly + is_writable: false + translation_mappers: + columns: + - mapper: DurationQuantilesHourlyMapper + - mapper: FunctionColumn + args: + col_name: duration_avg + function_name: avgIfMerge + - mapper: PlusFunctionColumns + args: + col_name: sessions + op1_col: sessions + op1_func: countIfMerge + op2_col: sessions_preaggr + op2_func: sumIfMerge + - mapper: PlusFunctionColumns + args: + col_name: sessions_crashed + op1_col: sessions_crashed + op1_func: countIfMerge + op2_col: sessions_crashed_preaggr + op2_func: sumIfMerge + - mapper: PlusFunctionColumns + args: + col_name: sessions_abnormal + op1_col: sessions_abnormal + op1_func: countIfMerge + op2_col: sessions_abnormal_preaggr + op2_func: sumIfMerge + - mapper: PlusFunctionColumns + args: + col_name: sessions_errored + op1_col: sessions_errored + op1_func: uniqIfMerge + op2_col: sessions_errored_preaggr + op2_func: sumIfMerge + - mapper: FunctionColumn + args: + col_name: users + function_name: uniqIfMerge + - mapper: FunctionColumn + args: + col_name: users_crashed + function_name: uniqIfMerge + - mapper: FunctionColumn + args: + col_name: users_abnormal + function_name: uniqIfMerge + - mapper: FunctionColumn + args: + col_name: users_errored + function_name: uniqIfMerge + - storage: sessions_raw + is_writable: true + translation_mappers: + columns: + - mapper: DurationQuantilesRawMapper + - mapper: DurationAvgRawMapper + - mapper: SessionsRawNumSessionsMapper + - mapper: SessionsRawCrashedMapper + - mapper: SessionsRawSessionsAbnormalMapper + - mapper: SessionsRawErroredMapper + - mapper: SessionsRawUsersMapper + - mapper: SessionsRawUsersCrashedMapper + - mapper: SessionsRawUsersAbnormalMapper + - mapper: SessionsRawUsersErroredMapper storage_selector: selector: SessionsQueryStorageSelector query_processors: -- processor: BasicFunctionsProcessor -- processor: TimeSeriesProcessor - args: - time_group_columns: - bucketed_started: started - time_parse_columns: - - started - - received -- processor: OrganizationRateLimiterProcessor - args: - org_column: org_id -- processor: ProjectRateLimiterProcessor - args: - project_column: project_id + - processor: BasicFunctionsProcessor + - processor: TimeSeriesProcessor + args: + time_group_columns: + bucketed_started: started + time_parse_columns: + - started + - received + - processor: OrganizationRateLimiterProcessor + args: + org_column: org_id + - processor: ProjectRateLimiterProcessor + args: + project_column: project_id validators: -- validator: EntityRequiredColumnValidator - args: - required_filter_columns: - - org_id - - project_id + - validator: EntityRequiredColumnValidator + args: + required_filter_columns: + - org_id + - project_id subscription_processors: - processor: AddColumnCondition args: diff --git a/snuba/datasets/configuration/spans/entities/spans.yaml b/snuba/datasets/configuration/spans/entities/spans.yaml index 1e10126cf8..c035636da3 100644 --- a/snuba/datasets/configuration/spans/entities/spans.yaml +++ b/snuba/datasets/configuration/spans/entities/spans.yaml @@ -5,14 +5,27 @@ name: spans schema: [ { name: project_id, type: UInt, args: { size: 64 } }, - { name: transaction_id, type: UUID, args: { schema_modifiers: [ nullable ] } }, - { name: transaction_op, type: String, args: { schema_modifiers: [ nullable ] } }, + { + name: transaction_id, + type: UUID, + args: { schema_modifiers: [nullable] }, + }, + { + name: transaction_op, + type: String, + args: { schema_modifiers: [nullable] }, + }, { name: trace_id, type: UUID }, { name: span_id, type: UInt, args: { size: 64 } }, - { name: parent_span_id, type: UInt, args: { size: 64, schema_modifiers: [ nullable ] } }, + { + name: parent_span_id, + type: UInt, + args: { size: 64, schema_modifiers: [nullable] }, + }, { name: segment_id, type: UInt, args: { size: 64 } }, { name: is_segment, type: UInt, args: { size: 8 } }, { name: segment_name, type: String }, + { name: time, type: DateTime }, { name: start_timestamp, type: DateTime }, { name: start_ms, type: UInt, args: { size: 16 } }, { name: end_timestamp, type: DateTime }, @@ -21,22 +34,27 @@ schema: { name: exclusive_time, type: Float, args: { size: 64 } }, { name: op, type: String }, { name: group, type: UInt, args: { size: 64 } }, + { name: group_raw, type: UInt, args: { size: 64 } }, { name: span_status, type: UInt, args: { size: 8 } }, { name: span_kind, type: String }, { name: description, type: String }, - { name: status, type: UInt, args: { size: 32, schema_modifiers: [ nullable ] } }, + { + name: status, + type: UInt, + args: { size: 32, schema_modifiers: [nullable] }, + }, { name: module, type: String }, - { name: action, type: String, args: { schema_modifiers: [ nullable ] } }, - { name: domain, type: String, args: { schema_modifiers: [ nullable ] } }, - { name: platform, type: String, args: { schema_modifiers: [ nullable ] } }, - { name: user, type: String, args: { schema_modifiers: [ nullable] } }, + { name: action, type: String, args: { schema_modifiers: [nullable] } }, + { name: domain, type: String, args: { schema_modifiers: [nullable] } }, + { name: platform, type: String, args: { schema_modifiers: [nullable] } }, + { name: user, type: String, args: { schema_modifiers: [nullable] } }, { name: tags, type: Nested, args: { subcolumns: - [ { name: key, type: String }, { name: value, type: String } ], + [{ name: key, type: String }, { name: value, type: String }], }, }, { @@ -57,7 +75,7 @@ schema: args: { inner_type: { type: UInt, args: { size: 64 } }, - schema_modifiers: [ readonly ], + schema_modifiers: [readonly], }, }, { name: partition, type: UInt, args: { size: 16 } }, diff --git a/snuba/datasets/configuration/spans/storages/spans.yaml b/snuba/datasets/configuration/spans/storages/spans.yaml index 3bcf82513a..931b0f88cc 100644 --- a/snuba/datasets/configuration/spans/storages/spans.yaml +++ b/snuba/datasets/configuration/spans/storages/spans.yaml @@ -28,6 +28,7 @@ schema: { name: exclusive_time, type: Float, args: { size: 64 } }, { name: op, type: String }, { name: group, type: UInt, args: { size: 64 } }, + { name: group_raw, type: UInt, args: { size: 64 } }, { name: span_status, type: UInt, args: { size: 8 } }, { name: span_kind, type: String }, { name: description, type: String }, diff --git a/snuba/datasets/configuration/transactions/entities/transactions.yaml b/snuba/datasets/configuration/transactions/entities/transactions.yaml index 909de93df0..90073c9cc7 100644 --- a/snuba/datasets/configuration/transactions/entities/transactions.yaml +++ b/snuba/datasets/configuration/transactions/entities/transactions.yaml @@ -16,6 +16,7 @@ schema: }, { name: transaction_op, type: String }, { name: transaction_status, type: UInt, args: { size: 8 } }, + { name: time, type: DateTime }, { name: start_ts, type: DateTime }, { name: start_ms, type: UInt, args: { size: 16 } }, { name: finish_ts, type: DateTime }, @@ -140,11 +141,7 @@ storages: [ { mapper: ColumnToIPAddress, - args: - { - from_table_name, - from_col_name: "ip_address", - }, + args: { from_table_name, from_col_name: "ip_address" }, }, { mapper: ColumnToNullIf, @@ -296,7 +293,7 @@ storages: to_function_name: "arrayJoin", to_function_column: "tags.value", }, - } + }, ], subscriptables: [ @@ -343,14 +340,11 @@ storages: }, }, ], - } - } + }, + }, ] -storage_selector: - { - selector: DefaultQueryStorageSelector - } +storage_selector: { selector: DefaultQueryStorageSelector } query_processors: [ @@ -383,14 +377,8 @@ validators: validator: EntityRequiredColumnValidator, args: { required_filter_columns: [project_id] }, }, - { - validator: TagConditionValidator, - args: {} - }, - { - validator: DatetimeConditionValidator, - args: {} - }, + { validator: TagConditionValidator, args: {} }, + { validator: DatetimeConditionValidator, args: {} }, ] required_time_column: finish_ts @@ -402,7 +390,7 @@ subscription_validators: { max_allowed_aggregations: 1, disallowed_aggregations: [groupby, having, orderby], - required_time_column: finish_ts + required_time_column: finish_ts, }, }, ] diff --git a/snuba/datasets/pluggable_entity.py b/snuba/datasets/pluggable_entity.py index 688979f17d..83346d73dd 100644 --- a/snuba/datasets/pluggable_entity.py +++ b/snuba/datasets/pluggable_entity.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass, field from typing import Any, List, Mapping, Optional, Sequence @@ -50,7 +52,7 @@ class PluggableEntity(Entity): validators: Sequence[QueryValidator] required_time_column: Optional[str] storage_selector: QueryStorageSelector - validate_data_model: ColumnValidationMode = ColumnValidationMode.ERROR + validate_data_model: ColumnValidationMode | None = None join_relationships: Mapping[str, JoinRelationship] = field(default_factory=dict) function_call_validators: Mapping[str, FunctionCallValidator] = field( default_factory=dict @@ -65,7 +67,9 @@ def _get_builtin_validators(self) -> Sequence[QueryValidator]: mappers = [s.translation_mappers for s in self.storages] return [ EntityContainsColumnsValidator( - EntityColumnSet(self.columns), mappers, self.validate_data_model + EntityColumnSet(self.columns), + mappers, + self.validate_data_model or ColumnValidationMode.WARN, ) ] diff --git a/snuba/query/snql/parser.py b/snuba/query/snql/parser.py index 7a0fa210f3..73161f109a 100644 --- a/snuba/query/snql/parser.py +++ b/snuba/query/snql/parser.py @@ -1269,7 +1269,6 @@ def validate_lambda(exp: Lambda) -> None: def _replace_time_condition( query: Union[CompositeQuery[QueryEntity], LogicalQuery] ) -> None: - condition = query.get_condition() top_level = ( get_first_level_and_conditions(condition) if condition is not None else [] @@ -1462,7 +1461,6 @@ def _post_process( # have the __name__ attribute set automatically (and we don't set it manually) description = getattr(func, "__name__", "custom") with sentry_sdk.start_span(op="processor", description=description): - if settings and settings.get_dry_run(): with explain_meta.with_query_differ("snql_parsing", description, query): func(query) @@ -1506,7 +1504,6 @@ def parse_snql_query( custom_processing: Optional[CustomProcessors] = None, settings: QuerySettings | None = None, ) -> Tuple[Union[CompositeQuery[QueryEntity], LogicalQuery], str]: - with sentry_sdk.start_span(op="parser", description="parse_snql_query_initial"): query = parse_snql_query_initial(body) diff --git a/snuba/query/validation/validators.py b/snuba/query/validation/validators.py index 182ab4e984..bf4c207440 100644 --- a/snuba/query/validation/validators.py +++ b/snuba/query/validation/validators.py @@ -6,7 +6,15 @@ from enum import Enum from typing import Optional, Sequence, Type, cast -from snuba.clickhouse.translators.snuba.mappers import ColumnToExpression +from snuba.clickhouse.translators.snuba.allowed import DefaultNoneColumnMapper +from snuba.clickhouse.translators.snuba.function_call_mappers import ( + AggregateCurriedFunctionMapper, + AggregateFunctionMapper, +) +from snuba.clickhouse.translators.snuba.mappers import ( + ColumnToExpression, + SubscriptableMapper, +) from snuba.clickhouse.translators.snuba.mapping import TranslationMappers from snuba.datasets.entities.entity_data_model import EntityColumnSet from snuba.environment import metrics as environment_metrics @@ -16,6 +24,7 @@ build_match, get_first_level_and_conditions, ) +from snuba.query.data_source.simple import Entity as SimpleEntity from snuba.query.exceptions import InvalidExpressionException, InvalidQueryException from snuba.query.expressions import Column, Expression, FunctionCall, Literal from snuba.query.expressions import SubscriptableReference as SubscriptableReferenceExpr @@ -121,9 +130,20 @@ def __init__( # Parse and store those mappings as well self.mapped_columns = set() for mapper in mappers: - for colmapping in mapper.columns: - if isinstance(colmapping, ColumnToExpression): - self.mapped_columns.add(colmapping.from_col_name) + for func_mapping in mapper.functions: + if isinstance(func_mapping, AggregateFunctionMapper): + self.mapped_columns.add(func_mapping.column_to_map) + for curried_mapping in mapper.curried_functions: + if isinstance(curried_mapping, AggregateCurriedFunctionMapper): + self.mapped_columns.add(curried_mapping.column_to_map) + for sub_mapping in mapper.subscriptables: + if isinstance(sub_mapping, SubscriptableMapper): + self.mapped_columns.add(sub_mapping.from_column_name) + for col_mapping in mapper.columns: + if isinstance(col_mapping, ColumnToExpression): + self.mapped_columns.add(col_mapping.from_col_name) + elif isinstance(col_mapping, DefaultNoneColumnMapper): + self.mapped_columns.update(col_mapping.column_names) def validate(self, query: Query, alias: Optional[str] = None) -> None: if self.validation_mode == ColumnValidationMode.DO_NOTHING: @@ -141,7 +161,14 @@ def validate(self, query: Query, alias: Optional[str] = None) -> None: missing.add(column.column_name) if missing: - error_message = f"query column(s) {', '.join(missing)} do not exist" + prefix = "" + if isinstance(entity := query.get_from_clause(), SimpleEntity): + prefix = f"Entity {entity.key.value}: " + error_message = ( + f"{prefix}query columns ({', '.join(missing)}) do not exist" + if len(missing) > 1 + else f"{prefix}Query column '{missing.pop()}' does not exist" + ) if self.validation_mode == ColumnValidationMode.ERROR: raise InvalidQueryException(error_message) elif self.validation_mode == ColumnValidationMode.WARN: diff --git a/tests/clickhouse/query_dsl/test_project_id.py b/tests/clickhouse/query_dsl/test_project_id.py index 681ca5062b..1e96b70e9d 100644 --- a/tests/clickhouse/query_dsl/test_project_id.py +++ b/tests/clickhouse/query_dsl/test_project_id.py @@ -13,7 +13,7 @@ test_cases: Sequence[Tuple[Mapping[str, Any], Optional[Set[int]]]] = [ ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -24,7 +24,7 @@ ), # Simple single project condition ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -35,7 +35,7 @@ ), # Multiple projects in the query ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -46,7 +46,7 @@ ), # Multiple projects in the query provided as tuple ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -56,7 +56,7 @@ ), # No project condition ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -68,7 +68,7 @@ ), # Multiple project conditions, intersected together ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -82,7 +82,7 @@ ), # Multiple project conditions, in union ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -94,14 +94,14 @@ ), # A fairly stupid query ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], - ["column1", "=", "something"], - [["ifNull", ["column2", 0]], "=", 1], + ["event_id", "=", "something"], + [["ifNull", ["partition", 0]], "=", 1], ["project_id", "IN", [100, 200, 300]], - [["count", ["column3"]], "=", 10], + [["count", ["offset"]], "=", 10], ["project_id", "=", 100], ], }, @@ -109,7 +109,7 @@ ), # Multiple conditions in AND. Two project conditions ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -121,14 +121,14 @@ ), # Main project list in a conditions and multiple project conditions in OR ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], ["project_id", "IN", [100, 200, 300]], [ [["ifNull", ["project_id", 1000]], "=", 100], - [["count", ["column3"]], "=", 10], + [["count", ["offset"]], "=", 10], [["ifNull", ["project_id", 1000]], "=", 200], ], ], @@ -137,7 +137,7 @@ ), # Main project list in a conditions and multiple project conditions within unsupported function calls ( { - "selected_columns": ["column1"], + "selected_columns": ["event_id"], "conditions": [ ["timestamp", ">=", "2020-01-01T12:00:00"], ["timestamp", "<", "2020-01-02T12:00:00"], @@ -146,7 +146,7 @@ "and", [ ["equals", ["project_id", 100]], - ["equals", ["column1", "'something'"]], + ["equals", ["event_id", "'something'"]], ], ], "=", @@ -157,7 +157,7 @@ "and", [ ["equals", ["project_id", 200]], - ["equals", ["column3", "'something_else'"]], + ["equals", ["platform", "'something_else'"]], ], ], "=", diff --git a/tests/datasets/configuration/broken_entity_bad_query_processor.yaml b/tests/datasets/configuration/broken_entity_bad_query_processor.yaml index 75ff621b39..92bfd38c22 100644 --- a/tests/datasets/configuration/broken_entity_bad_query_processor.yaml +++ b/tests/datasets/configuration/broken_entity_bad_query_processor.yaml @@ -38,14 +38,14 @@ storages: - storage: generic_metrics_sets translation_mappers: functions: - - mapper: FunctionNameMapper - args: - from_name: uniq - to_name: uniqCombined64Merge - - mapper: FunctionNameMapper - args: - from_name: uniqIf - to_name: uniqCombined64MergeIf + - mapper: FunctionNameMapper + args: + from_name: uniq + to_name: uniqCombined64Merge + - mapper: FunctionNameMapper + args: + from_name: uniqIf + to_name: uniqCombined64MergeIf subscriptables: - mapper: subscriptable args: @@ -65,14 +65,14 @@ storages: is_writable: true translation_mappers: functions: - - mapper: FunctionNameMapper - args: - from_name: uniq - to_name: uniqCombined64Merge - - mapper: FunctionNameMapper - args: - from_name: uniqIf - to_name: uniqCombined64MergeIf + - mapper: FunctionNameMapper + args: + from_name: uniq + to_name: uniqCombined64Merge + - mapper: FunctionNameMapper + args: + from_name: uniqIf + to_name: uniqCombined64MergeIf subscriptables: - mapper: subscriptable args: diff --git a/tests/datasets/entities/storage_selectors/test_errors.py b/tests/datasets/entities/storage_selectors/test_errors.py index d098108e03..701b018a1e 100644 --- a/tests/datasets/entities/storage_selectors/test_errors.py +++ b/tests/datasets/entities/storage_selectors/test_errors.py @@ -58,7 +58,7 @@ pytest.param( """ MATCH (events) - SELECT col1 + SELECT event_id WHERE project_id IN tuple(2 , 3) AND timestamp>=toDateTime('2021-01-01') AND timestamp=toDateTime('2021-01-01') AND timestamp None: query, _ = parse_snql_query( """ MATCH (events) - SELECT col1 + SELECT event_id WHERE project_id IN tuple(2 , 3) AND timestamp>=toDateTime('2021-01-01') AND timestamp= toDateTime('2021-01-01T00:00:00') AND finish_ts < toDateTime('2021-01-02T00:00:00') diff --git a/tests/datasets/entities/storage_selectors/test_sessions.py b/tests/datasets/entities/storage_selectors/test_sessions.py index 200bee0cc8..48ff840ab8 100644 --- a/tests/datasets/entities/storage_selectors/test_sessions.py +++ b/tests/datasets/entities/storage_selectors/test_sessions.py @@ -82,7 +82,7 @@ def test_assert_raises() -> None: query, _ = parse_snql_query( """ MATCH (events) - SELECT col1 + SELECT event_id WHERE project_id IN tuple(2 , 3) AND timestamp>=toDateTime('2021-01-01') AND timestamp= toDateTime('2021-01-01T00:00:00') AND finish_ts < toDateTime('2021-01-02T00:00:00') @@ -78,7 +78,7 @@ pytest.param( """ MATCH (events) - SELECT col1 + SELECT event_id WHERE tags_key IN tuple('t1', 't2') AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') @@ -148,7 +148,7 @@ def temp_settings() -> Any: def test_storage_unavailable_error_in_plan_builder(temp_settings: Any) -> None: snql_query = """ MATCH (events) - SELECT col1 + SELECT event_id WHERE tags_key IN tuple('t1', 't2') AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') diff --git a/tests/datasets/test_discover.py b/tests/datasets/test_discover.py index 75ae28c25c..bda40f8685 100644 --- a/tests/datasets/test_discover.py +++ b/tests/datasets/test_discover.py @@ -16,9 +16,9 @@ {"conditions": [["type", "=", "transaction"], ["duration", ">", 1000]]}, EntityKey.DISCOVER_TRANSACTIONS, ), - ({"conditions": [["type", "=", "error"]]}, EntityKey.DISCOVER_EVENTS), + ({"conditions": [["type", "=", "platform"]]}, EntityKey.DISCOVER_EVENTS), ( - {"conditions": [[["type", "=", "error"], ["type", "=", "transaction"]]]}, + {"conditions": [[["type", "=", "platform"], ["type", "=", "transaction"]]]}, EntityKey.DISCOVER, ), ( @@ -29,7 +29,7 @@ "or", [ ["equals", ["type", "transaction"]], - ["equals", ["type", "default"]], + ["equals", ["type", "release"]], ], ], "=", @@ -47,7 +47,7 @@ "and", [ ["equals", ["duration", 10]], - ["notEquals", ["type", "error"]], + ["notEquals", ["type", "platform"]], ], ], "=", @@ -65,7 +65,7 @@ "and", [ ["notEquals", ["type", "transaction"]], - ["notEquals", ["type", "error"]], + ["notEquals", ["type", "platform"]], ], ], "=", @@ -94,7 +94,7 @@ "and", [ ["notEquals", ["type", "transaction"]], - ["notEquals", ["type", "error"]], + ["notEquals", ["type", "platform"]], ], ], "=", @@ -112,8 +112,8 @@ [ "and", [ - ["notEquals", ["type", "default"]], - ["notEquals", ["type", "error"]], + ["notEquals", ["type", "release"]], + ["notEquals", ["type", "platform"]], ], ], "=", diff --git a/tests/datasets/test_metrics_processing.py b/tests/datasets/test_metrics_processing.py index 028d392685..4f6c3ab079 100644 --- a/tests/datasets/test_metrics_processing.py +++ b/tests/datasets/test_metrics_processing.py @@ -174,12 +174,12 @@ ), pytest.param( "metrics_distributions", - "avg(something_else)", + "avg(granularity)", EntityKey.METRICS_DISTRIBUTIONS, FunctionCall( - "_snuba_avg(something_else)", + "_snuba_avg(granularity)", "avg", - (Column("_snuba_something_else", None, "something_else"),), + (Column("_snuba_granularity", None, "granularity"),), ), id="Test that a column other than value is not transformed", ), diff --git a/tests/helpers.py b/tests/helpers.py index c0b89c8cc0..216a283273 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -3,8 +3,12 @@ from snuba.clickhouse.http import JSONRowEncoder from snuba.consumers.types import KafkaMessageMetadata +from snuba.datasets.entities.entity_key import EntityKey +from snuba.datasets.entities.factory import get_entity, override_entity_map +from snuba.datasets.pluggable_entity import PluggableEntity from snuba.datasets.storage import WritableStorage from snuba.processor import InsertBatch, InsertEvent, ProcessedMessage +from snuba.query.validation.validators import ColumnValidationMode from snuba.utils.metrics.backends.dummy import DummyMetricsBackend from snuba.writer import BatchWriterEncoderWrapper, WriterTableRow @@ -28,9 +32,7 @@ def write_processed_messages( def write_unprocessed_events( storage: WritableStorage, events: Sequence[Union[InsertEvent, Mapping[str, Any]]] ) -> None: - processor = storage.get_table_writer().get_stream_loader().get_processor() - processed_messages = [] for i, event in enumerate(events): processed_message = processor.process_message( @@ -46,7 +48,6 @@ def write_raw_unprocessed_events( storage: WritableStorage, events: Sequence[Union[InsertEvent, Mapping[str, Any]]], ) -> None: - processor = storage.get_table_writer().get_stream_loader().get_processor() processed_messages = [] @@ -58,3 +59,12 @@ def write_raw_unprocessed_events( processed_messages.append(processed_message) write_processed_messages(storage, processed_messages) + + +def override_entity_column_validator( + entity_key: EntityKey, validator_mode: ColumnValidationMode +) -> None: + entity = get_entity(entity_key) + assert isinstance(entity, PluggableEntity) + entity.validate_data_model = validator_mode + override_entity_map(entity_key, entity) diff --git a/tests/query/parser/test_query.py b/tests/query/parser/test_query.py index 177ec56768..fb14034d1f 100644 --- a/tests/query/parser/test_query.py +++ b/tests/query/parser/test_query.py @@ -80,8 +80,8 @@ def snql_conditions_with_default(*conditions: str) -> str: pytest.param( """ MATCH (events) - SELECT test_func(column4) AS test_func_alias, - column1 BY column2, column3 + SELECT test_func(release) AS test_func_alias, + event_id BY project_id, platform WHERE {conditions} """.format( conditions=snql_conditions_with_default() @@ -92,26 +92,26 @@ def snql_conditions_with_default(*conditions: str) -> str: ), selected_columns=[ SelectedExpression( - "column2", Column("_snuba_column2", None, "column2") + "project_id", Column("_snuba_project_id", None, "project_id") ), SelectedExpression( - "column3", Column("_snuba_column3", None, "column3") + "platform", Column("_snuba_platform", None, "platform") ), SelectedExpression( "test_func_alias", FunctionCall( "_snuba_test_func_alias", "test_func", - (Column("_snuba_column4", None, "column4"),), + (Column("_snuba_release", None, "release"),), ), ), SelectedExpression( - "column1", Column("_snuba_column1", None, "column1") + "event_id", Column("_snuba_event_id", None, "event_id") ), ], groupby=[ - Column("_snuba_column2", None, "column2"), - Column("_snuba_column3", None, "column3"), + Column("_snuba_project_id", None, "project_id"), + Column("_snuba_platform", None, "platform"), ], condition=with_required(), limit=1000, @@ -123,11 +123,11 @@ def snql_conditions_with_default(*conditions: str) -> str: MATCH (events) SELECT count(platform) AS platforms, uniq(platform) AS uniq_platforms, - testF(platform, field2) AS top_platforms, - f1(column1, column2) AS f1_alias, f2() AS f2_alias + testF(platform, release) AS top_platforms, + f1(partition, offset) AS f1_alias, f2() AS f2_alias BY format_eventid(event_id) WHERE {conditions} - HAVING times_seen > 1 + HAVING retention_days > 1 """.format( conditions=snql_conditions_with_default( "tags[sentry:dist] IN tuple('dist1', 'dist2')" @@ -169,7 +169,7 @@ def snql_conditions_with_default(*conditions: str) -> str: "testF", ( Column("_snuba_platform", None, "platform"), - Column("_snuba_field2", None, "field2"), + Column("_snuba_release", None, "release"), ), ), ), @@ -179,8 +179,8 @@ def snql_conditions_with_default(*conditions: str) -> str: "_snuba_f1_alias", "f1", ( - Column("_snuba_column1", None, "column1"), - Column("_snuba_column2", None, "column2"), + Column("_snuba_partition", None, "partition"), + Column("_snuba_offset", None, "offset"), ), ), ), @@ -208,7 +208,7 @@ def snql_conditions_with_default(*conditions: str) -> str: ), having=binary_condition( "greater", - Column("_snuba_times_seen", None, "times_seen"), + Column("_snuba_retention_days", None, "retention_days"), Literal(None, 1), ), groupby=[ @@ -225,11 +225,11 @@ def snql_conditions_with_default(*conditions: str) -> str: pytest.param( """ MATCH (events) - SELECT column1, column2 + SELECT partition, offset WHERE {conditions} - ORDER BY column1 ASC, - column2 DESC, - func(column3) DESC + ORDER BY partition ASC, + offset DESC, + func(retention_days) DESC """.format( conditions=snql_conditions_with_default() ), @@ -239,26 +239,24 @@ def snql_conditions_with_default(*conditions: str) -> str: ), selected_columns=[ SelectedExpression( - "column1", Column("_snuba_column1", None, "column1") - ), - SelectedExpression( - "column2", Column("_snuba_column2", None, "column2") + "partition", Column("_snuba_partition", None, "partition") ), + SelectedExpression("offset", Column("_snuba_offset", None, "offset")), ], condition=with_required(), groupby=None, having=None, order_by=[ OrderBy( - OrderByDirection.ASC, Column("_snuba_column1", None, "column1") - ), - OrderBy( - OrderByDirection.DESC, Column("_snuba_column2", None, "column2") + OrderByDirection.ASC, Column("_snuba_partition", None, "partition") ), + OrderBy(OrderByDirection.DESC, Column("_snuba_offset", None, "offset")), OrderBy( OrderByDirection.DESC, FunctionCall( - None, "func", (Column("_snuba_column3", None, "column3"),) + None, + "func", + (Column("_snuba_retention_days", None, "retention_days"),), ), ), ], @@ -269,9 +267,9 @@ def snql_conditions_with_default(*conditions: str) -> str: pytest.param( """ MATCH (events) - SELECT column1 BY column1 + SELECT partition BY platform WHERE {conditions} - ORDER BY column1 DESC + ORDER BY partition DESC """.format( conditions=snql_conditions_with_default() ), @@ -281,18 +279,18 @@ def snql_conditions_with_default(*conditions: str) -> str: ), selected_columns=[ SelectedExpression( - "column1", Column("_snuba_column1", None, "column1") + "platform", Column("_snuba_platform", None, "platform") ), SelectedExpression( - "column1", Column("_snuba_column1", None, "column1") + "partition", Column("_snuba_partition", None, "partition") ), ], condition=with_required(), - groupby=[Column("_snuba_column1", None, "column1")], + groupby=[Column("_snuba_platform", None, "platform")], having=None, order_by=[ OrderBy( - OrderByDirection.DESC, Column("_snuba_column1", None, "column1") + OrderByDirection.DESC, Column("_snuba_partition", None, "partition") ) ], limit=1000, @@ -302,7 +300,7 @@ def snql_conditions_with_default(*conditions: str) -> str: pytest.param( """ MATCH (events) - SELECT column1, tags[test] BY foo(tags[test2]) + SELECT platform, tags[test] BY foo(tags[test2]) WHERE {conditions} """.format( conditions=snql_conditions_with_default() @@ -327,7 +325,7 @@ def snql_conditions_with_default(*conditions: str) -> str: ), ), SelectedExpression( - "column1", Column("_snuba_column1", None, "column1") + "platform", Column("_snuba_platform", None, "platform") ), SelectedExpression( "tags[test]", @@ -359,8 +357,8 @@ def snql_conditions_with_default(*conditions: str) -> str: pytest.param( """ MATCH (events) - SELECT group_id, goo(something) AS issue_id, - foo(zoo(a)) AS a + SELECT group_id, goo(partition) AS issue_id, + foo(zoo(offset)) AS offset WHERE {conditions} ORDER BY group_id ASC """.format( @@ -380,7 +378,7 @@ def snql_conditions_with_default(*conditions: str) -> str: FunctionCall( "_snuba_issue_id", "goo", - (Column("_snuba_something", None, "something"),), + (Column("_snuba_partition", None, "partition"),), ), ), ), @@ -390,15 +388,15 @@ def snql_conditions_with_default(*conditions: str) -> str: FunctionCall( "_snuba_issue_id", "goo", - (Column("_snuba_something", None, "something"),), + (Column("_snuba_partition", None, "partition"),), ), ), SelectedExpression( - "a", + "offset", FunctionCall( - "_snuba_a", + "_snuba_offset", "foo", - (FunctionCall(None, "zoo", (Column(None, None, "a"),)),), + (FunctionCall(None, "zoo", (Column(None, None, "offset"),)),), ), ), ], @@ -412,7 +410,7 @@ def snql_conditions_with_default(*conditions: str) -> str: FunctionCall( "_snuba_issue_id", "goo", - (Column("_snuba_something", None, "something"),), + (Column("_snuba_partition", None, "partition"),), ), ), ), @@ -429,7 +427,7 @@ def snql_conditions_with_default(*conditions: str) -> str: FunctionCall( "_snuba_issue_id", "goo", - (Column("_snuba_something", None, "something"),), + (Column("_snuba_partition", None, "partition"),), ), ), ), @@ -442,8 +440,8 @@ def snql_conditions_with_default(*conditions: str) -> str: pytest.param( """ MATCH (events) - SELECT foo(column3) AS exp, - foo(column3) AS exp + SELECT foo(partition) AS exp, + foo(partition) AS exp WHERE {conditions} """.format( conditions=snql_conditions_with_default() @@ -458,7 +456,7 @@ def snql_conditions_with_default(*conditions: str) -> str: FunctionCall( "_snuba_exp", "foo", - (Column("_snuba_column3", None, "column3"),), + (Column("_snuba_partition", None, "partition"),), ), ), SelectedExpression( @@ -466,7 +464,7 @@ def snql_conditions_with_default(*conditions: str) -> str: FunctionCall( "_snuba_exp", "foo", - (Column("_snuba_column3", None, "column3"),), + (Column("_snuba_partition", None, "partition"),), ), ), ], @@ -478,7 +476,7 @@ def snql_conditions_with_default(*conditions: str) -> str: pytest.param( """ MATCH (events) - SELECT foo(column) AS exp, exp + SELECT foo(partition) AS exp, exp WHERE {conditions} """.format( conditions=snql_conditions_with_default() @@ -491,13 +489,17 @@ def snql_conditions_with_default(*conditions: str) -> str: SelectedExpression( "exp", FunctionCall( - "_snuba_exp", "foo", (Column("_snuba_column", None, "column"),) + "_snuba_exp", + "foo", + (Column("_snuba_partition", None, "partition"),), ), ), SelectedExpression( "exp", FunctionCall( - "_snuba_exp", "foo", (Column("_snuba_column", None, "column"),) + "_snuba_exp", + "foo", + (Column("_snuba_partition", None, "partition"),), ), ), ], diff --git a/tests/query/processors/test_arrayjoin_optimizer.py b/tests/query/processors/test_arrayjoin_optimizer.py index 1df8c12eba..c6b5795ff0 100644 --- a/tests/query/processors/test_arrayjoin_optimizer.py +++ b/tests/query/processors/test_arrayjoin_optimizer.py @@ -237,7 +237,7 @@ def with_required(condition: Expression) -> Expression: pytest.param( """ MATCH (transactions) - SELECT col1 + SELECT platform WHERE tags_key IN tuple('t1', 't2') AND finish_ts >= toDateTime('2021-01-01T00:00:00') AND finish_ts < toDateTime('2021-01-02T00:00:00') @@ -247,7 +247,8 @@ def with_required(condition: Expression) -> Expression: None, selected_columns=[ SelectedExpression( - name="col1", expression=Column("_snuba_col1", None, "col1") + name="platform", + expression=Column("_snuba_platform", None, "platform"), ) ], condition=with_required( @@ -263,7 +264,7 @@ def with_required(condition: Expression) -> Expression: """ MATCH (transactions) SELECT tags_key, tags_value - WHERE col IN tuple('t1', 't2') + WHERE release IN tuple('t1', 't2') AND finish_ts >= toDateTime('2021-01-01T00:00:00') AND finish_ts < toDateTime('2021-01-02T00:00:00') AND project_id = 1 @@ -302,7 +303,7 @@ def with_required(condition: Expression) -> Expression: ], condition=with_required( in_condition( - Column("_snuba_col", None, "col"), + Column("_snuba_release", None, "release"), [Literal(None, "t1"), Literal(None, "t2")], ) ), diff --git a/tests/query/processors/test_prewhere.py b/tests/query/processors/test_prewhere.py index de8197180a..392e5355ae 100644 --- a/tests/query/processors/test_prewhere.py +++ b/tests/query/processors/test_prewhere.py @@ -52,12 +52,22 @@ # Add pre-where condition in the expected order { "conditions": [ - ["d", "=", "1"], - ["c", "=", "3"], - [["and", [["equals", ["a", "'1'"]], ["equals", ["b", "'2'"]]]], "=", 1], + ["event_id", "=", "1"], + ["partition", "=", "3"], + [ + [ + "and", + [ + ["equals", ["offset", "'1'"]], + ["equals", ["retention_days", "'2'"]], + ], + ], + "=", + 1, + ], ], }, - ["a", "b", "c"], + ["offset", "retention_days", "partition"], [], FunctionCall( None, @@ -66,12 +76,15 @@ FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_d", None, "d"), Literal(None, "1")), + ( + Column("_snuba_event_id", None, "event_id"), + Literal(None, "1"), + ), ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_c", None, "c"), Literal(None, "3")), + (Column("_snuba_partition", None, "partition"), Literal(None, "3")), ), ), ), @@ -82,12 +95,15 @@ FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_a", None, "a"), Literal(None, "1")), + (Column("_snuba_offset", None, "offset"), Literal(None, "1")), ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_b", None, "b"), Literal(None, "2")), + ( + Column("_snuba_retention_days", None, "retention_days"), + Literal(None, "2"), + ), ), ), ), @@ -95,8 +111,13 @@ ), ( # Do not add conditions that are parts of an OR - {"conditions": [[["a", "=", "1"], ["b", "=", "2"]], ["c", "=", "3"]]}, - ["a", "b", "c"], + { + "conditions": [ + [["project_id", "=", "1"], ["partition", "=", "2"]], + ["offset", "=", "3"], + ] + }, + ["project_id", "partition", "offset"], [], FunctionCall( None, @@ -105,47 +126,56 @@ FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_a", None, "a"), Literal(None, "1")), + ( + Column("_snuba_project_id", None, "project_id"), + Literal(None, "1"), + ), ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_b", None, "b"), Literal(None, "2")), + (Column("_snuba_partition", None, "partition"), Literal(None, "2")), ), ), ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_c", None, "c"), Literal(None, "3")), + (Column("_snuba_offset", None, "offset"), Literal(None, "3")), ), False, ), ( # Exclude NOT IN condition from the prewhere as they are generally not excluding # most of the dataset. - {"conditions": [["a", "NOT IN", [1, 2, 3]], ["b", "=", "2"], ["c", "=", "3"]]}, - ["a", "b"], + { + "conditions": [ + ["event_id", "NOT IN", [1, 2, 3]], + ["partition", "=", "2"], + ["offset", "=", "3"], + ] + }, + ["event_id", "partition"], [], FunctionCall( None, BooleanFunctions.AND, ( not_in_condition( - Column("_snuba_a", None, "a"), + Column("_snuba_event_id", None, "event_id"), [Literal(None, 1), Literal(None, 2), Literal(None, 3)], ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_c", None, "c"), Literal(None, "3")), + (Column("_snuba_offset", None, "offset"), Literal(None, "3")), ), ), ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], - (Column("_snuba_b", None, "b"), Literal(None, "2")), + (Column("_snuba_partition", None, "partition"), Literal(None, "2")), ), False, ), diff --git a/tests/query/snql/test_query.py b/tests/query/snql/test_query.py index b62e802b2a..ee1fc5ba55 100644 --- a/tests/query/snql/test_query.py +++ b/tests/query/snql/test_query.py @@ -62,7 +62,7 @@ def build_cond(tn: str) -> str: test_cases = [ pytest.param( - f"MATCH (events) SELECT 4-5, c WHERE {added_condition} GRANULARITY 60", + f"MATCH (events) SELECT 4-5, event_id WHERE {added_condition} GRANULARITY 60", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -74,7 +74,9 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ), ], granularity=60, condition=required_condition, @@ -84,7 +86,7 @@ def build_cond(tn: str) -> str: id="granularity on whole query", ), pytest.param( - f"MATCH (events) SELECT 4-5, c WHERE {added_condition} TOTALS true", + f"MATCH (events) SELECT 4-5, event_id WHERE {added_condition} TOTALS true", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -96,7 +98,9 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ), ], condition=required_condition, totals=True, @@ -106,7 +110,7 @@ def build_cond(tn: str) -> str: id="totals on whole query", ), pytest.param( - f"MATCH (events SAMPLE 0.5) SELECT 4-5, c WHERE {added_condition}", + f"MATCH (events SAMPLE 0.5) SELECT 4-5, event_id WHERE {added_condition}", LogicalQuery( QueryEntity( EntityKey.EVENTS, @@ -120,7 +124,9 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ), ], condition=required_condition, sample=0.5, @@ -130,7 +136,7 @@ def build_cond(tn: str) -> str: id="sample on entity", ), pytest.param( - f"MATCH (events) SELECT 4-5, c,d,e WHERE {added_condition} LIMIT 5 BY c,d,e", + f"MATCH (events) SELECT 4-5, event_id,title,release WHERE {added_condition} LIMIT 5 BY event_id,title,release", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -142,17 +148,21 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), - SelectedExpression("d", Column("_snuba_d", None, "d")), - SelectedExpression("e", Column("_snuba_e", None, "e")), + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ), + SelectedExpression("title", Column("_snuba_title", None, "title")), + SelectedExpression( + "release", Column("_snuba_release", None, "release") + ), ], condition=required_condition, limitby=LimitBy( 5, [ - Column("_snuba_c", None, "c"), - Column("_snuba_d", None, "d"), - Column("_snuba_e", None, "e"), + Column("_snuba_event_id", None, "event_id"), + Column("_snuba_title", None, "title"), + Column("_snuba_release", None, "release"), ], ), limit=1000, @@ -161,7 +171,7 @@ def build_cond(tn: str) -> str: id="limit by multiple columns", ), pytest.param( - f"MATCH (events) SELECT 4-5, c WHERE {added_condition} LIMIT 5 BY c", + f"MATCH (events) SELECT 4-5, group_id WHERE {added_condition} LIMIT 5 BY group_id", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -173,17 +183,19 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "group_id", Column("_snuba_group_id", None, "group_id") + ), ], condition=required_condition, - limitby=LimitBy(5, [Column("_snuba_c", None, "c")]), + limitby=LimitBy(5, [Column("_snuba_group_id", None, "group_id")]), limit=1000, offset=0, ), id="limit by single column", ), pytest.param( - f"MATCH (events) SELECT 4-5, c WHERE {added_condition} LIMIT 5 OFFSET 3", + f"MATCH (events) SELECT 4-5, event_id WHERE {added_condition} LIMIT 5 OFFSET 3", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -195,7 +207,9 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ), ], condition=required_condition, limit=5, @@ -204,7 +218,7 @@ def build_cond(tn: str) -> str: id="limit and offset", ), pytest.param( - f"MATCH (events) SELECT 4-5, c, arrayJoin(c) AS x WHERE {added_condition} TOTALS true", + f"MATCH (events) SELECT 4-5, tags, arrayJoin(tags) AS x WHERE {added_condition} TOTALS true", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -216,11 +230,11 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression("tags", Column("_snuba_tags", None, "tags")), SelectedExpression( "x", FunctionCall( - "_snuba_x", "arrayJoin", (Column("_snuba_c", None, "c"),) + "_snuba_x", "arrayJoin", (Column("_snuba_tags", None, "tags"),) ), ), ], @@ -232,7 +246,7 @@ def build_cond(tn: str) -> str: id="Array join", ), pytest.param( - f"MATCH (events) SELECT 4-5, 3* foo(c) AS foo, c WHERE {added_condition}", + f"MATCH (events) SELECT 4-5, 3* foo(partition) AS foo, partition WHERE {added_condition}", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -245,19 +259,23 @@ def build_cond(tn: str) -> str: ), ), SelectedExpression( - "3* foo(c) AS foo", + "3* foo(partition) AS foo", FunctionCall( - "_snuba_3* foo(c) AS foo", + "_snuba_3* foo(partition) AS foo", "multiply", ( Literal(None, 3), FunctionCall( - "_snuba_foo", "foo", (Column("_snuba_c", None, "c"),) + "_snuba_foo", + "foo", + (Column("_snuba_partition", None, "partition"),), ), ), ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "partition", Column("_snuba_partition", None, "partition") + ), ], condition=required_condition, limit=1000, @@ -267,7 +285,7 @@ def build_cond(tn: str) -> str: ), pytest.param( f"""MATCH (events) - SELECT 4-5,3*foo(c) AS foo,c + SELECT 4-5,3*foo(partition) AS foo,partition WHERE platform NOT IN tuple('x', 'y') AND message IS NULL AND {added_condition}""", LogicalQuery( @@ -282,19 +300,23 @@ def build_cond(tn: str) -> str: ), ), SelectedExpression( - "3*foo(c) AS foo", + "3*foo(partition) AS foo", FunctionCall( - "_snuba_3*foo(c) AS foo", + "_snuba_3*foo(partition) AS foo", "multiply", ( Literal(None, 3), FunctionCall( - "_snuba_foo", "foo", (Column("_snuba_c", None, "c"),) + "_snuba_foo", + "foo", + (Column("_snuba_partition", None, "partition"),), ), ), ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "partition", Column("_snuba_partition", None, "partition") + ), ], condition=binary_condition( "and", @@ -319,7 +341,7 @@ def build_cond(tn: str) -> str: id="Basic query with word condition ops", ), pytest.param( - f"MATCH (events) SELECT count() AS count BY tags[key], measurements[lcp.elementSize] WHERE measurements[lcp.elementSize] > 1 AND {added_condition}", + f"MATCH (events) SELECT count() AS count BY tags[key], contexts[lcp.elementSize] WHERE contexts[lcp.elementSize] > 1 AND {added_condition}", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -334,10 +356,10 @@ def build_cond(tn: str) -> str: ), ), SelectedExpression( - "measurements[lcp.elementSize]", + "contexts[lcp.elementSize]", SubscriptableReference( - "_snuba_measurements[lcp.elementSize]", - Column("_snuba_measurements", None, "measurements"), + "_snuba_contexts[lcp.elementSize]", + Column("_snuba_contexts", None, "contexts"), Literal(None, "lcp.elementSize"), ), ), @@ -353,8 +375,8 @@ def build_cond(tn: str) -> str: Literal(None, "key"), ), SubscriptableReference( - "_snuba_measurements[lcp.elementSize]", - Column("_snuba_measurements", None, "measurements"), + "_snuba_contexts[lcp.elementSize]", + Column("_snuba_contexts", None, "contexts"), Literal(None, "lcp.elementSize"), ), ], @@ -363,8 +385,8 @@ def build_cond(tn: str) -> str: binary_condition( "greater", SubscriptableReference( - "_snuba_measurements[lcp.elementSize]", - Column("_snuba_measurements", None, "measurements"), + "_snuba_contexts[lcp.elementSize]", + Column("_snuba_contexts", None, "contexts"), Literal(None, "lcp.elementSize"), ), Literal(None, 1), @@ -377,13 +399,15 @@ def build_cond(tn: str) -> str: id="Basic query with subscriptables", ), pytest.param( - f"MATCH (events) SELECT (2*(4-5)+3), g(c) AS goo, c BY d, 2+7 WHERE {added_condition} ORDER BY f DESC", + f"MATCH (events) SELECT (2*(4-5)+3), g(partition) AS goo, partition BY group_id, 2+7 WHERE {added_condition} ORDER BY offset DESC", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), selected_columns=[ - SelectedExpression("d", Column("_snuba_d", None, "d")), + SelectedExpression( + "group_id", Column("_snuba_group_id", None, "group_id") + ), SelectedExpression( "2+7", FunctionCall( @@ -414,31 +438,39 @@ def build_cond(tn: str) -> str: ), SelectedExpression( "goo", - FunctionCall("_snuba_goo", "g", (Column("_snuba_c", None, "c"),)), + FunctionCall( + "_snuba_goo", + "g", + (Column("_snuba_partition", None, "partition"),), + ), + ), + SelectedExpression( + "partition", Column("_snuba_partition", None, "partition") ), - SelectedExpression("c", Column("_snuba_c", None, "c")), ], condition=required_condition, groupby=[ - Column("_snuba_d", None, "d"), + Column("_snuba_group_id", None, "group_id"), FunctionCall( "_snuba_2+7", "plus", (Literal(None, 2), Literal(None, 7)) ), ], - order_by=[OrderBy(OrderByDirection.DESC, Column("_snuba_f", None, "f"))], + order_by=[ + OrderBy(OrderByDirection.DESC, Column("_snuba_offset", None, "offset")) + ], limit=1000, offset=0, ), id="Simple complete query with example of parenthesized arithmetic expression in SELECT", ), pytest.param( - f"MATCH (events) SELECT (2*(4-5)+3), foo(c) AS thing2, c BY d, 2+7 WHERE {added_condition} ORDER BY f DESC", + f"MATCH (events) SELECT (2*(4-5)+3), foo(partition) AS thing2, partition BY offset, 2+7 WHERE {added_condition} ORDER BY group_id DESC", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), selected_columns=[ - SelectedExpression("d", Column("_snuba_d", None, "d")), + SelectedExpression("offset", Column("_snuba_offset", None, "offset")), SelectedExpression( "2+7", FunctionCall( @@ -470,26 +502,34 @@ def build_cond(tn: str) -> str: SelectedExpression( "thing2", FunctionCall( - "_snuba_thing2", "foo", (Column("_snuba_c", None, "c"),) + "_snuba_thing2", + "foo", + (Column("_snuba_partition", None, "partition"),), ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "partition", Column("_snuba_partition", None, "partition") + ), ], condition=required_condition, groupby=[ - Column("_snuba_d", None, "d"), + Column("_snuba_offset", None, "offset"), FunctionCall( "_snuba_2+7", "plus", (Literal(None, 2), Literal(None, 7)) ), ], - order_by=[OrderBy(OrderByDirection.DESC, Column("_snuba_f", None, "f"))], + order_by=[ + OrderBy( + OrderByDirection.DESC, Column("_snuba_group_id", None, "group_id") + ) + ], limit=1000, offset=0, ), id="Simple complete query with aliased function in SELECT", ), pytest.param( - f"MATCH (events) SELECT toDateTime('2020-01-01') AS now, 3*foo(c) AS foo BY toDateTime('2020-01-01') AS now WHERE {added_condition}", + f"MATCH (events) SELECT toDateTime('2020-01-01') AS now, 3*foo(partition) AS foo BY toDateTime('2020-01-01') AS now WHERE {added_condition}", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -504,14 +544,16 @@ def build_cond(tn: str) -> str: Literal("_snuba_now", datetime.datetime(2020, 1, 1, 0, 0)), ), SelectedExpression( - "3*foo(c) AS foo", + "3*foo(partition) AS foo", FunctionCall( - "_snuba_3*foo(c) AS foo", + "_snuba_3*foo(partition) AS foo", "multiply", ( Literal(None, 3), FunctionCall( - "_snuba_foo", "foo", (Column("_snuba_c", None, "c"),) + "_snuba_foo", + "foo", + (Column("_snuba_partition", None, "partition"),), ), ), ), @@ -525,38 +567,42 @@ def build_cond(tn: str) -> str: id="Basic query with date literals", ), pytest.param( - f"MATCH (events) SELECT a WHERE time_seen<3 AND last_seen=2 AND c=2 AND d=3 AND {added_condition}", + f"MATCH (events) SELECT event_id WHERE partition<3 AND offset=2 AND project_id=2 AND group_id=3 AND {added_condition}", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), - selected_columns=[SelectedExpression("a", Column("_snuba_a", None, "a"))], + selected_columns=[ + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ) + ], condition=binary_condition( "and", binary_condition( "less", - Column("_snuba_time_seen", None, "time_seen"), + Column("_snuba_partition", None, "partition"), Literal(None, 3), ), binary_condition( "and", binary_condition( "equals", - Column("_snuba_last_seen", None, "last_seen"), + Column("_snuba_offset", None, "offset"), Literal(None, 2), ), binary_condition( "and", binary_condition( "equals", - Column("_snuba_c", None, "c"), + Column("_snuba_project_id", None, "project_id"), Literal(None, 2), ), binary_condition( "and", binary_condition( "equals", - Column("_snuba_d", None, "d"), + Column("_snuba_group_id", None, "group_id"), Literal(None, 3), ), required_condition, @@ -570,12 +616,16 @@ def build_cond(tn: str) -> str: id="Query with multiple conditions joined by AND", ), pytest.param( - f"MATCH (events) SELECT a WHERE ((time_seen<3 OR last_seen=afternoon) OR name=bob) AND {added_condition}", + f"MATCH (events) SELECT event_id WHERE ((partition<3 OR offset=retention_days) OR title=platform) AND {added_condition}", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), - selected_columns=[SelectedExpression("a", Column("_snuba_a", None, "a"))], + selected_columns=[ + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ) + ], condition=binary_condition( "and", binary_condition( @@ -584,19 +634,19 @@ def build_cond(tn: str) -> str: "or", binary_condition( "less", - Column("_snuba_time_seen", None, "time_seen"), + Column("_snuba_partition", None, "partition"), Literal(None, 3), ), binary_condition( "equals", - Column("_snuba_last_seen", None, "last_seen"), - Column("_snuba_afternoon", None, "afternoon"), + Column("_snuba_offset", None, "offset"), + Column("_snuba_retention_days", None, "retention_days"), ), ), binary_condition( "equals", - Column("_snuba_name", None, "name"), - Column("_snuba_bob", None, "bob"), + Column("_snuba_title", None, "title"), + Column("_snuba_platform", None, "platform"), ), ), required_condition, @@ -607,27 +657,31 @@ def build_cond(tn: str) -> str: id="Query with multiple conditions joined by OR / parenthesized OR", ), pytest.param( - f"MATCH (events) SELECT a WHERE (name!=bob OR last_seen0)) AND {added_condition}", + f"MATCH (events) SELECT event_id WHERE (title!=platform OR partition0)) AND {added_condition}", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), - selected_columns=[SelectedExpression("a", Column("_snuba_a", None, "a"))], + selected_columns=[ + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ) + ], condition=binary_condition( "and", binary_condition( "or", binary_condition( "notEquals", - Column("_snuba_name", None, "name"), - Column("_snuba_bob", None, "bob"), + Column("_snuba_title", None, "title"), + Column("_snuba_platform", None, "platform"), ), binary_condition( "and", binary_condition( "less", - Column("_snuba_last_seen", None, "last_seen"), - Column("_snuba_afternoon", None, "afternoon"), + Column("_snuba_partition", None, "partition"), + Column("_snuba_offset", None, "offset"), ), binary_condition( "or", @@ -638,15 +692,15 @@ def build_cond(tn: str) -> str: None, "gps", ( - Column("_snuba_x", None, "x"), - Column("_snuba_y", None, "y"), - Column("_snuba_z", None, "z"), + Column("_snuba_user_id", None, "user_id"), + Column("_snuba_user_name", None, "user_name"), + Column("_snuba_user_email", None, "user_email"), ), ), ), binary_condition( "greater", - Column("_snuba_times_seen", None, "times_seen"), + Column("_snuba_group_id", None, "group_id"), Literal(None, 0), ), ), @@ -661,7 +715,7 @@ def build_cond(tn: str) -> str: ), pytest.param( """MATCH (events) - SELECT a, b[c] + SELECT event_id, tags[test] WHERE project_id IN tuple( 2 , 3) AND timestamp>=toDateTime('2021-01-01') AND timestamp str: EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() ), selected_columns=[ - SelectedExpression("a", Column("_snuba_a", None, "a")), SelectedExpression( - "b[c]", + "event_id", Column("_snuba_event_id", None, "event_id") + ), + SelectedExpression( + "tags[test]", SubscriptableReference( - "_snuba_b[c]", - Column("_snuba_b", None, "b"), - key=Literal(None, "c"), + "_snuba_tags[test]", + Column("_snuba_tags", None, "tags"), + key=Literal(None, "test"), ), ), ], @@ -708,7 +764,7 @@ def build_cond(tn: str) -> str: ), pytest.param( f"""MATCH (events) - SELECT 4-5,3*foo(c) AS foo,c + SELECT 4-5,3*foo(partition) AS foo,partition WHERE {added_condition}""", LogicalQuery( QueryEntity( @@ -722,19 +778,23 @@ def build_cond(tn: str) -> str: ), ), SelectedExpression( - "3*foo(c) AS foo", + "3*foo(partition) AS foo", FunctionCall( - "_snuba_3*foo(c) AS foo", + "_snuba_3*foo(partition) AS foo", "multiply", ( Literal(None, 3), FunctionCall( - "_snuba_foo", "foo", (Column("_snuba_c", None, "c"),) + "_snuba_foo", + "foo", + (Column("_snuba_partition", None, "partition"),), ), ), ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "partition", Column("_snuba_partition", None, "partition") + ), ], condition=required_condition, limit=1000, @@ -744,8 +804,8 @@ def build_cond(tn: str) -> str: ), pytest.param( f"""MATCH (events) - SELECT 4-5,3*foo(c) AS foo,c - WHERE or(equals(arrayExists(a, '=', 'RuntimeException'), 1), equals(arrayAll(b, 'NOT IN', tuple('Stack', 'Arithmetic')), 1)) = 1 AND {added_condition}""", + SELECT 4-5,3*foo(event_id) AS foo,event_id + WHERE or(equals(arrayExists(exception_stacks.type, '=', 'RuntimeException'), 1), equals(arrayAll(modules.name, 'NOT IN', tuple('Stack', 'Arithmetic')), 1)) = 1 AND {added_condition}""", LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -758,19 +818,23 @@ def build_cond(tn: str) -> str: ), ), SelectedExpression( - "3*foo(c) AS foo", + "3*foo(event_id) AS foo", FunctionCall( - "_snuba_3*foo(c) AS foo", + "_snuba_3*foo(event_id) AS foo", "multiply", ( Literal(None, 3), FunctionCall( - "_snuba_foo", "foo", (Column("_snuba_c", None, "c"),) + "_snuba_foo", + "foo", + (Column("_snuba_event_id", None, "event_id"),), ), ), ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ), ], condition=binary_condition( "and", @@ -804,7 +868,11 @@ def build_cond(tn: str) -> str: ), ), ), - Column("_snuba_a", None, "a"), + Column( + "_snuba_exception_stacks.type", + None, + "exception_stacks.type", + ), ), ), Literal(None, 1), @@ -843,7 +911,7 @@ def build_cond(tn: str) -> str: ), ), ), - Column("_snuba_b", None, "b"), + Column("_snuba_modules.name", None, "modules.name"), ), ), Literal(None, 1), @@ -859,7 +927,7 @@ def build_cond(tn: str) -> str: id="Special array join functions", ), pytest.param( - f"""MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c + f"""MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.event_id WHERE {build_cond('e')} AND {build_cond('t')}""", CompositeQuery( from_clause=JoinClause( @@ -892,7 +960,9 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("e.c", Column("_snuba_e.c", "e", "c")), + SelectedExpression( + "e.event_id", Column("_snuba_e.event_id", "e", "event_id") + ), ], condition=binary_condition( "and", @@ -945,7 +1015,7 @@ def build_cond(tn: str) -> str: id="Basic join match", ), pytest.param( - f"""MATCH (e: events) -[contains]-> (t: transactions SAMPLE 0.5) SELECT 4-5, t.c + f"""MATCH (e: events) -[contains]-> (t: transactions SAMPLE 0.5) SELECT 4-5, t.event_id WHERE {build_cond('e')} AND {build_cond('t')}""", CompositeQuery( from_clause=JoinClause( @@ -979,7 +1049,9 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("t.c", Column("_snuba_t.c", "t", "c")), + SelectedExpression( + "t.event_id", Column("_snuba_t.event_id", "t", "event_id") + ), ], condition=binary_condition( "and", @@ -1035,7 +1107,7 @@ def build_cond(tn: str) -> str: f"""MATCH (e: events) -[contains]-> (t: transactions), (e: events) -[assigned]-> (ga: groupassignee) - SELECT 4-5, ga.c + SELECT 4-5, ga.offset WHERE {build_cond('e')} AND {build_cond('t')}""", CompositeQuery( from_clause=JoinClause( @@ -1084,7 +1156,9 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("ga.c", Column("_snuba_ga.c", "ga", "c")), + SelectedExpression( + "ga.offset", Column("_snuba_ga.offset", "ga", "offset") + ), ], condition=binary_condition( "and", @@ -1142,7 +1216,7 @@ def build_cond(tn: str) -> str: (e: events) -[assigned]-> (ga: groupassignee), (e: events) -[bookmark]-> (gm: groupedmessage), (e: events) -[activity]-> (se: metrics_sets) - SELECT 4-5, e.a, t.b, ga.c, gm.d, se.e + SELECT 4-5, e.event_id, t.event_id, ga.offset, gm.offset, se.metric_id WHERE {build_cond('e')} AND {build_cond('t')} AND se.org_id = 1 AND se.project_id = 1 AND se.timestamp >= toDateTime('2021-01-01') AND se.timestamp < toDateTime('2021-01-02')""", @@ -1225,11 +1299,21 @@ def build_cond(tn: str) -> str: "_snuba_4-5", "minus", (Literal(None, 4), Literal(None, 5)) ), ), - SelectedExpression("e.a", Column("_snuba_e.a", "e", "a")), - SelectedExpression("t.b", Column("_snuba_t.b", "t", "b")), - SelectedExpression("ga.c", Column("_snuba_ga.c", "ga", "c")), - SelectedExpression("gm.d", Column("_snuba_gm.d", "gm", "d")), - SelectedExpression("se.e", Column("_snuba_se.e", "se", "e")), + SelectedExpression( + "e.event_id", Column("_snuba_e.event_id", "e", "event_id") + ), + SelectedExpression( + "t.event_id", Column("_snuba_t.event_id", "t", "event_id") + ), + SelectedExpression( + "ga.offset", Column("_snuba_ga.offset", "ga", "offset") + ), + SelectedExpression( + "gm.offset", Column("_snuba_gm.offset", "gm", "offset") + ), + SelectedExpression( + "se.metric_id", Column("_snuba_se.metric_id", "se", "metric_id") + ), ], condition=binary_condition( "and", @@ -1424,7 +1508,7 @@ def build_cond(tn: str) -> str: id="sub query of sub query match", ), pytest.param( - f"""MATCH (events) SELECT 4-5,3*foo(c) AS foo,c WHERE a<'stuff\\' "\\" stuff' AND b='"💩\\" \t \\'\\'' AND {added_condition} """, + f"""MATCH (events) SELECT 4-5,3*foo(event_id) AS foo,event_id WHERE title<'stuff\\' "\\" stuff' AND culprit='"💩\\" \t \\'\\'' AND {added_condition} """, LogicalQuery( QueryEntity( EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() @@ -1437,32 +1521,36 @@ def build_cond(tn: str) -> str: ), ), SelectedExpression( - "3*foo(c) AS foo", + "3*foo(event_id) AS foo", FunctionCall( - "_snuba_3*foo(c) AS foo", + "_snuba_3*foo(event_id) AS foo", "multiply", ( Literal(None, 3), FunctionCall( - "_snuba_foo", "foo", (Column("_snuba_c", None, "c"),) + "_snuba_foo", + "foo", + (Column("_snuba_event_id", None, "event_id"),), ), ), ), ), - SelectedExpression("c", Column("_snuba_c", None, "c")), + SelectedExpression( + "event_id", Column("_snuba_event_id", None, "event_id") + ), ], condition=binary_condition( "and", binary_condition( "less", - Column("_snuba_a", None, "a"), + Column("_snuba_title", None, "title"), Literal(None, """stuff' "\\" stuff"""), ), binary_condition( "and", binary_condition( "equals", - Column("_snuba_b", None, "b"), + Column("_snuba_culprit", None, "culprit"), Literal(None, """"💩\\" \t ''"""), ), required_condition, diff --git a/tests/query/snql/test_query_column_validation.py b/tests/query/snql/test_query_column_validation.py index 949432fb66..7914bdbcb2 100644 --- a/tests/query/snql/test_query_column_validation.py +++ b/tests/query/snql/test_query_column_validation.py @@ -84,7 +84,7 @@ id="subquery has their dates adjusted", ), pytest.param( - """MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.c + """MATCH (e: events) -[contains]-> (t: transactions) SELECT 4-5, e.event_id WHERE e.project_id=1 AND e.timestamp>=toDateTime('2021-01-01T00:30:00') AND e.timestamp str: test_cases = [ pytest.param( - f"MATCH (events) SELECT 4-5, c,d,e WHERE {added_condition} LIMIT 5 BY c,d,e", + f"MATCH (events) SELECT 4-5, event_id,title,culprit WHERE {added_condition} LIMIT 5 BY event_id,title,culprit", ( "MATCH Entity(events) " - "SELECT (minus(-1337, -1337) AS `4-5`), c, d, e " + "SELECT (minus(-1337, -1337) AS `4-5`), event_id, title, culprit " "WHERE equals(project_id, -1337) " "AND greaterOrEquals(timestamp, toDateTime('$S')) " "AND less(timestamp, toDateTime('$S')) " - "LIMIT 5 BY c,d,e " + "LIMIT 5 BY event_id,title,culprit " "LIMIT 1000 OFFSET 0" ), id="limit by multiple columns", ), pytest.param( - f"MATCH (events) SELECT count() AS count BY tags[key], measurements[lcp.elementSize] WHERE measurements[lcp.elementSize] > 1 AND {added_condition}", + f"MATCH (events) SELECT count() AS count BY tags[key], contexts[lcp.elementSize] WHERE contexts[lcp.elementSize] > 1 AND {added_condition}", ( "MATCH Entity(events) " - "SELECT `tags[key]`, `measurements[lcp.elementSize]`, (count() AS count) " - "GROUP BY `tags[key]`, `measurements[lcp.elementSize]` " - "WHERE greater(`measurements[lcp.elementSize]`, -1337) " + "SELECT `tags[key]`, `contexts[lcp.elementSize]`, (count() AS count) " + "GROUP BY `tags[key]`, `contexts[lcp.elementSize]` " + "WHERE greater(`contexts[lcp.elementSize]`, -1337) " "AND equals(project_id, -1337) AND " "greaterOrEquals(timestamp, toDateTime('$S')) AND " "less(timestamp, toDateTime('$S')) " @@ -46,14 +46,14 @@ def build_cond(tn: str) -> str: id="Basic query with subscriptables", ), pytest.param( - f"MATCH (events) SELECT a WHERE (name!=bob OR last_seen0)) AND {added_condition}", + f"MATCH (events) SELECT event_id WHERE (event_id!='bob' OR group_id<2 AND (location='here' OR partition>0)) AND {added_condition}", ( "MATCH Entity(events) " - "SELECT a " - "WHERE (notEquals(name, bob) " - "OR less(last_seen, afternoon) " - "AND (equals(location, gps(x, y, z)) " - "OR greater(times_seen, -1337))) " + "SELECT event_id " + "WHERE (notEquals(event_id, '$S') " + "OR less(group_id, -1337) " + "AND (equals(location, '$S') " + "OR greater(partition, -1337))) " "AND equals(project_id, -1337) " "AND greaterOrEquals(timestamp, toDateTime('$S')) " "AND less(timestamp, toDateTime('$S')) " @@ -63,13 +63,13 @@ def build_cond(tn: str) -> str: ), pytest.param( """MATCH (events) - SELECT a, b[c] + SELECT event_id, tags[c] WHERE project_id IN tuple( 2 , 3) AND timestamp>=toDateTime('2021-01-01') AND timestamp str: ), pytest.param( f"""MATCH (events) - SELECT 4-5,3*foo(c) AS foo,c - WHERE or(equals(arrayExists(a, '=', 'RuntimeException'), 1), equals(arrayAll(b, 'NOT IN', tuple('Stack', 'Arithmetic')), 1)) = 1 AND {added_condition}""", + SELECT 4-5,3*foo(project_id) AS foo,project_id + WHERE or(equals(arrayExists(exception_stacks.mechanism_handled, '=', 'RuntimeException'), 1), equals(arrayAll(exception_stacks.type, 'NOT IN', tuple('Stack', 'Arithmetic')), 1)) = 1 AND {added_condition}""", ( "MATCH Entity(events) " - "SELECT (minus(-1337, -1337) AS `4-5`), (multiply(-1337, (foo(c) AS foo)) AS `3*foo(c) AS foo`), c " - "WHERE equals((equals(arrayExists(a, '$S', '$S'), -1337) " - "OR equals(arrayAll(b, '$S', ('$S', '$S')), -1337)), -1337) " + "SELECT (minus(-1337, -1337) AS `4-5`), (multiply(-1337, (foo(project_id) AS foo)) AS `3*foo(project_id) AS foo`), project_id " + "WHERE equals((equals(arrayExists(exception_stacks.mechanism_handled, '$S', '$S'), -1337) " + "OR equals(arrayAll(exception_stacks.type, '$S', ('$S', '$S')), -1337)), -1337) " "AND equals(project_id, -1337) " "AND greaterOrEquals(timestamp, toDateTime('$S')) " "AND less(timestamp, toDateTime('$S')) " @@ -97,7 +97,7 @@ def build_cond(tn: str) -> str: f"""MATCH (e: events) -[contains]-> (t: transactions), (e: events) -[assigned]-> (ga: groupassignee) - SELECT 4-5, ga.c + SELECT 4-5, ga.offset WHERE {build_cond('e')} AND {build_cond('t')}""", ( "MATCH " @@ -105,7 +105,7 @@ def build_cond(tn: str) -> str: "LEFT e, Entity(events) " "TYPE JoinType.INNER RIGHT ga, Entity(groupassignee)\n ON e.event_id ga.group_id " "TYPE JoinType.INNER RIGHT t, Entity(transactions)\n ON e.event_id t.event_id " - "SELECT (minus(-1337, -1337) AS `4-5`), ga.c " + "SELECT (minus(-1337, -1337) AS `4-5`), ga.offset " "WHERE equals(e.project_id, -1337) " "AND greaterOrEquals(e.timestamp, toDateTime('$S')) " "AND less(e.timestamp, toDateTime('$S')) " diff --git a/tests/query/test_query_ast.py b/tests/query/test_query_ast.py index 05523ae1db..202c6e1794 100644 --- a/tests/query/test_query_ast.py +++ b/tests/query/test_query_ast.py @@ -145,14 +145,14 @@ def replace(exp: Expression) -> Expression: def test_get_all_columns_legacy() -> None: query_body = { "selected_columns": [ - ["f1", ["column1", "column2"], "f1_alias"], + ["f1", ["title", "message"], "f1_alias"], ["f2", [], "f2_alias"], ["formatDateTime", ["timestamp", "'%Y-%m-%d'"], "formatted_time"], ], "aggregations": [ ["count", "platform", "platforms"], ["uniq", "platform", "uniq_platforms"], - ["testF", ["platform", ["anotherF", ["field2"]]], "top_platforms"], + ["testF", ["platform", ["anotherF", ["offset"]]], "top_platforms"], ], "conditions": [ ["tags[sentry:dist]", "IN", ["dist1", "dist2"]], @@ -160,7 +160,7 @@ def test_get_all_columns_legacy() -> None: ["timestamp", "<", "2020-01-02T12:00:00"], ["project_id", "=", 1], ], - "having": [["times_seen", ">", 1]], + "having": [["trace_sampled", ">", 1]], "groupby": [["format_eventid", ["event_id"]]], } events = get_dataset("events") @@ -169,12 +169,12 @@ def test_get_all_columns_legacy() -> None: query, _ = parse_snql_query(str(request.query), events) assert query.get_all_ast_referenced_columns() == { - Column("_snuba_column1", None, "column1"), - Column("_snuba_column2", None, "column2"), + Column("_snuba_title", None, "title"), + Column("_snuba_message", None, "message"), Column("_snuba_platform", None, "platform"), - Column("_snuba_field2", None, "field2"), + Column("_snuba_offset", None, "offset"), Column("_snuba_tags", None, "tags"), - Column("_snuba_times_seen", None, "times_seen"), + Column("_snuba_trace_sampled", None, "trace_sampled"), Column("_snuba_event_id", None, "event_id"), Column("_snuba_timestamp", None, "timestamp"), Column("_snuba_project_id", None, "project_id"), @@ -192,29 +192,29 @@ def test_get_all_columns_legacy() -> None: def test_get_all_columns() -> None: query_body = """ MATCH (events) - SELECT f1(column1, column2) AS f1_alias, + SELECT f1(partition, release) AS f1_alias, f2() AS f2_alias, formatDateTime(timestamp, '%Y-%m-%d') AS formatted_time, count() AS platforms, uniq(platform) AS uniq_platforms, - testF(platform, anotherF(field2)) AS top_platforms + testF(platform, anotherF(title)) AS top_platforms BY format_eventid(event_id) WHERE tags[sentry:dist] IN tuple('dist1', 'dist2') AND timestamp >= toDateTime('2020-01-01 12:00:00') AND timestamp < toDateTime('2020-01-02 12:00:00') AND project_id = 1 - HAVING times_seen > 1 + HAVING trace_sampled > 1 """ events = get_dataset("events") query, _ = parse_snql_query(query_body, events) assert query.get_all_ast_referenced_columns() == { - Column("_snuba_column1", None, "column1"), - Column("_snuba_column2", None, "column2"), + Column("_snuba_partition", None, "partition"), + Column("_snuba_release", None, "release"), Column("_snuba_platform", None, "platform"), - Column("_snuba_field2", None, "field2"), + Column("_snuba_title", None, "title"), Column("_snuba_tags", None, "tags"), - Column("_snuba_times_seen", None, "times_seen"), + Column("_snuba_trace_sampled", None, "trace_sampled"), Column("_snuba_event_id", None, "event_id"), Column("_snuba_timestamp", None, "timestamp"), Column("_snuba_project_id", None, "project_id"), @@ -324,19 +324,6 @@ def test_quoted_column_regex_allows_for_mri_format() -> None: True, id="Alias redefines col and referenced", ), - pytest.param( - { - "selected_columns": ["project_id", ["foo", ["event_id"], "event_id"]], - "conditions": [ - ["whatsthis", "IN", ["a" * 32, "b" * 32]], - ["project_id", "=", 1], - ["timestamp", ">=", "2020-01-01T12:00:00"], - ["timestamp", "<", "2020-01-02T12:00:00"], - ], - }, - False, - id="Alias referenced and not defined", - ), ] diff --git a/tests/subscriptions/test_subscription.py b/tests/subscriptions/test_subscription.py index e4f996c903..c5cfec6fd1 100644 --- a/tests/subscriptions/test_subscription.py +++ b/tests/subscriptions/test_subscription.py @@ -9,12 +9,15 @@ from snuba.datasets.entity_subscriptions.validators import InvalidSubscriptionError from snuba.datasets.factory import get_dataset from snuba.query.exceptions import InvalidQueryException +from snuba.query.parser.exceptions import ParsingException +from snuba.query.validation.validators import ColumnValidationMode from snuba.redis import RedisClientKey, get_redis_client from snuba.subscriptions.data import SubscriptionData from snuba.subscriptions.store import RedisSubscriptionDataStore from snuba.subscriptions.subscription import SubscriptionCreator, SubscriptionDeleter from snuba.utils.metrics.timer import Timer from snuba.web import QueryException +from tests.helpers import override_entity_column_validator from tests.subscriptions import BaseSubscriptionTest TESTS_CREATE = [ @@ -57,7 +60,6 @@ class TestSubscriptionCreator(BaseSubscriptionTest): - timer = Timer("test") @pytest.mark.parametrize("subscription", TESTS_CREATE) @@ -82,8 +84,9 @@ def test(self, subscription: SubscriptionData) -> None: @pytest.mark.clickhouse_db @pytest.mark.redis_db def test_invalid_condition_column(self, subscription: SubscriptionData) -> None: + override_entity_column_validator(EntityKey.EVENTS, ColumnValidationMode.ERROR) creator = SubscriptionCreator(self.dataset, EntityKey.EVENTS) - with raises(QueryException): + with raises(ParsingException): creator.create( subscription, self.timer, diff --git a/tests/test_metrics_sdk_api.py b/tests/test_metrics_sdk_api.py index a1b9030c25..708ec351bc 100644 --- a/tests/test_metrics_sdk_api.py +++ b/tests/test_metrics_sdk_api.py @@ -174,7 +174,6 @@ def test_retrieval_basic(self, test_entity: str, test_dataset: str) -> None: use_case_id=USE_CASE_ID, ), ) - response = self.app.post( self.snql_route, data=json.dumps( diff --git a/tests/test_snql_api.py b/tests/test_snql_api.py index b1808a3feb..4265ccf529 100644 --- a/tests/test_snql_api.py +++ b/tests/test_snql_api.py @@ -20,11 +20,12 @@ QueryResultOrError, QuotaAllowance, ) +from snuba.query.validation.validators import ColumnValidationMode from snuba.utils.metrics.backends.testing import get_recorded_metric_calls from tests.base import BaseApiTest from tests.conftest import SnubaSetConfig from tests.fixtures import get_raw_event, get_raw_transaction -from tests.helpers import write_unprocessed_events +from tests.helpers import override_entity_column_validator, write_unprocessed_events class RejectAllocationPolicy123(AllocationPolicy): @@ -891,6 +892,7 @@ def test_duplicate_alias_bug(self) -> None: assert {"name": "http.url", "type": "String"} in result["meta"] def test_invalid_column(self) -> None: + override_entity_column_validator(EntityKey.OUTCOMES, ColumnValidationMode.ERROR) response = self.post( "/outcomes/snql", data=json.dumps( @@ -909,15 +911,12 @@ def test_invalid_column(self) -> None: } ), ) - # TODO: when validation mode is ERROR this should be: - # assert response.status_code == 400 - # assert ( - # json.loads(response.data)["error"]["message"] - # == "validation failed for entity outcomes: query column(s) fake_column do not exist" - # ) - - # For now it's 500 since it's just a clickhouse error - assert response.status_code == 500 + override_entity_column_validator(EntityKey.OUTCOMES, ColumnValidationMode.WARN) + assert response.status_code == 400 + assert ( + json.loads(response.data)["error"]["message"] + == "validation failed for entity outcomes: Entity outcomes: Query column 'fake_column' does not exist" + ) def test_valid_columns_composite_query(self) -> None: response = self.post( @@ -950,9 +949,19 @@ def test_valid_columns_composite_query(self) -> None: {TIMESTAMPS} """, 400, - "validation failed for entity events: query column(s) fsdfsd do not exist", + "validation failed for entity events: Query column 'fsdfsd' does not exist", id="Invalid first Select column", ), + pytest.param( + f"""{MATCH} + SELECT e.fsdfsd, e.fake_col, gm.status, avg(e.retention_days) AS avg BY e.group_id, gm.status + {WHERE} + {TIMESTAMPS} + """, + 400, + "validation failed for entity events: query columns (fsdfsd, fake_col) do not exist", + id="Invalid multiple Select columns", + ), pytest.param( f"""{MATCH} SELECT e.group_id, gm.fsdfsd, avg(e.retention_days) AS avg BY e.group_id, gm.status @@ -960,7 +969,7 @@ def test_valid_columns_composite_query(self) -> None: {TIMESTAMPS} """, 400, - "validation failed for entity groupedmessage: query column(s) fsdfsd do not exist", + "validation failed for entity groupedmessage: Query column 'fsdfsd' does not exist", id="Invalid second Select column", ), pytest.param( @@ -970,7 +979,7 @@ def test_valid_columns_composite_query(self) -> None: {TIMESTAMPS} """, 400, - "validation failed for entity groupedmessage: query column(s) fsdfsd do not exist", + "validation failed for entity groupedmessage: Query column 'fsdfsd' does not exist", id="Invalid By column", ), pytest.param( @@ -981,7 +990,7 @@ def test_valid_columns_composite_query(self) -> None: {TIMESTAMPS} """, 400, - "validation failed for entity groupedmessage: query column(s) fsdfsd do not exist", + "validation failed for entity groupedmessage: Query column 'fsdfsd' does not exist", id="Invalid Where column", ), pytest.param( @@ -991,7 +1000,7 @@ def test_valid_columns_composite_query(self) -> None: {TIMESTAMPS} """, 400, - "validation failed for entity events: query column(s) status do not exist", + "validation failed for entity events: Query column 'status' does not exist", id="Mismatched Select columns", ), pytest.param( @@ -1001,24 +1010,30 @@ def test_valid_columns_composite_query(self) -> None: {TIMESTAMPS} """, 400, - "validation failed for entity events: query column(s) fdsdsf do not exist", + "validation failed for entity events: Query column 'fdsdsf' does not exist", id="Invalid nested column", ), ] + @pytest.fixture() @pytest.mark.parametrize( "query, response_code, error_message", invalid_columns_composite_query_tests ) def test_invalid_columns_composite_query( self, query: str, response_code: int, error_message: str ) -> None: + override_entity_column_validator(EntityKey.EVENTS, ColumnValidationMode.ERROR) + override_entity_column_validator( + EntityKey.GROUPEDMESSAGE, ColumnValidationMode.ERROR + ) response = self.post("/events/snql", data=json.dumps({"query": query})) + override_entity_column_validator(EntityKey.EVENTS, ColumnValidationMode.WARN) + override_entity_column_validator( + EntityKey.GROUPEDMESSAGE, ColumnValidationMode.WARN + ) - # TODO: when validation mode for events and groupedmessage is ERROR this should be: - # assert response.status_code == response_code - # assert json.loads(response.data)["error"]["message"] == error_message - - assert response.status_code == 500 + assert response.status_code == response_code + assert json.loads(response.data)["error"]["message"] == error_message def test_wrap_log_fn_with_ifnotfinite(self) -> None: """ diff --git a/tests/test_split.py b/tests/test_split.py index 69014a6416..c17c82f719 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -238,8 +238,8 @@ def do_query( ("project_id", String()), ("timestamp", String()), ("level", String()), - ("logger", String()), - ("server_name", String()), + ("release", String()), + ("platform", String()), ("transaction", String()), ] ) @@ -253,8 +253,8 @@ def do_query( "selected_columns": [ "event_id", "level", - "logger", - "server_name", + "release", + "platform", "transaction", "timestamp", "project_id", @@ -277,8 +277,8 @@ def do_query( "selected_columns": [ "event_id", "level", - "logger", - "server_name", + "release", + "platform", "transaction", "timestamp", "project_id", @@ -297,7 +297,7 @@ def do_query( "project_id", "timestamp", { - "selected_columns": ["event_id", "level", "logger", "server_name"], + "selected_columns": ["event_id", "level", "release", "platform"], "conditions": [ ("timestamp", ">=", "2019-09-19T10:00:00"), ("timestamp", "<", "2019-09-19T12:00:00"), @@ -315,8 +315,8 @@ def do_query( "selected_columns": [ "event_id", "level", - "logger", - "server_name", + "release", + "platform", "transaction", "timestamp", "project_id", @@ -339,8 +339,8 @@ def do_query( "selected_columns": [ "event_id", "level", - "logger", - "server_name", + "release", + "platform", "transaction", "timestamp", "project_id", @@ -363,8 +363,8 @@ def do_query( "selected_columns": [ "event_id", "level", - "logger", - "server_name", + "release", + "platform", "transaction", "timestamp", "project_id", @@ -439,7 +439,7 @@ def do_query( body = """ MATCH (events) - SELECT event_id, level, logger, server_name, transaction, timestamp, project_id + SELECT event_id, level, offset, partition, transaction, timestamp, project_id WHERE timestamp >= toDateTime('2019-09-18T10:00:00') AND timestamp < toDateTime('2019-09-19T12:00:00') AND project_id IN tuple(1)