From b54d864d7f48c7ca78c29d042d7c998c71b42592 Mon Sep 17 00:00:00 2001 From: Raphael Pierzina Date: Wed, 29 May 2024 11:39:52 +0200 Subject: [PATCH 1/3] Create CTE for issue subquery in incidents.sql --- queries/incidents.sql | 75 ++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/queries/incidents.sql b/queries/incidents.sql index 9cf69253..48022590 100644 --- a/queries/incidents.sql +++ b/queries/incidents.sql @@ -1,4 +1,41 @@ -- Incidents View +WITH + issue AS ( + SELECT + source, + CASE + WHEN source LIKE "github%" THEN JSON_EXTRACT_SCALAR(metadata, '$.repository.full_name') + WHEN source LIKE "pagerduty%" THEN JSON_EXTRACT_SCALAR(metadata, '$.event.data.service.summary') + END + AS metadata_service, + CASE + WHEN source LIKE "github%" THEN JSON_EXTRACT_SCALAR(metadata, '$.issue.number') + WHEN source LIKE "pagerduty%" THEN JSON_EXTRACT_SCALAR(metadata, '$.event.data.id') + END + AS incident_id, + CASE + WHEN source LIKE "github%" THEN TIMESTAMP(JSON_EXTRACT_SCALAR(metadata, '$.issue.created_at')) + WHEN source LIKE "pagerduty%" THEN TIMESTAMP(JSON_EXTRACT_SCALAR(metadata, '$.event.occurred_at')) + END + AS time_created, + CASE + WHEN source LIKE "github%" THEN TIMESTAMP(JSON_EXTRACT_SCALAR(metadata, '$.issue.closed_at')) + WHEN source LIKE "pagerduty%" THEN TIMESTAMP(JSON_EXTRACT_SCALAR(metadata, '$.event.occurred_at')) + END + AS time_resolved, + REGEXP_EXTRACT(metadata, r"root cause: ([[:alnum:]]*)") AS root_cause, + CASE + WHEN source LIKE "github%" THEN REGEXP_CONTAINS(JSON_EXTRACT(metadata, '$.issue.labels'), '"name":"Incident"') + WHEN source LIKE "pagerduty%" THEN TRUE # All Pager Duty events are incident-related + END + AS bug, + FROM + four_keys.events_raw + WHERE + event_type LIKE "issue%" + OR event_type LIKE "incident%" + OR (event_type = "note" AND JSON_EXTRACT_SCALAR(metadata, '$.object_attributes.noteable_type') = 'Issue') + ) SELECT source, metadata_service, @@ -8,42 +45,8 @@ SELECT MIN(IF(root.time_created < issue.time_created, root.time_created, issue.time_created)) AS time_created, MAX(time_resolved) AS time_resolved, ARRAY_AGG(root_cause IGNORE NULLS) AS changes, -FROM ( - SELECT - source, - CASE - WHEN source LIKE "github%" THEN JSON_EXTRACT_SCALAR(metadata, '$.repository.full_name') - WHEN source LIKE "pagerduty%" THEN JSON_EXTRACT_SCALAR(metadata, '$.event.data.service.summary') - END - AS metadata_service, - CASE - WHEN source LIKE "github%" THEN JSON_EXTRACT_SCALAR(metadata, '$.issue.number') - WHEN source LIKE "pagerduty%" THEN JSON_EXTRACT_SCALAR(metadata, '$.event.data.id') - END - AS incident_id, - CASE - WHEN source LIKE "github%" THEN TIMESTAMP(JSON_EXTRACT_SCALAR(metadata, '$.issue.created_at')) - WHEN source LIKE "pagerduty%" THEN TIMESTAMP(JSON_EXTRACT_SCALAR(metadata, '$.event.occurred_at')) - END - AS time_created, - CASE - WHEN source LIKE "github%" THEN TIMESTAMP(JSON_EXTRACT_SCALAR(metadata, '$.issue.closed_at')) - WHEN source LIKE "pagerduty%" THEN TIMESTAMP(JSON_EXTRACT_SCALAR(metadata, '$.event.occurred_at')) - END - AS time_resolved, - REGEXP_EXTRACT(metadata, r"root cause: ([[:alnum:]]*)") AS root_cause, - CASE - WHEN source LIKE "github%" THEN REGEXP_CONTAINS(JSON_EXTRACT(metadata, '$.issue.labels'), '"name":"Incident"') - WHEN source LIKE "pagerduty%" THEN TRUE # All Pager Duty events are incident-related - END - AS bug, - FROM - four_keys.events_raw - WHERE - event_type LIKE "issue%" - OR event_type LIKE "incident%" - OR (event_type = "note" AND JSON_EXTRACT_SCALAR(metadata, '$.object_attributes.noteable_type') = 'Issue') -) AS issue +FROM + issue LEFT JOIN `four_keys.services` AS service_catalog ON From e275e494bd4ac2748533ed7f752bf43b8ff022e4 Mon Sep 17 00:00:00 2001 From: Raphael Pierzina Date: Wed, 29 May 2024 11:57:23 +0200 Subject: [PATCH 2/3] Merge in incidents from Google Form in incidents.sql --- queries/incidents.sql | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/queries/incidents.sql b/queries/incidents.sql index 48022590..eeb6d970 100644 --- a/queries/incidents.sql +++ b/queries/incidents.sql @@ -1,6 +1,6 @@ -- Incidents View WITH - issue AS ( + github_pagerduty AS ( SELECT source, CASE @@ -35,6 +35,23 @@ WITH event_type LIKE "issue%" OR event_type LIKE "incident%" OR (event_type = "note" AND JSON_EXTRACT_SCALAR(metadata, '$.object_attributes.noteable_type') = 'Issue') + ), + issue AS ( + SELECT + * + FROM + github_pagerduty + UNION ALL + SELECT + source, + github_repo AS metadata_service, + incident_id, + time_created, + time_resolved, + root_cause, + TRUE as bug, + FROM + `four_keys.incidents_google_form` ) SELECT source, @@ -53,6 +70,7 @@ ON CASE WHEN issue.source = "pagerduty" THEN issue.metadata_service = service_catalog.pagerduty_service WHEN issue.source = "github" THEN issue.metadata_service = service_catalog.github_repository + WHEN issue.source = "google_form" THEN issue.metadata_service = service_catalog.github_repository ELSE FALSE END LEFT JOIN ( From 88f4eaafb27a828825d15bed161917ccea3d3b16 Mon Sep 17 00:00:00 2001 From: Raphael Pierzina Date: Wed, 29 May 2024 14:54:11 +0200 Subject: [PATCH 3/3] Ensure we don't correlate incidents with staging deploys --- queries/incidents.sql | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/queries/incidents.sql b/queries/incidents.sql index eeb6d970..ca5f7be3 100644 --- a/queries/incidents.sql +++ b/queries/incidents.sql @@ -38,7 +38,8 @@ WITH ), issue AS ( SELECT - * + *, + NULL as deployment_environment -- Remove this once we collect deployment_environment FROM github_pagerduty UNION ALL @@ -50,6 +51,7 @@ WITH time_resolved, root_cause, TRUE as bug, + deployment_environment, -- This is expected to always be `production` FROM `four_keys.incidents_google_form` ) @@ -80,11 +82,16 @@ LEFT JOIN ( service, environment FROM - four_keys.deployments d, + `four_keys.deployments` AS d, d.changes ) AS root ON - ( service_catalog.service = root.service AND root_cause = root.changes ) + service_catalog.service = root.service + AND root_cause = root.changes + -- GitHub and PagerDuty incidents don't contain information about the environment. + -- Google Form incidents are always reported for the `production` environment. + -- The following ensures we don't correlate incidents with `staging` deployments. + AND issue.deployment_environment = root.environment GROUP BY 1, 2,