From dcb9e2f9ce64bf6385005619c0155b2fcff11574 Mon Sep 17 00:00:00 2001 From: tonypowa Date: Tue, 17 Dec 2024 13:47:52 +0100 Subject: [PATCH 1/7] new submissions: alerting part 3 --- grafana/alerting-get-started-pt3/finish.md | 3 + grafana/alerting-get-started-pt3/index.json | 35 ++ grafana/alerting-get-started-pt3/intro.md | 17 + .../alerting-get-started-pt3/preprocessed.md | 425 ++++++++++++++++++ grafana/alerting-get-started-pt3/step1.md | 25 ++ grafana/alerting-get-started-pt3/step2.md | 45 ++ grafana/alerting-get-started-pt3/step3.md | 19 + grafana/alerting-get-started-pt3/step4.md | 56 +++ grafana/alerting-get-started-pt3/step5.md | 81 ++++ grafana/alerting-get-started-pt3/step6.md | 36 ++ grafana/structure.json | 1 + 11 files changed, 743 insertions(+) create mode 100644 grafana/alerting-get-started-pt3/finish.md create mode 100644 grafana/alerting-get-started-pt3/index.json create mode 100644 grafana/alerting-get-started-pt3/intro.md create mode 100755 grafana/alerting-get-started-pt3/preprocessed.md create mode 100644 grafana/alerting-get-started-pt3/step1.md create mode 100644 grafana/alerting-get-started-pt3/step2.md create mode 100644 grafana/alerting-get-started-pt3/step3.md create mode 100644 grafana/alerting-get-started-pt3/step4.md create mode 100644 grafana/alerting-get-started-pt3/step5.md create mode 100644 grafana/alerting-get-started-pt3/step6.md diff --git a/grafana/alerting-get-started-pt3/finish.md b/grafana/alerting-get-started-pt3/finish.md new file mode 100644 index 0000000..09ba6de --- /dev/null +++ b/grafana/alerting-get-started-pt3/finish.md @@ -0,0 +1,3 @@ +# Conclusion + +Alert rule grouping simplifies incident management by consolidating related alerts. By configuring **notification policies** and using **labels** (such as _region_), you can group alerts based on specific criteria and route them to the appropriate teams. 
Fine-tuning **timing options**—including group wait, group interval, and repeat interval—further reduces noise and ensures notifications remain actionable without overwhelming on-call engineers. diff --git a/grafana/alerting-get-started-pt3/index.json b/grafana/alerting-get-started-pt3/index.json new file mode 100644 index 0000000..b523ae9 --- /dev/null +++ b/grafana/alerting-get-started-pt3/index.json @@ -0,0 +1,35 @@ +{ + "title": "Get started with Grafana Alerting - Part 3", + "description": "Learn how to group alert notifications effectively to reduce noise and streamline communication in Grafana Alerting — Part 3.", + "details": { + "intro": { + "text": "intro.md" + }, + "steps": [ + { + "text": "step1.md" + }, + { + "text": "step2.md" + }, + { + "text": "step3.md" + }, + { + "text": "step4.md" + }, + { + "text": "step5.md" + }, + { + "text": "step6.md" + } + ], + "finish": { + "text": "finish.md" + } + }, + "backend": { + "imageid": "ubuntu" + } +} diff --git a/grafana/alerting-get-started-pt3/intro.md b/grafana/alerting-get-started-pt3/intro.md new file mode 100644 index 0000000..f7578fa --- /dev/null +++ b/grafana/alerting-get-started-pt3/intro.md @@ -0,0 +1,17 @@ +# Get started with Grafana Alerting - Part 3 + +The Get started with Grafana Alerting tutorial Part 3 is a continuation of [Get started with Grafana Alerting tutorial Part 2](http://www.grafana.com/tutorials/alerting-get-started-pt2/). + +Alert grouping in Grafana Alerting reduces notification noise by combining related alerts into a single, concise notification. This is essential for on-call engineers, ensuring they focus on resolving incidents instead of sorting through a flood of notifications. + +Grouping is configured by using labels in the notification policy that reference the labels that are generated by the alert instances. With notification policies, you can also configure how often notifications are sent for each group of alerts. 
+ +In this tutorial, you will: + +- Understand how alert rule grouping works. + +- Create a notification policy to handle grouping. + +- Define an alert rule for a real-world scenario. + +- Receive and review grouped alert notifications. diff --git a/grafana/alerting-get-started-pt3/preprocessed.md b/grafana/alerting-get-started-pt3/preprocessed.md new file mode 100755 index 0000000..d62b4b7 --- /dev/null +++ b/grafana/alerting-get-started-pt3/preprocessed.md @@ -0,0 +1,425 @@ +--- +Feedback Link: https://github.com/grafana/tutorials/issues/new +categories: + - alerting +description: Learn how to group alert notifications effectively to reduce noise and streamline communication in Grafana Alerting — Part 3. +labels: + products: + - enterprise + - oss + - cloud +tags: + - intermediate +title: Get started with Grafana Alerting - Part 3 +weight: 60 +killercoda: + title: Get started with Grafana Alerting - Part 3 + description: Learn how to group alert notifications effectively to reduce noise and streamline communication in Grafana Alerting — Part 3. 
+ backend: + imageid: ubuntu +refs: + alert-labels: + - pattern: /docs/grafana/ + destination: /docs/grafana//alerting/fundamentals/alert-rules/annotation-label/ + - pattern: /docs/grafana-cloud/ + destination: /docs/grafana-cloud/alerting-and-irm/alerting/fundamentals/alert-rules/annotation-label/ + notification-policies: + - pattern: /docs/grafana/ + destination: /docs/grafana//alerting/fundamentals/notifications/notification-policies/ + - pattern: /docs/grafana-cloud/ + destination: /docs/grafana-cloud/alerting-and-irm/alerting/fundamentals/notifications/notification-policies/ + alert-grouping: + - pattern: /docs/grafana/ + destination: /docs/grafana//alerting/fundamentals/notifications/group-alert-notifications/ + - pattern: /docs/grafana-cloud/ + destination: /docs/grafana-cloud/alerting-and-irm/alerting/fundamentals/notifications/group-alert-notifications/ +--- + + + + +# Get started with Grafana Alerting - Part 3 + +The Get started with Grafana Alerting tutorial Part 3 is a continuation of [Get started with Grafana Alerting tutorial Part 2](http://www.grafana.com/tutorials/alerting-get-started-pt2/). + +Alert grouping in Grafana Alerting reduces notification noise by combining related alerts into a single, concise notification. This is essential for on-call engineers, ensuring they focus on resolving incidents instead of sorting through a flood of notifications. + +Grouping is configured by using labels in the notification policy that reference the labels that are generated by the alert instances. With notification policies, you can also configure how often notifications are sent for each group of alerts. + +In this tutorial, you will: + +- Understand how alert rule grouping works. +- Create a notification policy to handle grouping. +- Define an alert rule for a real-world scenario. +- Receive and review grouped alert notifications. 
+ + + + + + + +## Set up the Grafana stack + + +## Before you begin + +There are different ways you can follow along with this tutorial. + +- **Grafana Cloud** + + - As a Grafana Cloud user, you don't have to install anything. [Create your free account](http://www.grafana.com/auth/sign-up/create-user). + + Continue to [How alert rule grouping works](#how-alert-rule-grouping-works). + +- **Interactive learning environment** + + - Alternatively, you can try out this example in our interactive learning environment: [Get started with Grafana Alerting - Part 3](https://killercoda.com/grafana-labs/course/grafana/alerting-get-started-pt3/). It's a fully configured environment with all the dependencies already installed. + +- **Grafana OSS** + + - If you opt to run a Grafana stack locally, ensure you have the following applications installed: + + - [Docker Compose](https://docs.docker.com/get-docker/) (included in Docker for Desktop for macOS and Windows) + - [Git](https://git-scm.com/) + +### Set up the Grafana stack (OSS users) + + + +To demonstrate the observation of data using the Grafana stack, download and run the following files. + +1. Clone the [tutorial environment repository](https://www.github.com/grafana/tutorial-environment). + + + + ``` + git clone https://github.com/grafana/tutorial-environment.git + ``` + + + +1. Change to the directory where you cloned the repository: + + + + ``` + cd tutorial-environment + ``` + + + +1. Run the Grafana stack: + + + + ``` + docker compose up -d + ``` + + + + + + + ```bash + docker-compose up -d + ``` + + + + + The first time you run `docker compose up -d`, Docker downloads all the necessary resources for the tutorial. This might take a few minutes, depending on your internet connection. + + + + > **Note:** + > If you already have Grafana, Loki, or Prometheus running on your system, you might see errors, because the Docker image is trying to use ports that your local installations are already using. 
If this is the case, stop the services, then run the command again. + + + + + NOTE: + + If you already have Grafana, Loki, or Prometheus running on your system, you might see errors, because the Docker image is trying to use ports that your local installations are already using. If this is the case, stop the services, then run the command again. + + + + + +## How alert rule grouping works + +Alert notification grouping is configured with **labels** and **timing options**: + +- **Labels** map the alert rule with the notification policy and define the grouping. +- **Timing options** control when and how often notifications are sent. + +{{< figure src="/media/docs/alerting/alerting-notification-policy-diagram-with-labels-v3.png" max-width="750px" alt="A diagram about the components of a notification policy, including labels and groups" >}} + +### Types of Labels + +1. **Reserved labels** (default): + - Automatically generated by Grafana, e.g., `alertname`, `grafana_folder`. + - Example: `alertname="High CPU usage"`. + +1. **User-configured labels**: + - Added manually to the alert rule. + - Example: `severity`, `priority`. + +1. **Query labels**: + - Returned by the data source query. + - Example: `region`, `service`, `environment`. + +### Timing Options + +1. **Group wait**: Time before sending the first notification. +1. **Group interval**: Time between notifications for a group. +1. **Repeat interval**: Time before resending notifications for an unchanged group. + +Alerts sharing the **same label values** are grouped together, and timing options determine notification frequency. + +For more details, see: +- [Grouping Alerts](ref:alert-grouping) +- [Alert Labels](ref:alert-labels) + + + + +## A real-world example of alert grouping in action + +### Scenario: monitoring a distributed application + +You’re monitoring metrics like CPU usage, memory utilization, and network latency across multiple regions. 
Alert rules include labels such as `region: us-west` and `region: us-east`. If multiple alerts trigger across these regions, they can result in notification floods. + +### How to manage grouping + +To group alert rule notifications: + +1. **Define labels**: Use `region`, `metric`, or `instance` labels to categorize alerts. +1. **Configure Notification policies**: + - Group alerts by the `region` label. + - Example: + - Alerts for `region: us-west` go to the West Coast team. + - Alerts for `region: us-east` go to the East Coast team. + + + + +## Setting up alert rule grouping + +### Notification Policy + +[Notification policies](ref:notification-policies) group alert instances and route notifications to specific contact points. + +To follow the above example, we will create notification policies that route alert instances based on the `region` label to specific contact points. This setup ensures that alerts for a given region are consolidated into a single notification. Additionally, we will fine-tune the **timing settings** for each region by overriding the default parent policy, allowing more granular control over when notifications are sent. + + +1. Sign in to Grafana: + - **Grafana Cloud** users: Log in via Grafana Cloud. + - **OSS users**: Go to [http://localhost:3000](http://localhost:3000). + +1. Navigate to **Notification Policies**: + - Go to **Alerts & IRM > Alerting > Notification Policies**. + +1. Add a child policy: + - In the Default policy, click **+ New child policy**. + - **Label**: `region` + - **Operator**: `=` + - **Value**: `us-west` + + This label matches alert rules where the region label is us-west. + +1. Choose a **Contact point**: + - Select **Webhook**. + + If you don’t have any contact points, add a Contact point. + +1. Enable Continue matching: + - Turn on **Continue matching subsequent sibling nodes** so the evaluation continues even after one or more labels (i.e. region label) match. + +1. 
Override grouping settings: + - Toggle **Override grouping**. + - **Group by**: `region`. + + **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west` will be combined into one notification, making it easier to manage and reducing alert fatigue. + +1. Set custom timin: + - Toggle **Override general timings**. + - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. + + **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. + +1. Save and repeat: + - Repeat for `region = us-east` with a different webhook or a different contact point. + + {{< figure src="/media/docs/alerting/notificaiton-policies-region.png" max-width="750px" alt="Two nested notification policies to route and group alert notifications" >}} + + These nested policies should route alert instances where the region label is either us-west or us-east. + + **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). + + + + +1. Visit [http://localhost:3000](http://localhost:3000), where Grafana should be running +1. Navigate to **Alerts & IRM > Alerting > Notification policies**. +1. In the Default policy, click **+ New child policy**. + - In the Default policy, click **+ New child policy**. + - **Label**: `region` + - **Operator**: `=` + - **Value**: `us-west` + + This label matches alert rules where the region label is us-west + +1. Choose a **Contact point**: + - Select **Webhook**. 
+ + If you don’t have any contact points, add a Contact point. + +1. Enable Continue matching: + - Turn on **Continue matching subsequent sibling nodes** so the evaluation continues even after one or more labels (i.e. region label) match. + +1. Override grouping settings: + - Toggle **Override grouping**. + - **Group by**: `region`. + + **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west` will be combined into one notification, making it easier to manage and reducing alert fatigue. + +1. Set custom timin: + - Toggle **Override general timings**. + - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. + + **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. + +1. Save and repeat: + - Repeat for `region = us-east` with a different webhook or a different contact point. + + **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). + + + + +## Create an alert rule + +In this section we configure an alert rule based on our application monitoring example. + +1. Go to **Alerting > Alert rules**. +2. Click **New alert rule**. + +### Enter an alert rule name + +Make it short and descriptive as this will appear in your alert notification. For instance, `High CPU usage - Multi-region`. + + +### Define query and alert condition + +In this section, we use the default options for Grafana-managed alert rule creation. 
The default options let us define the query, a expression (used to manipulate the data -- the `WHEN` field in the UI), and the condition that must be met for the alert to be triggered (in default mode is the threshold). + +Grafana includes a [test data source](https://grafana.com/docs/grafana/latest/datasources/testdata/) that creates simulated time series data. This data source is included in the demo environment for this tutorial. If you're working in Grafana Cloud or your own local Grafana instance, you can add the data source through the **Connections** menu. + +1. Select **TestData** data source from the drop-down menu. +1. From **Scenario** select **CSV Content**. +1. Copy in the following CSV data: + + - Select **TestData** as the data source. + - Set **Scenario** to **CSV Content**. + - Use the following CSV data: + + ```csv + region,cpu-usage,service,instance + us-west,35,web-server-1,server-01 + us-west,81,web-server-1,server-02 + us-east,79,web-server-2,server-03 + us-east,52,web-server-2,server-04 + us-west,45,db-server-1,server-05 + us-east,77,db-server-2,server-06 + us-west,82,db-server-1,server-07 + us-east,93,db-server-2,server-08 + ``` + + The returned data simulates a data source returning multiple time series, each leading to the creation of an alert instance for that specific time series. + +1. In the **Alert condition** section: + + - Keep `Last` as the value for the reducer function (`WHEN`), and `75` as the threshold value. This is the value above which the alert rule should trigger. + +1. Click **Preview alert rule condition** to run the queries. + + It should return 5 series in Firing state, two firing instances from the us-west region, and three from the us-east region. + + {{< figure src="/media/docs/alerting/regions-alert-instance-preview.png" max-width="750px" alt="Preview of a query returning alert instances." >}} + +### Set evaluation behavior + +Every alert rule is assigned to an evaluation group. 
You can assign the alert rule to an existing evaluation group or create a new one. + +1. In **Folder**, click **+ New folder** and enter a name. For example: `Multi-region CPU alerts`. This folder contains our alert rules. +1. In the **Evaluation group**, repeat the above step to create a new evaluation group. Name it `Multi-region CPU group`. +1. Choose an **Evaluation interval** (how often the alert are evaluated). Choose `1m`. + + The evaluation interval of 1 minute allows Grafana to detect changes quickly, while the longer **Group wait** (from our notification policy) and **Group interval** (inherited from the Default notification policy) allow for efficient grouping of alerts and minimize unnecessary notifications. + +1. Set the pending period to `0s` (zero seconds), so the alert rule fires the moment the condition is met (this minimizes the waiting time for the demonstration). + +### Configure labels and notifications + +Choose the notification policy where you want to receive your alert notifications. + +1. Select **Use notification policy**. +1. Click **Preview routing** to ensure correct matching. + + {{< figure src="/media/docs/alerting/region-notification-policy-routing-preview.png" max-width="750px" alt="Preview of alert instance routing with the region label matcher" >}} + + The preview shows that the region label from our data source is successfully matching the notification policies that we created earlier thanks to the label matcher that we configured. + +1. Click **Save rule and exit**. + + + + +## Receiving grouped alert notifications + +Now that the alert rule has been configured, you should receive alert notifications in the contact point whenever alerts trigger. + +When the configured alert rule detects CPU usage higher than 75% across multiple regions, it will evaluate the metric every minute. If the condition persists, notifications will be grouped together, with a **Group wait** of 30 seconds before the first alert is sent. 
Follow-up notifications for the same alert group will be sent at intervals of 2 minutes, reducing the frequency of alerts. If the condition continues for an extended period, a **Repeat interval** of 4 hours ensures that the alert is only resent if the issue persists + +As a result, our notification policy will route two notifications: one notification grouping the three alert instances from the `us-east` region and another grouping the two alert instances from the `us-west` region + +Gouped notifications example: + +Webhook - US East +```json +{ + "receiver": "webhook-us-east", + "status": "firing", + "alerts": [ + { "instance": "server-03" }, + { "instance": "server-06" }, + { "instance": "server-08" } + ] +} +``` + +Webhook - US West +```json +{ + "receiver": "webhook-us-west", + "status": "firing", + "alerts": [ + { "instance": "server-02" }, + { "instance": "server-07" } + ] +} +``` + + + + + +## Conclusion + +Alert rule grouping simplifies incident management by consolidating related alerts. By configuring **notification policies** and using **labels** (such as _region_), you can group alerts based on specific criteria and route them to the appropriate teams. Fine-tuning **timing options**—including group wait, group interval, and repeat interval—further reduces noise and ensures notifications remain actionable without overwhelming on-call engineers. + + \ No newline at end of file diff --git a/grafana/alerting-get-started-pt3/step1.md b/grafana/alerting-get-started-pt3/step1.md new file mode 100644 index 0000000..ac5153f --- /dev/null +++ b/grafana/alerting-get-started-pt3/step1.md @@ -0,0 +1,25 @@ +To demonstrate the observation of data using the Grafana stack, download and run the following files. + +1. Clone the [tutorial environment repository](https://www.github.com/grafana/tutorial-environment). + + ``` + git clone https://github.com/grafana/tutorial-environment.git + ```{{exec}} + +1. 
Change to the directory where you cloned the repository: + + ``` + cd tutorial-environment + ```{{exec}} + +1. Run the Grafana stack: + + ```bash + docker-compose up -d + ```{{exec}} + + The first time you run `docker compose up -d`{{copy}}, Docker downloads all the necessary resources for the tutorial. This might take a few minutes, depending on your internet connection. + + NOTE: + + If you already have Grafana, Loki, or Prometheus running on your system, you might see errors, because the Docker image is trying to use ports that your local installations are already using. If this is the case, stop the services, then run the command again. diff --git a/grafana/alerting-get-started-pt3/step2.md b/grafana/alerting-get-started-pt3/step2.md new file mode 100644 index 0000000..87ecf72 --- /dev/null +++ b/grafana/alerting-get-started-pt3/step2.md @@ -0,0 +1,45 @@ +# How alert rule grouping works + +Alert notification grouping is configured with **labels** and **timing options**: + +- **Labels** map the alert rule with the notification policy and define the grouping. + +- **Timing options** control when and how often notifications are sent. + +![A diagram about the components of a notification policy, including labels and groups](https://grafana.com/media/docs/alerting/alerting-notification-policy-diagram-with-labels-v3.png) + +## Types of Labels + +1. **Reserved labels** (default): + + - Automatically generated by Grafana, e.g., `alertname`{{copy}}, `grafana_folder`{{copy}}. + + - Example: `alertname="High CPU usage"`{{copy}}. + +1. **User-configured labels**: + + - Added manually to the alert rule. + + - Example: `severity`{{copy}}, `priority`{{copy}}. + +1. **Query labels**: + + - Returned by the data source query. + + - Example: `region`{{copy}}, `service`{{copy}}, `environment`{{copy}}. + +## Timing Options + +1. **Group wait**: Time before sending the first notification. + +1. **Group interval**: Time between notifications for a group. + +1. 
**Repeat interval**: Time before resending notifications for an unchanged group. + +Alerts sharing the **same label values** are grouped together, and timing options determine notification frequency. + +For more details, see: + +- [Grouping Alerts](ref:alert-grouping) + +- [Alert Labels](ref:alert-labels) diff --git a/grafana/alerting-get-started-pt3/step3.md b/grafana/alerting-get-started-pt3/step3.md new file mode 100644 index 0000000..661feb7 --- /dev/null +++ b/grafana/alerting-get-started-pt3/step3.md @@ -0,0 +1,19 @@ +# A real-world example of alert grouping in action + +## Scenario: monitoring a distributed application + +You’re monitoring metrics like CPU usage, memory utilization, and network latency across multiple regions. Alert rules include labels such as `region: us-west`{{copy}} and `region: us-east`{{copy}}. If multiple alerts trigger across these regions, they can result in notification floods. + +## How to manage grouping + +To group alert rule notifications: + +1. **Define labels**: Use `region`{{copy}}, `metric`{{copy}}, or `instance`{{copy}} labels to categorize alerts. + +1. **Configure Notification policies**: + - Group alerts by the `region`{{copy}} label. + + - Example: + - Alerts for `region: us-west`{{copy}} go to the West Coast team. + + - Alerts for `region: us-east`{{copy}} go to the East Coast team. diff --git a/grafana/alerting-get-started-pt3/step4.md b/grafana/alerting-get-started-pt3/step4.md new file mode 100644 index 0000000..b7050fb --- /dev/null +++ b/grafana/alerting-get-started-pt3/step4.md @@ -0,0 +1,56 @@ +# Setting up alert rule grouping + +## Notification Policy + +[Notification policies](ref:notification-policies) group alert instances and route notifications to specific contact points. + +To follow the above example, we will create notification policies that route alert instances based on the `region`{{copy}} label to specific contact points. 
This setup ensures that alerts for a given region are consolidated into a single notification. Additionally, we will fine-tune the **timing settings** for each region by overriding the default parent policy, allowing more granular control over when notifications are sent. + +1. Visit [http://localhost:3000]({{TRAFFIC_HOST1_3000}}), where Grafana should be running + +1. Navigate to **Alerts & IRM > Alerting > Notification policies**. + +1. In the Default policy, click **+ New child policy**. + + - In the Default policy, click **+ New child policy**. + + - **Label**: `region`{{copy}} + + - **Operator**: `=`{{copy}} + + - **Value**: `us-west`{{copy}} + + This label matches alert rules where the region label is us-west + +1. Choose a **Contact point**: + + - Select **Webhook**. + + If you don’t have any contact points, add a Contact point. + +1. Enable Continue matching: + + - Turn on **Continue matching subsequent sibling nodes** so the evaluation continues even after one or more labels (i.e. region label) match. + +1. Override grouping settings: + + - Toggle **Override grouping**. + + - **Group by**: `region`{{copy}}. + + **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west`{{copy}} will be combined into one notification, making it easier to manage and reducing alert fatigue. + +1. Set custom timin: + + - Toggle **Override general timings**. + + - **Group interval**: `2m`{{copy}}. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. + + **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. + +1. Save and repeat: + + - Repeat for `region = us-east`{{copy}} with a different webhook or a different contact point. + + **Note**: In Grafana, each label within a notification policy must have a unique key. 
If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). diff --git a/grafana/alerting-get-started-pt3/step5.md b/grafana/alerting-get-started-pt3/step5.md new file mode 100644 index 0000000..7d1fda7 --- /dev/null +++ b/grafana/alerting-get-started-pt3/step5.md @@ -0,0 +1,81 @@ +# Create an alert rule + +In this section we configure an alert rule based on our application monitoring example. + +1. Go to **Alerting > Alert rules**. + +1. Click **New alert rule**. + +## Enter an alert rule name + +Make it short and descriptive as this will appear in your alert notification. For instance, `High CPU usage - Multi-region`{{copy}}. + +## Define query and alert condition + +In this section, we use the default options for Grafana-managed alert rule creation. The default options let us define the query, an expression (used to manipulate the data – the `WHEN`{{copy}} field in the UI), and the condition that must be met for the alert to be triggered (in default mode, this is the threshold). + +Grafana includes a [test data source](https://grafana.com/docs/grafana/latest/datasources/testdata/) that creates simulated time series data. This data source is included in the demo environment for this tutorial. If you’re working in Grafana Cloud or your own local Grafana instance, you can add the data source through the **Connections** menu. + +1. Select **TestData** data source from the drop-down menu. + +1. From **Scenario** select **CSV Content**. + +1. Copy in the following CSV data: + + - Select **TestData** as the data source. + + - Set **Scenario** to **CSV Content**. + + - Use the following CSV data: + + ```csv + region,cpu-usage,service,instance + us-west,35,web-server-1,server-01 + us-west,81,web-server-1,server-02 + us-east,79,web-server-2,server-03 + us-east,52,web-server-2,server-04 + us-west,45,db-server-1,server-05 + us-east,77,db-server-2,server-06 + us-west,82,db-server-1,server-07 + us-east,93,db-server-2,server-08 + ```{{copy}} + + The returned data simulates a data source returning multiple time series, each leading to the creation of an alert instance for that specific time series. + +1. In the **Alert condition** section: + + - Keep `Last`{{copy}} as the value for the reducer function (`WHEN`{{copy}}), and `75`{{copy}} as the threshold value. This is the value above which the alert rule should trigger. + +1. Click **Preview alert rule condition** to run the queries. + + It should return 5 series in Firing state, two firing instances from the us-west region, and three from the us-east region. + + ![Preview of a query returning alert instances.](https://grafana.com/media/docs/alerting/regions-alert-instance-preview.png) + +## Set evaluation behavior + +Every alert rule is assigned to an evaluation group. You can assign the alert rule to an existing evaluation group or create a new one. + +1. In **Folder**, click **+ New folder** and enter a name. For example: `Multi-region CPU alerts`{{copy}}. This folder contains our alert rules. + +1. In the **Evaluation group**, repeat the above step to create a new evaluation group. Name it `Multi-region CPU group`{{copy}}. + +1. Choose an **Evaluation interval** (how often the alert rule is evaluated). Choose `1m`{{copy}}. + + The evaluation interval of 1 minute allows Grafana to detect changes quickly, while the longer **Group wait** (from our notification policy) and **Group interval** (inherited from the Default notification policy) allow for efficient grouping of alerts and minimize unnecessary notifications. + +1. 
Set the pending period to `0s`{{copy}} (zero seconds), so the alert rule fires the moment the condition is met (this minimizes the waiting time for the demonstration). + +## Configure labels and notifications + +Choose the notification policy where you want to receive your alert notifications. + +1. Select **Use notification policy**. + +1. Click **Preview routing** to ensure correct matching. + + ![Preview of alert instance routing with the region label matcher](https://grafana.com/media/docs/alerting/region-notification-policy-routing-preview.png) + + The preview shows that the region label from our data source is successfully matching the notification policies that we created earlier thanks to the label matcher that we configured. + +1. Click **Save rule and exit**. diff --git a/grafana/alerting-get-started-pt3/step6.md b/grafana/alerting-get-started-pt3/step6.md new file mode 100644 index 0000000..ba48a65 --- /dev/null +++ b/grafana/alerting-get-started-pt3/step6.md @@ -0,0 +1,36 @@ +# Receiving grouped alert notifications + +Now that the alert rule has been configured, you should receive alert notifications in the contact point whenever alerts trigger. + +When the configured alert rule detects CPU usage higher than 75% across multiple regions, it will evaluate the metric every minute. If the condition persists, notifications will be grouped together, with a **Group wait** of 30 seconds before the first alert is sent. Follow-up notifications for the same alert group will be sent at intervals of 2 minutes, reducing the frequency of alerts. 
If the condition continues for an extended period, a **Repeat interval** of 4 hours ensures that the alert is only resent if the issue persists. + +As a result, our notification policy will route two notifications: one notification grouping the three alert instances from the `us-east`{{copy}} region and another grouping the two alert instances from the `us-west`{{copy}} region + +Gouped notifications example: + +Webhook - US East + +```json +{ + "receiver": "webhook-us-east", + "status": "firing", + "alerts": [ + { "instance": "server-03" }, + { "instance": "server-06" }, + { "instance": "server-08" } + ] +} +```{{copy}} + +Webhook - US West + +```json +{ + "receiver": "webhook-us-west", + "status": "firing", + "alerts": [ + { "instance": "server-02" }, + { "instance": "server-07" } + ] +} +```{{copy}} diff --git a/grafana/structure.json b/grafana/structure.json index 0e0d024..f96e2c4 100644 --- a/grafana/structure.json +++ b/grafana/structure.json @@ -3,6 +3,7 @@ { "path": "grafana-basics", "title": "Grafana Basics"}, { "path": "alerting-get-started", "title": "Get started with Grafana Alerting"}, { "path": "alerting-get-started-pt2", "title": "Get started with Grafana Alerting - Part 2"}, + { "path": "alerting-get-started-pt3", "title": "Get started with Grafana Alerting - Part 3"}, { "path": "alerting-loki-logs", "title": "Create alert rules with logs"}, { "path": "grafana-fundamentals", "title": "Grafana Fundamentals"}, { "path": "fo11y", "title": "Frontend Observability"} From 34b6206714d4541c7770aaabafc35e95a4212f17 Mon Sep 17 00:00:00 2001 From: tonypowa Date: Tue, 17 Dec 2024 14:12:55 +0100 Subject: [PATCH 2/7] typo --- grafana/alerting-get-started-pt3/preprocessed.md | 6 +++--- grafana/alerting-get-started-pt3/step4.md | 2 +- grafana/alerting-get-started-pt3/step6.md | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/grafana/alerting-get-started-pt3/preprocessed.md b/grafana/alerting-get-started-pt3/preprocessed.md index d62b4b7..1314c5f
100755 --- a/grafana/alerting-get-started-pt3/preprocessed.md +++ b/grafana/alerting-get-started-pt3/preprocessed.md @@ -243,7 +243,7 @@ To follow the above example, we will create notification policies that route ale **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west` will be combined into one notification, making it easier to manage and reducing alert fatigue. -1. Set custom timin: +1. Set custom timing: - Toggle **Override general timings**. - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. @@ -286,7 +286,7 @@ To follow the above example, we will create notification policies that route ale **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west` will be combined into one notification, making it easier to manage and reducing alert fatigue. -1. Set custom timin: +1. Set custom timing: - Toggle **Override general timings**. - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. 
@@ -387,7 +387,7 @@ When the configured alert rule detects CPU usage higher than 75% across multiple As a result, our notification policy will route two notifications: one notification grouping the three alert instances from the `us-east` region and another grouping the two alert instances from the `us-west` region -Gouped notifications example: +Grouped notifications example: Webhook - US East ```json diff --git a/grafana/alerting-get-started-pt3/step4.md b/grafana/alerting-get-started-pt3/step4.md index b7050fb..b6d0061 100644 --- a/grafana/alerting-get-started-pt3/step4.md +++ b/grafana/alerting-get-started-pt3/step4.md @@ -40,7 +40,7 @@ To follow the above example, we will create notification policies that route ale **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west`{{copy}} will be combined into one notification, making it easier to manage and reducing alert fatigue. -1. Set custom timin: +1. Set custom timing: - Toggle **Override general timings**. 
diff --git a/grafana/alerting-get-started-pt3/step6.md b/grafana/alerting-get-started-pt3/step6.md index ba48a65..573b7bf 100644 --- a/grafana/alerting-get-started-pt3/step6.md +++ b/grafana/alerting-get-started-pt3/step6.md @@ -6,7 +6,7 @@ When the configured alert rule detects CPU usage higher than 75% across multiple As a result, our notification policy will route two notifications: one notification grouping the three alert instances from the `us-east`{{copy}} region and another grouping the two alert instances from the `us-west`{{copy}} region -Gouped notifications example: +Grouped notifications example: Webhook - US East From 096ecbdc10f79b1359d89f81fc1bbd5eeb650ac9 Mon Sep 17 00:00:00 2001 From: tonypowa Date: Tue, 17 Dec 2024 14:23:29 +0100 Subject: [PATCH 3/7] all pretty no pity --- .../alerting-get-started-pt3/preprocessed.md | 102 ++++++++++-------- grafana/alerting-get-started-pt3/step4.md | 8 +- grafana/alerting-get-started-pt3/step6.md | 11 +- 3 files changed, 63 insertions(+), 58 deletions(-) diff --git a/grafana/alerting-get-started-pt3/preprocessed.md b/grafana/alerting-get-started-pt3/preprocessed.md index 1314c5f..78b4f84 100755 --- a/grafana/alerting-get-started-pt3/preprocessed.md +++ b/grafana/alerting-get-started-pt3/preprocessed.md @@ -35,22 +35,21 @@ refs: destination: /docs/grafana-cloud/alerting-and-irm/alerting/fundamentals/notifications/group-alert-notifications/ --- - # Get started with Grafana Alerting - Part 3 The Get started with Grafana Alerting tutorial Part 3 is a continuation of [Get started with Grafana Alerting tutorial Part 2](http://www.grafana.com/tutorials/alerting-get-started-pt2/). -Alert grouping in Grafana Alerting reduces notification noise by combining related alerts into a single, concise notification. This is essential for on-call engineers, ensuring they focus on resolving incidents instead of sorting through a flood of notifications. 
+Alert grouping in Grafana Alerting reduces notification noise by combining related alerts into a single, concise notification. This is essential for on-call engineers, ensuring they focus on resolving incidents instead of sorting through a flood of notifications. Grouping is configured by using labels in the notification policy that reference the labels that are generated by the alert instances. With notification policies, you can also configure how often notifications are sent for each group of alerts. In this tutorial, you will: -- Understand how alert rule grouping works. +- Understand how alert rule grouping works. - Create a notification policy to handle grouping. -- Define an alert rule for a real-world scenario. +- Define an alert rule for a real-world scenario. - Receive and review grouped alert notifications. @@ -159,10 +158,12 @@ Alert notification grouping is configured with **labels** and **timing options** ### Types of Labels 1. **Reserved labels** (default): + - Automatically generated by Grafana, e.g., `alertname`, `grafana_folder`. - Example: `alertname="High CPU usage"`. 1. **User-configured labels**: + - Added manually to the alert rule. - Example: `severity`, `priority`. @@ -179,6 +180,7 @@ Alert notification grouping is configured with **labels** and **timing options** Alerts sharing the **same label values** are grouped together, and timing options determine notification frequency. For more details, see: + - [Grouping Alerts](ref:alert-grouping) - [Alert Labels](ref:alert-labels) @@ -214,50 +216,59 @@ To group alert rule notifications: To follow the above example, we will create notification policies that route alert instances based on the `region` label to specific contact points. This setup ensures that alerts for a given region are consolidated into a single notification. 
Additionally, we will fine-tune the **timing settings** for each region by overriding the default parent policy, allowing more granular control over when notifications are sent. + 1. Sign in to Grafana: + - **Grafana Cloud** users: Log in via Grafana Cloud. - **OSS users**: Go to [http://localhost:3000](http://localhost:3000). 1. Navigate to **Notification Policies**: + - Go to **Alerts & IRM > Alerting > Notification Policies**. 1. Add a child policy: + - In the Default policy, click **+ New child policy**. - **Label**: `region` - **Operator**: `=` - **Value**: `us-west` - This label matches alert rules where the region label is us-west. + This label matches alert rules where the region label is us-west. 1. Choose a **Contact point**: + - Select **Webhook**. - If you don’t have any contact points, add a Contact point. + If you don’t have any contact points, add a Contact point. 1. Enable Continue matching: + - Turn on **Continue matching subsequent sibling nodes** so the evaluation continues even after one or more labels (i.e. region label) match. 1. Override grouping settings: + - Toggle **Override grouping**. - **Group by**: `region`. - **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west` will be combined into one notification, making it easier to manage and reducing alert fatigue. + **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west` will be combined into one notification, making it easier to manage and reducing alert fatigue. 1. Set custom timing: + - Toggle **Override general timings**. - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. - **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. 
+ **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. 1. Save and repeat: + - Repeat for `region = us-east` with a different webhook or a different contact point. - {{< figure src="/media/docs/alerting/notificaiton-policies-region.png" max-width="750px" alt="Two nested notification policies to route and group alert notifications" >}} + {{< figure src="/media/docs/alerting/notificaiton-policies-region.png" max-width="750px" alt="Two nested notification policies to route and group alert notifications" >}} - These nested policies should route alert instances where the region label is either us-west or us-east. + These nested policies should route alert instances where the region label is either us-west or us-east. - **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. - For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). + **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). @@ -265,6 +276,7 @@ To follow the above example, we will create notification policies that route ale 1. Visit [http://localhost:3000](http://localhost:3000), where Grafana should be running 1. Navigate to **Alerts & IRM > Alerting > Notification policies**. 1. In the Default policy, click **+ New child policy**. 
+ - In the Default policy, click **+ New child policy**. - **Label**: `region` - **Operator**: `=` @@ -273,34 +285,40 @@ To follow the above example, we will create notification policies that route ale This label matches alert rules where the region label is us-west 1. Choose a **Contact point**: + - Select **Webhook**. - If you don’t have any contact points, add a Contact point. + If you don’t have any contact points, add a Contact point. 1. Enable Continue matching: + - Turn on **Continue matching subsequent sibling nodes** so the evaluation continues even after one or more labels (i.e. region label) match. 1. Override grouping settings: + - Toggle **Override grouping**. - **Group by**: `region`. - **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west` will be combined into one notification, making it easier to manage and reducing alert fatigue. + **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west` will be combined into one notification, making it easier to manage and reducing alert fatigue. 1. Set custom timing: + - Toggle **Override general timings**. - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. - **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. + **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. 1. Save and repeat: + - Repeat for `region = us-east` with a different webhook or a different contact point. - **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. 
This is because labels are stored as associative arrays (maps), where each key must be unique. - For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). + **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). + ## Create an alert rule In this section we configure an alert rule based on our application monitoring example. @@ -312,7 +330,6 @@ In this section we configure an alert rule based on our application monitoring e Make it short and descriptive as this will appear in your alert notification. For instance, `High CPU usage - Multi-region`. - ### Define query and alert condition In this section, we use the default options for Grafana-managed alert rule creation. The default options let us define the query, a expression (used to manipulate the data -- the `WHEN` field in the UI), and the condition that must be met for the alert to be triggered (in default mode is the threshold). @@ -327,19 +344,19 @@ Grafana includes a [test data source](https://grafana.com/docs/grafana/latest/da - Set **Scenario** to **CSV Content**. 
- Use the following CSV data: - ```csv - region,cpu-usage,service,instance - us-west,35,web-server-1,server-01 - us-west,81,web-server-1,server-02 - us-east,79,web-server-2,server-03 - us-east,52,web-server-2,server-04 - us-west,45,db-server-1,server-05 - us-east,77,db-server-2,server-06 - us-west,82,db-server-1,server-07 - us-east,93,db-server-2,server-08 - ``` + ```csv + region,cpu-usage,service,instance + us-west,35,web-server-1,server-01 + us-west,81,web-server-1,server-02 + us-east,79,web-server-2,server-03 + us-east,52,web-server-2,server-04 + us-west,45,db-server-1,server-05 + us-east,77,db-server-2,server-06 + us-west,82,db-server-1,server-07 + us-east,93,db-server-2,server-08 + ``` - The returned data simulates a data source returning multiple time series, each leading to the creation of an alert instance for that specific time series. + The returned data simulates a data source returning multiple time series, each leading to the creation of an alert instance for that specific time series. 1. In the **Alert condition** section: @@ -347,9 +364,9 @@ Grafana includes a [test data source](https://grafana.com/docs/grafana/latest/da 1. Click **Preview alert rule condition** to run the queries. - It should return 5 series in Firing state, two firing instances from the us-west region, and three from the us-east region. + It should return 5 series in Firing state, two firing instances from the us-west region, and three from the us-east region. - {{< figure src="/media/docs/alerting/regions-alert-instance-preview.png" max-width="750px" alt="Preview of a query returning alert instances." >}} + {{< figure src="/media/docs/alerting/regions-alert-instance-preview.png" max-width="750px" alt="Preview of a query returning alert instances." >}} ### Set evaluation behavior @@ -359,7 +376,7 @@ Every alert rule is assigned to an evaluation group. You can assign the alert ru 1. In the **Evaluation group**, repeat the above step to create a new evaluation group. 
Name it `Multi-region CPU group`. 1. Choose an **Evaluation interval** (how often the alert are evaluated). Choose `1m`. - The evaluation interval of 1 minute allows Grafana to detect changes quickly, while the longer **Group wait** (from our notification policy) and **Group interval** (inherited from the Default notification policy) allow for efficient grouping of alerts and minimize unnecessary notifications. + The evaluation interval of 1 minute allows Grafana to detect changes quickly, while the longer **Group wait** (from our notification policy) and **Group interval** (inherited from the Default notification policy) allow for efficient grouping of alerts and minimize unnecessary notifications. 1. Set the pending period to `0s` (zero seconds), so the alert rule fires the moment the condition is met (this minimizes the waiting time for the demonstration). @@ -370,9 +387,9 @@ Choose the notification policy where you want to receive your alert notification 1. Select **Use notification policy**. 1. Click **Preview routing** to ensure correct matching. - {{< figure src="/media/docs/alerting/region-notification-policy-routing-preview.png" max-width="750px" alt="Preview of alert instance routing with the region label matcher" >}} + {{< figure src="/media/docs/alerting/region-notification-policy-routing-preview.png" max-width="750px" alt="Preview of alert instance routing with the region label matcher" >}} - The preview shows that the region label from our data source is successfully matching the notification policies that we created earlier thanks to the label matcher that we configured. + The preview shows that the region label from our data source is successfully matching the notification policies that we created earlier thanks to the label matcher that we configured. 1. Click **Save rule and exit**. 
@@ -390,27 +407,22 @@ As a result, our notification policy will route two notifications: one notificat Grouped notifications example: Webhook - US East + ```json { "receiver": "webhook-us-east", "status": "firing", - "alerts": [ - { "instance": "server-03" }, - { "instance": "server-06" }, - { "instance": "server-08" } - ] + "alerts": [{ "instance": "server-03" }, { "instance": "server-06" }, { "instance": "server-08" }] } ``` Webhook - US West + ```json { "receiver": "webhook-us-west", "status": "firing", - "alerts": [ - { "instance": "server-02" }, - { "instance": "server-07" } - ] + "alerts": [{ "instance": "server-02" }, { "instance": "server-07" }] } ``` @@ -422,4 +434,4 @@ Webhook - US West Alert rule grouping simplifies incident management by consolidating related alerts. By configuring **notification policies** and using **labels** (such as _region_), you can group alerts based on specific criteria and route them to the appropriate teams. Fine-tuning **timing options**—including group wait, group interval, and repeat interval—further reduces noise and ensures notifications remain actionable without overwhelming on-call engineers. - \ No newline at end of file + diff --git a/grafana/alerting-get-started-pt3/step4.md b/grafana/alerting-get-started-pt3/step4.md index b6d0061..ee06fe2 100644 --- a/grafana/alerting-get-started-pt3/step4.md +++ b/grafana/alerting-get-started-pt3/step4.md @@ -38,7 +38,7 @@ To follow the above example, we will create notification policies that route ale - **Group by**: `region`{{copy}}. - **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west`{{copy}} will be combined into one notification, making it easier to manage and reducing alert fatigue. + **Group by** consolidates alerts that share the same grouping label into a single notification. 
For example, all alerts with `region=us-west`{{copy}} will be combined into one notification, making it easier to manage and reducing alert fatigue. 1. Set custom timing: @@ -46,11 +46,11 @@ To follow the above example, we will create notification policies that route ale - **Group interval**: `2m`{{copy}}. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. - **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. + **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. 1. Save and repeat: - Repeat for `region = us-east`{{copy}} with a different webhook or a different contact point. - **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. - For identical label keys use regex matchers (e.g., _region=~“us-west|us-east”_). + **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + For identical label keys use regex matchers (e.g., _region=~“us-west|us-east”_). 
diff --git a/grafana/alerting-get-started-pt3/step6.md b/grafana/alerting-get-started-pt3/step6.md index 573b7bf..770c90a 100644 --- a/grafana/alerting-get-started-pt3/step6.md +++ b/grafana/alerting-get-started-pt3/step6.md @@ -14,11 +14,7 @@ Webhook - US East { "receiver": "webhook-us-east", "status": "firing", - "alerts": [ - { "instance": "server-03" }, - { "instance": "server-06" }, - { "instance": "server-08" } - ] + "alerts": [{ "instance": "server-03" }, { "instance": "server-06" }, { "instance": "server-08" }] } ```{{copy}} @@ -28,9 +24,6 @@ Webhook - US West { "receiver": "webhook-us-west", "status": "firing", - "alerts": [ - { "instance": "server-02" }, - { "instance": "server-07" } - ] + "alerts": [{ "instance": "server-02" }, { "instance": "server-07" }] } ```{{copy}} From f7b3c400f686118db921dc7905b808e5d4cb244c Mon Sep 17 00:00:00 2001 From: tonypowa Date: Wed, 18 Dec 2024 10:23:44 +0100 Subject: [PATCH 4/7] apply suggestions --- grafana/alerting-get-started-pt3/intro.md | 2 +- .../alerting-get-started-pt3/preprocessed.md | 21 +++++--- grafana/alerting-get-started-pt3/step4.md | 49 ------------------- grafana/alerting-get-started-pt3/step5.md | 6 +-- 4 files changed, 17 insertions(+), 61 deletions(-) diff --git a/grafana/alerting-get-started-pt3/intro.md b/grafana/alerting-get-started-pt3/intro.md index f7578fa..c0689e6 100644 --- a/grafana/alerting-get-started-pt3/intro.md +++ b/grafana/alerting-get-started-pt3/intro.md @@ -8,7 +8,7 @@ Grouping is configured by using labels in the notification policy that reference In this tutorial, you will: -- Understand how alert rule grouping works. +- Learn how alert rule grouping works. - Create a notification policy to handle grouping. 
diff --git a/grafana/alerting-get-started-pt3/preprocessed.md b/grafana/alerting-get-started-pt3/preprocessed.md index 78b4f84..d0e6179 100755 --- a/grafana/alerting-get-started-pt3/preprocessed.md +++ b/grafana/alerting-get-started-pt3/preprocessed.md @@ -47,7 +47,7 @@ Grouping is configured by using labels in the notification policy that reference In this tutorial, you will: -- Understand how alert rule grouping works. +- Learn how alert rule grouping works. - Create a notification policy to handle grouping. - Define an alert rule for a real-world scenario. - Receive and review grouped alert notifications. @@ -267,10 +267,15 @@ To follow the above example, we will create notification policies that route ale These nested policies should route alert instances where the region label is either us-west or us-east. - **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. - For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). + > **Note:** + > **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + > For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). - + + > **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. 
This is because labels are stored as associative arrays (maps), where each key must be unique. + > For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). + + 1. Visit [http://localhost:3000](http://localhost:3000), where Grafana should be running @@ -312,7 +317,7 @@ To follow the above example, we will create notification policies that route ale - Repeat for `region = us-east` with a different webhook or a different contact point. - **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry is saved, and the previous one is discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_). @@ -323,12 +328,12 @@ To follow the above example, we will create notification policies that route ale In this section we configure an alert rule based on our application monitoring example. -1. Go to **Alerting > Alert rules**. +1. Navigate to **Alerting > Alert rules**. 2. Click **New alert rule**. ### Enter an alert rule name -Make it short and descriptive as this will appear in your alert notification. For instance, `High CPU usage - Multi-region`. +Make it short and descriptive as this appears in your alert notification. For instance, `High CPU usage - Multi-region`. 
### Define query and alert condition @@ -336,7 +341,7 @@ In this section, we use the default options for Grafana-managed alert rule creat Grafana includes a [test data source](https://grafana.com/docs/grafana/latest/datasources/testdata/) that creates simulated time series data. This data source is included in the demo environment for this tutorial. If you're working in Grafana Cloud or your own local Grafana instance, you can add the data source through the **Connections** menu. -1. Select **TestData** data source from the drop-down menu. +1. From the drop-down menu, select **TestData** data source. 1. From **Scenario** select **CSV Content**. 1. Copy in the following CSV data: diff --git a/grafana/alerting-get-started-pt3/step4.md b/grafana/alerting-get-started-pt3/step4.md index ee06fe2..e7ebc60 100644 --- a/grafana/alerting-get-started-pt3/step4.md +++ b/grafana/alerting-get-started-pt3/step4.md @@ -5,52 +5,3 @@ [Notification policies](ref:notification-policies) group alert instances and route notifications to specific contact points. To follow the above example, we will create notification policies that route alert instances based on the `region`{{copy}} label to specific contact points. This setup ensures that alerts for a given region are consolidated into a single notification. Additionally, we will fine-tune the **timing settings** for each region by overriding the default parent policy, allowing more granular control over when notifications are sent. - -1. Visit [http://localhost:3000]({{TRAFFIC_HOST1_3000}}), where Grafana should be running - -1. Navigate to **Alerts & IRM > Alerting > Notification policies**. - -1. In the Default policy, click **+ New child policy**. - - - In the Default policy, click **+ New child policy**. - - - **Label**: `region`{{copy}} - - - **Operator**: `=`{{copy}} - - - **Value**: `us-west`{{copy}} - - This label matches alert rules where the region label is us-west - -1. Choose a **Contact point**: - - - Select **Webhook**. 
- - If you don’t have any contact points, add a Contact point. - -1. Enable Continue matching: - - - Turn on **Continue matching subsequent sibling nodes** so the evaluation continues even after one or more labels (i.e. region label) match. - -1. Override grouping settings: - - - Toggle **Override grouping**. - - - **Group by**: `region`{{copy}}. - - **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west`{{copy}} will be combined into one notification, making it easier to manage and reducing alert fatigue. - -1. Set custom timing: - - - Toggle **Override general timings**. - - - **Group interval**: `2m`{{copy}}. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. - - **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. - -1. Save and repeat: - - - Repeat for `region = us-east`{{copy}} with a different webhook or a different contact point. - - **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. - For identical label keys use regex matchers (e.g., _region=~“us-west|us-east”_). diff --git a/grafana/alerting-get-started-pt3/step5.md b/grafana/alerting-get-started-pt3/step5.md index 7d1fda7..3c4a322 100644 --- a/grafana/alerting-get-started-pt3/step5.md +++ b/grafana/alerting-get-started-pt3/step5.md @@ -2,13 +2,13 @@ In this section we configure an alert rule based on our application monitoring example. -1. Go to **Alerting > Alert rules**. +1. Navigate to **Alerting > Alert rules**. 1. Click **New alert rule**. 
## Enter an alert rule name -Make it short and descriptive as this will appear in your alert notification. For instance, `High CPU usage - Multi-region`{{copy}}. +Make it short and descriptive as this appears in your alert notification. For instance, `High CPU usage - Multi-region`{{copy}}. ## Define query and alert condition @@ -16,7 +16,7 @@ In this section, we use the default options for Grafana-managed alert rule creat Grafana includes a [test data source](https://grafana.com/docs/grafana/latest/datasources/testdata/) that creates simulated time series data. This data source is included in the demo environment for this tutorial. If you’re working in Grafana Cloud or your own local Grafana instance, you can add the data source through the **Connections** menu. -1. Select **TestData** data source from the drop-down menu. +1. From the drop-down menu, select **TestData** data source. 1. From **Scenario** select **CSV Content**. From 6f64a6646a3043e69dec1a4f6685209c2a56efe5 Mon Sep 17 00:00:00 2001 From: tonypowa Date: Wed, 18 Dec 2024 10:29:20 +0100 Subject: [PATCH 5/7] link --- grafana/alerting-get-started-pt3/preprocessed.md | 4 ++-- grafana/alerting-get-started-pt3/step2.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/grafana/alerting-get-started-pt3/preprocessed.md b/grafana/alerting-get-started-pt3/preprocessed.md index d0e6179..192ad48 100755 --- a/grafana/alerting-get-started-pt3/preprocessed.md +++ b/grafana/alerting-get-started-pt3/preprocessed.md @@ -181,8 +181,8 @@ Alerts sharing the **same label values** are grouped together, and timing option For more details, see: -- [Grouping Alerts](ref:alert-grouping) -- [Alert Labels](ref:alert-labels) +- [Grouping Alerts](https://grafana.com/docs/grafana/latest/alerting/fundamentals/notifications/group-alert-notifications/) +- [Alert Labels](https://grafana.com/docs/grafana/latest/alerting/fundamentals/alert-rules/annotation-label/#label-types) diff --git 
a/grafana/alerting-get-started-pt3/step2.md b/grafana/alerting-get-started-pt3/step2.md index 87ecf72..3088728 100644 --- a/grafana/alerting-get-started-pt3/step2.md +++ b/grafana/alerting-get-started-pt3/step2.md @@ -40,6 +40,6 @@ Alerts sharing the **same label values** are grouped together, and timing option For more details, see: -- [Grouping Alerts](ref:alert-grouping) +- [Grouping Alerts](https://grafana.com/docs/grafana/latest/alerting/fundamentals/notifications/group-alert-notifications/) -- [Alert Labels](ref:alert-labels) +- [Alert Labels](https://grafana.com/docs/grafana/latest/alerting/fundamentals/alert-rules/annotation-label/#label-types) From 120fe8fee7ea824c718e70dacb1b9a1d7156e11d Mon Sep 17 00:00:00 2001 From: tonypowa Date: Wed, 18 Dec 2024 10:38:35 +0100 Subject: [PATCH 6/7] format --- .../alerting-get-started-pt3/preprocessed.md | 8 +-- grafana/alerting-get-started-pt3/step4.md | 49 +++++++++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/grafana/alerting-get-started-pt3/preprocessed.md b/grafana/alerting-get-started-pt3/preprocessed.md index 192ad48..b1bcf32 100755 --- a/grafana/alerting-get-started-pt3/preprocessed.md +++ b/grafana/alerting-get-started-pt3/preprocessed.md @@ -268,14 +268,10 @@ To follow the above example, we will create notification policies that route ale These nested policies should route alert instances where the region label is either us-west or us-east. > **Note:** - > **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique. + > **Note**: In Grafana, each label within a notification policy must have a unique key. 
If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry is saved, and the previous one is discarded. This is because labels are stored as associative arrays (maps), where each key must be unique.
   > For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_).
-
-   > **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry will be saved, and the previous one will be discarded. This is because labels are stored as associative arrays (maps), where each key must be unique.
-   > For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_).
-
-
-
+

1. Visit [http://localhost:3000](http://localhost:3000), where Grafana should be running

diff --git a/grafana/alerting-get-started-pt3/step4.md b/grafana/alerting-get-started-pt3/step4.md
index e7ebc60..6fa918e 100644
--- a/grafana/alerting-get-started-pt3/step4.md
+++ b/grafana/alerting-get-started-pt3/step4.md
@@ -5,3 +5,52 @@

[Notification policies](ref:notification-policies) group alert instances and route notifications to specific contact points.

To follow the above example, we will create notification policies that route alert instances based on the `region`{{copy}} label to specific contact points. This setup ensures that alerts for a given region are consolidated into a single notification. Additionally, we will fine-tune the **timing settings** for each region by overriding the default parent policy, allowing more granular control over when notifications are sent.
+
+1. Visit [http://localhost:3000]({{TRAFFIC_HOST1_3000}}), where Grafana should be running
+
+1. Navigate to **Alerts & IRM > Alerting > Notification policies**.
+
+1. In the Default policy, click **+ New child policy**.
+
+   - Add a label matcher with the following details:
+
+   - **Label**: `region`{{copy}}
+
+   - **Operator**: `=`{{copy}}
+
+   - **Value**: `us-west`{{copy}}
+
+   This label matches alert rules where the region label is us-west.
+
+1. Choose a **Contact point**:
+
+   - Select **Webhook**.
+
+     If you don’t have any contact points, add a Contact point.
+
+1. Enable Continue matching:
+
+   - Turn on **Continue matching subsequent sibling nodes** so the evaluation continues even after one or more labels (i.e. region label) match.
+
+1. Override grouping settings:
+
+   - Toggle **Override grouping**.
+
+   - **Group by**: `region`{{copy}}.
+
+     **Group by** consolidates alerts that share the same grouping label into a single notification. For example, all alerts with `region=us-west`{{copy}} will be combined into one notification, making it easier to manage and reducing alert fatigue.
+
+1. Set custom timing:
+
+   - Toggle **Override general timings**.
+
+   - **Group interval**: `2m`{{copy}}. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes.
+
+     **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise.
+
+1. Save and repeat:
+
+   - Repeat for `region = us-east`{{copy}} with a different webhook or a different contact point.
+
+   **Note**: In Grafana, each label within a notification policy must have a unique key. If you attempt to add the same label key (e.g., region) with different values (us-west and us-east), only the last entry is saved, and the previous one is discarded. This is because labels are stored as associative arrays (maps), where each key must be unique.
+   For identical label keys use regex matchers (e.g., _region=~"us-west|us-east"_).
From bb43c603d3912f2f2119e0bb9e4f5e1d526c384b Mon Sep 17 00:00:00 2001 From: tonypowa Date: Wed, 18 Dec 2024 10:57:03 +0100 Subject: [PATCH 7/7] group interval note --- grafana/alerting-get-started-pt3/preprocessed.md | 8 ++++---- grafana/alerting-get-started-pt3/step4.md | 2 +- grafana/alerting-get-started-pt3/step6.md | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/grafana/alerting-get-started-pt3/preprocessed.md b/grafana/alerting-get-started-pt3/preprocessed.md index b1bcf32..7cdcab0 100755 --- a/grafana/alerting-get-started-pt3/preprocessed.md +++ b/grafana/alerting-get-started-pt3/preprocessed.md @@ -239,7 +239,7 @@ To follow the above example, we will create notification policies that route ale - Select **Webhook**. - If you don’t have any contact points, add a Contact point. + If you don’t have any contact points, add a [Contact point](https://grafana.com/docs/grafana/latest/alerting/configure-notifications/manage-contact-points/#add-a-contact-point). 1. Enable Continue matching: @@ -255,7 +255,7 @@ To follow the above example, we will create notification policies that route ale 1. Set custom timing: - Toggle **Override general timings**. - - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. + - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. While the default is 5 minutes, we chose 2 minutes here to provide faster feedback for demonstration purposes. **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. @@ -305,7 +305,7 @@ To follow the above example, we will create notification policies that route ale 1. Set custom timing: - Toggle **Override general timings**. - - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. 
+   - **Group interval**: `2m`. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. While the default is 5 minutes, we chose 2 minutes here to provide faster feedback for demonstration purposes.

     **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise.

@@ -401,7 +401,7 @@ Choose the notification policy where you want to receive your alert notification

Now that the alert rule has been configured, you should receive alert notifications in the contact point whenever alerts trigger.

-When the configured alert rule detects CPU usage higher than 75% across multiple regions, it will evaluate the metric every minute. If the condition persists, notifications will be grouped together, with a **Group wait** of 30 seconds before the first alert is sent. Follow-up notifications for the same alert group will be sent at intervals of 2 minutes, reducing the frequency of alerts. If the condition continues for an extended period, a **Repeat interval** of 4 hours ensures that the alert is only resent if the issue persists
+When the configured alert rule detects CPU usage higher than 75% across multiple regions, it will evaluate the metric every minute. If the condition persists, notifications will be grouped together, with a **Group wait** of 30 seconds before the first alert is sent. Follow-up notifications are sent every 2 minutes for quick updates in this demonstration, but to reduce alert frequency, consider using the default or increasing the interval.
If the condition continues for an extended period, a **Repeat interval** of 4 hours ensures that the alert is only resent if the issue persists As a result, our notification policy will route two notifications: one notification grouping the three alert instances from the `us-east` region and another grouping the two alert instances from the `us-west` region diff --git a/grafana/alerting-get-started-pt3/step4.md b/grafana/alerting-get-started-pt3/step4.md index 6fa918e..be45b78 100644 --- a/grafana/alerting-get-started-pt3/step4.md +++ b/grafana/alerting-get-started-pt3/step4.md @@ -44,7 +44,7 @@ To follow the above example, we will create notification policies that route ale - Toggle **Override general timings**. - - **Group interval**: `2m`{{copy}}. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. + - **Group interval**: `2m`{{copy}}. This ensures follow-up notifications for the same alert group will be sent at intervals of 2 minutes. While the default is 5 minutes, we chose 2 minutes here to provide faster feedback for demonstration purposes. **Timing options** control how often notifications are sent and can help balance timely alerting with minimizing noise. diff --git a/grafana/alerting-get-started-pt3/step6.md b/grafana/alerting-get-started-pt3/step6.md index 770c90a..468da3c 100644 --- a/grafana/alerting-get-started-pt3/step6.md +++ b/grafana/alerting-get-started-pt3/step6.md @@ -2,7 +2,7 @@ Now that the alert rule has been configured, you should receive alert notifications in the contact point whenever alerts trigger. -When the configured alert rule detects CPU usage higher than 75% across multiple regions, it will evaluate the metric every minute. If the condition persists, notifications will be grouped together, with a **Group wait** of 30 seconds before the first alert is sent. Follow-up notifications for the same alert group will be sent at intervals of 2 minutes, reducing the frequency of alerts. 
If the condition continues for an extended period, a **Repeat interval** of 4 hours ensures that the alert is only resent if the issue persists
+When the configured alert rule detects CPU usage higher than 75% across multiple regions, it will evaluate the metric every minute. If the condition persists, notifications will be grouped together, with a **Group wait** of 30 seconds before the first alert is sent. Follow-up notifications are sent every 2 minutes for quick updates in this demonstration, but to reduce alert frequency, consider using the default or increasing the interval. If the condition continues for an extended period, a **Repeat interval** of 4 hours ensures that the alert is only resent if the issue persists.

As a result, our notification policy will route two notifications: one notification grouping the three alert instances from the `us-east`{{copy}} region and another grouping the two alert instances from the `us-west`{{copy}} region