Skip to content

Commit

Permalink
Merge pull request #111 from DFE-Digital/1927-allow-metric-frequency-…
Browse files Browse the repository at this point in the history
…override

Added alert frequency monitor metric
  • Loading branch information
saliceti authored Jul 29, 2024
2 parents 7ffc46f + 4a175da commit 19dd963
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 10 deletions.
13 changes: 13 additions & 0 deletions aks/postgres/resources.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ locals {
azure_enable_monitoring = var.use_azure && var.azure_enable_monitoring

kubernetes_name = "${var.service_name}-${var.environment}-postgres${local.name_suffix}"

# Evaluation frequency (interval between checks) paired with each supported
# alert_window_size. Windows up to one hour are checked every minute; the
# 6h and 12h windows are checked every five minutes.
#
# PT1M is accepted by the alert_window_size validation, so it must have an
# entry here as well; without it the lookup below fails with an invalid-index
# error at plan time. It maps to PT1M, the smallest frequency used in this map.
alert_frequency_map = {
  PT1M  = "PT1M"
  PT5M  = "PT1M"
  PT15M = "PT1M"
  PT30M = "PT1M"
  PT1H  = "PT1M"
  PT6H  = "PT5M"
  PT12H = "PT5M"
}
# Frequency passed to the metric alerts, derived from the chosen window size.
alert_frequency = local.alert_frequency_map[var.alert_window_size]
}

# Username & password
Expand Down Expand Up @@ -172,6 +182,7 @@ resource "azurerm_monitor_metric_alert" "memory" {
scopes = [azurerm_postgresql_flexible_server.main[0].id]
description = "Action will be triggered when memory use is greater than 75%"
window_size = var.alert_window_size
frequency = local.alert_frequency

criteria {
metric_namespace = "Microsoft.DBforPostgreSQL/flexibleServers"
Expand Down Expand Up @@ -200,6 +211,7 @@ resource "azurerm_monitor_metric_alert" "cpu" {
scopes = [azurerm_postgresql_flexible_server.main[0].id]
description = "Action will be triggered when cpu use is greater than ${var.azure_cpu_threshold}%"
window_size = var.alert_window_size
frequency = local.alert_frequency

criteria {
metric_namespace = "Microsoft.DBforPostgreSQL/flexibleServers"
Expand Down Expand Up @@ -228,6 +240,7 @@ resource "azurerm_monitor_metric_alert" "storage" {
scopes = [azurerm_postgresql_flexible_server.main[0].id]
description = "Action will be triggered when storage use is greater than ${var.azure_storage_threshold}%"
window_size = var.alert_window_size
frequency = local.alert_frequency

criteria {
metric_namespace = "Microsoft.DBforPostgreSQL/flexibleServers"
Expand Down
2 changes: 1 addition & 1 deletion aks/postgres/tfdocs.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ No modules.
|------|-------------|------|---------|:--------:|
| <a name="input_admin_password"></a> [admin\_password](#input\_admin\_password) | Password of the admin user | `string` | `null` | no |
| <a name="input_admin_username"></a> [admin\_username](#input\_admin\_username) | Username of the admin user | `string` | `null` | no |
| <a name="input_alert_window_size"></a> [alert\_window\_size](#input\_alert\_window\_size) | The period of time that is used to monitor alert activity e.g PT1M, PT5M, PT15M, PT30M, PT1H, PT6H or PT12H | `string` | `"PT5M"` | no |
| <a name="input_alert_window_size"></a> [alert\_window\_size](#input\_alert\_window\_size) | The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly. | `string` | `"PT5M"` | no |
| <a name="input_azure_cpu_threshold"></a> [azure\_cpu\_threshold](#input\_azure\_cpu\_threshold) | n/a | `number` | `80` | no |
| <a name="input_azure_enable_backup_storage"></a> [azure\_enable\_backup\_storage](#input\_azure\_enable\_backup\_storage) | n/a | `bool` | `true` | no |
| <a name="input_azure_enable_high_availability"></a> [azure\_enable\_high\_availability](#input\_azure\_enable\_high\_availability) | n/a | `bool` | `false` | no |
Expand Down
14 changes: 10 additions & 4 deletions aks/postgres/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,17 @@ variable "azure_enable_monitoring" {
default = true
}



variable "alert_window_size" {
type = string
nullable = false
default = "PT5M"
description = "The period of time that is used to monitor alert activity e.g PT1M, PT5M, PT15M, PT30M, PT1H, PT6H or PT12H"
type = string
nullable = false
default = "PT5M"
validation {
condition = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
}
description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
}

variable "azure_maintenance_window" {
Expand Down
11 changes: 11 additions & 0 deletions aks/redis/resources.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ locals {
azure_enable_monitoring = var.use_azure && var.azure_enable_monitoring

kubernetes_name = "${var.service_name}-${var.environment}-redis${local.name_suffix}"

# Evaluation frequency (interval between checks) paired with each supported
# alert_window_size. Windows up to one hour are checked every minute; the
# 6h and 12h windows are checked every five minutes.
#
# PT1M is accepted by the alert_window_size validation, so it must have an
# entry here as well; without it the lookup below fails with an invalid-index
# error at plan time. It maps to PT1M, the smallest frequency used in this map.
alert_frequency_map = {
  PT1M  = "PT1M"
  PT5M  = "PT1M"
  PT15M = "PT1M"
  PT30M = "PT1M"
  PT1H  = "PT1M"
  PT6H  = "PT5M"
  PT12H = "PT5M"
}
# Frequency passed to the metric alert, derived from the chosen window size.
alert_frequency = local.alert_frequency_map[var.alert_window_size]
}

# Azure
Expand Down Expand Up @@ -86,6 +96,7 @@ resource "azurerm_monitor_metric_alert" "memory" {
scopes = [azurerm_redis_cache.main[0].id]
description = "Action will be triggered when memory use is greater than ${var.azure_memory_threshold}%"
window_size = var.alert_window_size
frequency = local.alert_frequency

criteria {
metric_namespace = "Microsoft.Cache/redis"
Expand Down
2 changes: 1 addition & 1 deletion aks/redis/tfdocs.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ No modules.

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_alert_window_size"></a> [alert\_window\_size](#input\_alert\_window\_size) | The period of time that is used to monitor alert activity e.g PT1M, PT5M, PT15M, PT30M, PT1H, PT6H or PT12H | `string` | `"PT5M"` | no |
| <a name="input_alert_window_size"></a> [alert\_window\_size](#input\_alert\_window\_size) | The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly. | `string` | `"PT5M"` | no |
| <a name="input_azure_capacity"></a> [azure\_capacity](#input\_azure\_capacity) | n/a | `number` | `1` | no |
| <a name="input_azure_enable_monitoring"></a> [azure\_enable\_monitoring](#input\_azure\_enable\_monitoring) | n/a | `bool` | `true` | no |
| <a name="input_azure_family"></a> [azure\_family](#input\_azure\_family) | n/a | `string` | `"C"` | no |
Expand Down
12 changes: 8 additions & 4 deletions aks/redis/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,12 @@ variable "azure_patch_schedule" {
}

variable "alert_window_size" {
type = string
default = "PT5M"
nullable = false
description = "The period of time that is used to monitor alert activity e.g PT1M, PT5M, PT15M, PT30M, PT1H, PT6H or PT12H"
type = string
default = "PT5M"
nullable = false
validation {
condition = contains(["PT1M", "PT5M", "PT15M", "PT30M", "PT1H", "PT6H", "PT12H"], var.alert_window_size)
error_message = "The alert_window_size must be one of: PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H"
}
description = "The period of time that is used to monitor alert activity e.g. PT1M, PT5M, PT15M, PT30M, PT1H, PT6H, PT12H. The interval between checks is adjusted accordingly."
}

0 comments on commit 19dd963

Please sign in to comment.