diff --git a/instances/dev/main.tf b/instances/dev/main.tf index 61acff7..fb7ff15 100644 --- a/instances/dev/main.tf +++ b/instances/dev/main.tf @@ -43,6 +43,23 @@ resource "aws_instance" "api" { } } +resource "aws_cloudwatch_metric_alarm" "api_outage_alarm" { + alarm_name = "${var.perm_env.name}-api-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.api.id + } +} + resource "aws_instance" "taskrunner" { ami = module.perm_env_data.taskrunner_ami instance_type = "c4.large" @@ -56,6 +73,23 @@ resource "aws_instance" "taskrunner" { } } +resource "aws_cloudwatch_metric_alarm" "taskrunner_outage_alarm" { + alarm_name = "${var.perm_env.name}-taskrunner-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.taskrunner[0].id + } +} + resource "aws_instance" "cron" { ami = module.perm_env_data.cron_ami instance_type = "t2.micro" @@ -68,6 +102,23 @@ resource "aws_instance" "cron" { } } +resource "aws_cloudwatch_metric_alarm" "cron_outage_alarm" { + alarm_name = "${var.perm_env.name}-cron-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.cron.id + } +} + resource "aws_instance" "sftp" { ami = module.perm_env_data.sftp_ami instance_type = "c4.large" @@ -81,6 +132,23 @@ resource "aws_instance" "sftp" { } } +resource "aws_cloudwatch_metric_alarm" "sftp_outage_alarm" { + alarm_name = "${var.perm_env.name}-sftp-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.sftp.id + } +} + module "perm_env_data" { source = "../modules/get-data" perm_env = var.perm_env diff --git a/instances/production/main.tf b/instances/production/main.tf index ced7dea..dbae1e0 100644 --- a/instances/production/main.tf +++ b/instances/production/main.tf @@ -43,6 +43,23 @@ resource "aws_instance" "api" { } } +resource "aws_cloudwatch_metric_alarm" "api_outage_alarm" { + alarm_name = "${var.perm_env.name}-api-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.api.id + } +} + resource "aws_instance" "taskrunner" { ami = module.perm_env_data.taskrunner_ami instance_type = "c4.xlarge" @@ -56,6 +73,24 @@ resource "aws_instance" "taskrunner" { } } +resource "aws_cloudwatch_metric_alarm" "taskrunner_outage_alarm" { + count = 2 + alarm_name = "${var.perm_env.name}-taskrunner${count.index}-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.taskrunner[count.index].id + } +} + resource "aws_instance" "cron" { ami = module.perm_env_data.cron_ami instance_type = "t2.micro" @@ -68,6 +103,23 @@ resource "aws_instance" "cron" { } } +resource "aws_cloudwatch_metric_alarm" "cron_outage_alarm" { + alarm_name = "${var.perm_env.name}-cron-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.cron.id + } +} + resource "aws_instance" "sftp" { ami = module.perm_env_data.sftp_ami instance_type = "m4.large" @@ -81,6 +133,23 @@ resource "aws_instance" "sftp" { } } +resource "aws_cloudwatch_metric_alarm" "sftp_outage_alarm" { + alarm_name = "${var.perm_env.name}-sftp-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.sftp.id + } +} + module "perm_env_data" { source = "../modules/get-data" perm_env = var.perm_env diff --git a/instances/staging/main.tf b/instances/staging/main.tf index 80dbd7f..ec65624 100644 --- a/instances/staging/main.tf +++ b/instances/staging/main.tf @@ -43,6 +43,23 @@ resource "aws_instance" "api" { } } +resource "aws_cloudwatch_metric_alarm" "api_outage_alarm" { + alarm_name = "${var.perm_env.name}-api-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.api.id + } +} + resource "aws_instance" "taskrunner" { ami = module.perm_env_data.taskrunner_ami instance_type = "c4.large" @@ -56,6 +73,24 @@ resource "aws_instance" "taskrunner" { } } +resource "aws_cloudwatch_metric_alarm" "taskrunner_outage_alarm" { + count = 2 + alarm_name = "${var.perm_env.name}-taskrunner${count.index}-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.taskrunner[count.index].id + } +} + resource "aws_instance" "cron" { ami = module.perm_env_data.cron_ami instance_type = "t2.micro" @@ -68,6 +103,23 @@ resource "aws_instance" "cron" { } } +resource "aws_cloudwatch_metric_alarm" "cron_outage_alarm" { + alarm_name = "${var.perm_env.name}-cron-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.cron.id + } +} + resource "aws_instance" "sftp" { ami = module.perm_env_data.sftp_ami instance_type = "m4.large" @@ -81,6 +133,23 @@ resource "aws_instance" "sftp" { } } +resource "aws_cloudwatch_metric_alarm" "sftp_outage_alarm" { + alarm_name = "${var.perm_env.name}-sftp-instance-outage-alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = "1" + metric_name = "StatusCheckFailed" + namespace = "AWS/EC2" + period = "300" + statistic = "Average" + threshold = "0.99" + actions_enabled = "true" + alarm_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + ok_actions = ["arn:aws:sns:us-west-2:364159549467:ec2-outage-notifications"] + dimensions = { + InstanceId = aws_instance.sftp.id + } +} + module "perm_env_data" { source = "../modules/get-data" perm_env = var.perm_env