From 9e9960958933d3d4a8f23d1d89e21d95df6d2a62 Mon Sep 17 00:00:00 2001 From: Xavier Basty Date: Thu, 24 Aug 2023 19:14:59 +0200 Subject: [PATCH] feat: add prometheus panels to grafana (#118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Prometheus data-source authentication * feat: add panels for Prometheus metrics * Update terraform/monitoring/panels/app/identity/invalid_unregister_jwt.libsonnet Co-authored-by: Szymon Rząd * Update terraform/monitoring/panels/app/identity/invalid_register_cacao.libsonnet Co-authored-by: Szymon Rząd * Update terraform/monitoring/panels/app/invite/invalid_register_jwt.libsonnet Co-authored-by: Szymon Rząd * Update terraform/monitoring/panels/app/invite/invalid_unregister_jwt.libsonnet Co-authored-by: Szymon Rząd --------- Co-authored-by: Szymon Rząd --- terraform/ecs/outputs.tf | 5 +++ terraform/monitoring/dashboard.jsonnet | 42 +++++++++++++------ terraform/monitoring/dashboard.tf | 1 + terraform/monitoring/data_sources.tf | 3 +- .../identity/invalid_register_cacao.libsonnet | 21 ++++++++++ .../identity/invalid_unregister_jwt.libsonnet | 21 ++++++++++ .../panels/app/identity/register.libsonnet | 21 ++++++++++ .../panels/app/identity/resolved.libsonnet | 21 ++++++++++ .../panels/app/identity/unregister.libsonnet | 21 ++++++++++ .../app/invite/invalid_register_jwt.libsonnet | 21 ++++++++++ .../invite/invalid_unregister_jwt.libsonnet | 21 ++++++++++ .../panels/app/invite/register.libsonnet | 21 ++++++++++ .../panels/app/invite/resolved.libsonnet | 21 ++++++++++ .../panels/app/invite/unregister.libsonnet | 21 ++++++++++ .../panels/{app => ecs}/cpu.libsonnet | 0 .../panels/{app => ecs}/memory.libsonnet | 0 terraform/monitoring/panels/panels.libsonnet | 21 +++++++++- terraform/monitoring/variables.tf | 5 +++ terraform/res_monitoring.tf | 1 + 19 files changed, 272 insertions(+), 16 deletions(-) create mode 100644 terraform/monitoring/panels/app/identity/invalid_register_cacao.libsonnet create mode 100644 terraform/monitoring/panels/app/identity/invalid_unregister_jwt.libsonnet create mode 100644 terraform/monitoring/panels/app/identity/register.libsonnet create mode 100644 terraform/monitoring/panels/app/identity/resolved.libsonnet create mode 100644 terraform/monitoring/panels/app/identity/unregister.libsonnet create mode 100644 terraform/monitoring/panels/app/invite/invalid_register_jwt.libsonnet create mode 100644 terraform/monitoring/panels/app/invite/invalid_unregister_jwt.libsonnet create mode 100644 terraform/monitoring/panels/app/invite/register.libsonnet create mode 100644 terraform/monitoring/panels/app/invite/resolved.libsonnet create mode 100644 terraform/monitoring/panels/app/invite/unregister.libsonnet rename terraform/monitoring/panels/{app => ecs}/cpu.libsonnet (100%) rename terraform/monitoring/panels/{app => ecs}/memory.libsonnet (100%) diff --git a/terraform/ecs/outputs.tf b/terraform/ecs/outputs.tf index 24c7371..ac0f3bc 100644 --- a/terraform/ecs/outputs.tf +++ b/terraform/ecs/outputs.tf @@ -22,3 +22,8 @@ output "load_balancer_arn_suffix" { description = "The ARN suffix of the load balancer" value = aws_lb.load_balancer.arn_suffix } + +output "ecs_task_family" { + description = "The family of the task definition" + value = aws_ecs_task_definition.app_task.family +} diff --git a/terraform/monitoring/dashboard.jsonnet b/terraform/monitoring/dashboard.jsonnet index a4a8ad3..0ce2244 100644 --- a/terraform/monitoring/dashboard.jsonnet +++ b/terraform/monitoring/dashboard.jsonnet @@ -20,6 +20,7 @@ local vars = { notifications: std.parseJson(std.extVar('notifications')), ecs_service_name: std.extVar('ecs_service_name'), + ecs_task_family: std.extVar('ecs_task_family'), load_balancer: std.extVar('load_balancer'), target_group: std.extVar('target_group'), docdb_cluster_id: std.extVar('docdb_cluster_id'), @@ -50,23 +51,38 @@ dashboard.new( ) .addPanels(grafana.layout.generate_grid([ row.new('Application'), - panels.app.cpu(ds, vars) { gridPos: pos._2 }, - panels.app.memory(ds, vars) { gridPos: pos._2 }, + panels.app.identity.resolved(ds, vars) { gridPos: pos._3 }, + panels.app.identity.register(ds, vars) { gridPos: pos._3 }, + panels.app.identity.unregister(ds, vars) { gridPos: pos._3 }, + + panels.app.identity.invalid_register_cacao(ds, vars) { gridPos: pos._2 }, + panels.app.identity.invalid_unregister_jwt(ds, vars) { gridPos: pos._2 }, + + panels.app.invite.resolved(ds, vars) { gridPos: pos._3 }, + panels.app.invite.register(ds, vars) { gridPos: pos._3 }, + panels.app.invite.unregister(ds, vars) { gridPos: pos._3 }, + + panels.app.invite.invalid_register_jwt(ds, vars) { gridPos: pos._2 }, + panels.app.invite.invalid_unregister_jwt(ds, vars) { gridPos: pos._2 }, + + row.new('ECS'), + panels.ecs.cpu(ds, vars) { gridPos: pos._2 }, + panels.ecs.memory(ds, vars) { gridPos: pos._2 }, row.new('Load Balancer'), - panels.lb.active_connections(ds, vars) { gridPos: pos._2 }, - panels.lb.healthy_hosts(ds, vars) { gridPos: pos._2 }, + panels.lb.active_connections(ds, vars) { gridPos: pos._2 }, + panels.lb.healthy_hosts(ds, vars) { gridPos: pos._2 }, - panels.lb.requests(ds, vars) { gridPos: pos._3 }, - panels.lb.error_4xx(ds, vars) { gridPos: pos._3 }, - panels.lb.error_5xx(ds, vars) { gridPos: pos._3 }, + panels.lb.requests(ds, vars) { gridPos: pos._3 }, + panels.lb.error_4xx(ds, vars) { gridPos: pos._3 }, + panels.lb.error_5xx(ds, vars) { gridPos: pos._3 }, row.new('DocumentDB'), - panels.docdb.cpu(ds, vars) { gridPos: pos._3 }, - panels.docdb.available_memory(ds, vars) { gridPos: pos._3 }, - panels.docdb.connections(ds, vars) { gridPos: pos._3 }, + panels.docdb.cpu(ds, vars) { gridPos: pos._3 }, + panels.docdb.available_memory(ds, vars) { gridPos: pos._3 }, + panels.docdb.connections(ds, vars) { gridPos: pos._3 }, - panels.docdb.low_mem_op_throttled(ds, vars) { gridPos: pos._3 }, - panels.docdb.volume(ds, vars) { gridPos: pos._3 }, - panels.docdb.buffer_cache_hit_ratio(ds, vars) { gridPos: pos._3 }, + panels.docdb.low_mem_op_throttled(ds, vars) { gridPos: pos._3 }, + panels.docdb.volume(ds, vars) { gridPos: pos._3 }, + panels.docdb.buffer_cache_hit_ratio(ds, vars) { gridPos: pos._3 }, ])) diff --git a/terraform/monitoring/dashboard.tf b/terraform/monitoring/dashboard.tf index 11f1d5b..4aaea05 100644 --- a/terraform/monitoring/dashboard.tf +++ b/terraform/monitoring/dashboard.tf @@ -12,6 +12,7 @@ data "jsonnet_file" "dashboard" { notifications = jsonencode(var.notification_channels) ecs_service_name = var.ecs_service_name + ecs_task_family = var.ecs_task_family load_balancer = var.load_balancer_arn target_group = var.ecs_target_group_arn docdb_cluster_id = var.keystore_cluster_id diff --git a/terraform/monitoring/data_sources.tf b/terraform/monitoring/data_sources.tf index 0bb865c..4ec9f58 100644 --- a/terraform/monitoring/data_sources.tf +++ b/terraform/monitoring/data_sources.tf @@ -1,6 +1,7 @@ module "monitoring-role" { source = "app.terraform.io/wallet-connect/monitoring-role/aws" version = "1.0.2" + context = module.this remote_role_arn = var.monitoring_role_arn } @@ -12,7 +13,7 @@ resource "grafana_data_source" "prometheus" { json_data_encoded = jsonencode({ httpMethod = "GET" sigV4Auth = true - sigV4AuthType = "workspace-iam-role" + sigV4AuthType = "ec2_iam_role" sigV4Region = module.this.region sigV4AssumeRoleArn = module.monitoring-role.iam_role_arn }) diff --git a/terraform/monitoring/panels/app/identity/invalid_register_cacao.libsonnet b/terraform/monitoring/panels/app/identity/invalid_register_cacao.libsonnet new file mode 100644 index 0000000..867397e --- /dev/null +++ b/terraform/monitoring/panels/app/identity/invalid_register_cacao.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Identity - Invalid CACAO during Registration', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(invalid_identity_register_cacao{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/identity/invalid_unregister_jwt.libsonnet b/terraform/monitoring/panels/app/identity/invalid_unregister_jwt.libsonnet new file mode 100644 index 0000000..1bc5145 --- /dev/null +++ b/terraform/monitoring/panels/app/identity/invalid_unregister_jwt.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Identity - Invalid JWT during Unregistration', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(invalid_identity_unregister_jwt{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/identity/register.libsonnet b/terraform/monitoring/panels/app/identity/register.libsonnet new file mode 100644 index 0000000..80acdcd --- /dev/null +++ b/terraform/monitoring/panels/app/identity/register.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Identity - Registrations', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_register{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/identity/resolved.libsonnet b/terraform/monitoring/panels/app/identity/resolved.libsonnet new file mode 100644 index 0000000..6017b0f --- /dev/null +++ b/terraform/monitoring/panels/app/identity/resolved.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Identity - Resolutions', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_resolved{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/identity/unregister.libsonnet b/terraform/monitoring/panels/app/identity/unregister.libsonnet new file mode 100644 index 0000000..c4e9ceb --- /dev/null +++ b/terraform/monitoring/panels/app/identity/unregister.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Identity - Unregistrations', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_unregister{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/invite/invalid_register_jwt.libsonnet b/terraform/monitoring/panels/app/invite/invalid_register_jwt.libsonnet new file mode 100644 index 0000000..a981171 --- /dev/null +++ b/terraform/monitoring/panels/app/invite/invalid_register_jwt.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Invite - Invalid JWT during Registration', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(invalid_invite_register_jwt{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/invite/invalid_unregister_jwt.libsonnet b/terraform/monitoring/panels/app/invite/invalid_unregister_jwt.libsonnet new file mode 100644 index 0000000..99f5c5c --- /dev/null +++ b/terraform/monitoring/panels/app/invite/invalid_unregister_jwt.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Invite - Invalid JWT during Unregistration', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(invalid_invite_unregister_jwt{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/invite/register.libsonnet b/terraform/monitoring/panels/app/invite/register.libsonnet new file mode 100644 index 0000000..92c057d --- /dev/null +++ b/terraform/monitoring/panels/app/invite/register.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Invite - Registrations', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(invite_register{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/invite/resolved.libsonnet b/terraform/monitoring/panels/app/invite/resolved.libsonnet new file mode 100644 index 0000000..595de59 --- /dev/null +++ b/terraform/monitoring/panels/app/invite/resolved.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Invite - Resolutions', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(invite_resolved{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/invite/unregister.libsonnet b/terraform/monitoring/panels/app/invite/unregister.libsonnet new file mode 100644 index 0000000..bd43aae --- /dev/null +++ b/terraform/monitoring/panels/app/invite/unregister.libsonnet @@ -0,0 +1,21 @@ +local grafana = import '../../../grafonnet-lib/grafana.libsonnet'; +local panels = grafana.panels; +local targets = grafana.targets; + +local defaults = import '../../defaults.libsonnet'; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Invite - Unregistrations', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(invite_unregister{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family, + refId = "sources", + exemplar = true, + )) +} diff --git a/terraform/monitoring/panels/app/cpu.libsonnet b/terraform/monitoring/panels/ecs/cpu.libsonnet similarity index 100% rename from terraform/monitoring/panels/app/cpu.libsonnet rename to terraform/monitoring/panels/ecs/cpu.libsonnet diff --git a/terraform/monitoring/panels/app/memory.libsonnet b/terraform/monitoring/panels/ecs/memory.libsonnet similarity index 100% rename from terraform/monitoring/panels/app/memory.libsonnet rename to terraform/monitoring/panels/ecs/memory.libsonnet diff --git a/terraform/monitoring/panels/panels.libsonnet b/terraform/monitoring/panels/panels.libsonnet index 219a8d2..e0d6ad9 100644 --- a/terraform/monitoring/panels/panels.libsonnet +++ b/terraform/monitoring/panels/panels.libsonnet @@ -1,7 +1,24 @@ { + ecs: { + cpu: (import 'ecs/cpu.libsonnet').new, + memory: (import 'ecs/memory.libsonnet').new, + }, + app: { - cpu: (import 'app/cpu.libsonnet').new, - memory: (import 'app/memory.libsonnet').new, + invite: { + register: (import 'app/invite/register.libsonnet').new, + resolved: (import 'app/invite/resolved.libsonnet').new, + unregister: (import 'app/invite/unregister.libsonnet').new, + invalid_register_jwt: (import 'app/invite/invalid_register_jwt.libsonnet').new, + invalid_unregister_jwt: (import 'app/invite/invalid_unregister_jwt.libsonnet').new, + }, + identity: { + register: (import 'app/identity/register.libsonnet').new, + resolved: (import 'app/identity/resolved.libsonnet').new, + unregister: (import 'app/identity/unregister.libsonnet').new, + invalid_register_cacao: (import 'app/identity/invalid_register_cacao.libsonnet').new, + invalid_unregister_jwt: (import 'app/identity/invalid_unregister_jwt.libsonnet').new, + }, }, lb: { diff --git a/terraform/monitoring/variables.tf b/terraform/monitoring/variables.tf index 00e56b8..6241cd7 100644 --- a/terraform/monitoring/variables.tf +++ b/terraform/monitoring/variables.tf @@ -32,3 +32,8 @@ variable "monitoring_role_arn" { description = "The ARN of the monitoring role." type = string } + +variable "ecs_task_family" { + description = "The name of the ECS task family." + type = string +} diff --git a/terraform/res_monitoring.tf b/terraform/res_monitoring.tf index c12b8c5..5244204 100644 --- a/terraform/res_monitoring.tf +++ b/terraform/res_monitoring.tf @@ -9,4 +9,5 @@ module "monitoring" { load_balancer_arn = module.ecs.load_balancer_arn_suffix keystore_cluster_id = module.keystore.cluster_id monitoring_role_arn = data.terraform_remote_state.monitoring.outputs.grafana_workspaces.main.iam_role_arn + ecs_task_family = module.ecs.ecs_task_family }