Skip to content

Commit

Permalink
feat: add prometheus panels to grafana (#118)
Browse files Browse the repository at this point in the history
* fix: Prometheus data-source authentication

* feat: add panels for Prometheus metrics

* Update terraform/monitoring/panels/app/identity/invalid_unregister_jwt.libsonnet

Co-authored-by: Szymon Rząd <sz.rzad@gmail.com>

* Update terraform/monitoring/panels/app/identity/invalid_register_cacao.libsonnet

Co-authored-by: Szymon Rząd <sz.rzad@gmail.com>

* Update terraform/monitoring/panels/app/invite/invalid_register_jwt.libsonnet

Co-authored-by: Szymon Rząd <sz.rzad@gmail.com>

* Update terraform/monitoring/panels/app/invite/invalid_unregister_jwt.libsonnet

Co-authored-by: Szymon Rząd <sz.rzad@gmail.com>

---------

Co-authored-by: Szymon Rząd <sz.rzad@gmail.com>
  • Loading branch information
xav and Elyniss authored Aug 24, 2023
1 parent 06424db commit 9e99609
Show file tree
Hide file tree
Showing 19 changed files with 272 additions and 16 deletions.
5 changes: 5 additions & 0 deletions terraform/ecs/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,8 @@ output "load_balancer_arn_suffix" {
description = "The ARN suffix of the load balancer"
value = aws_lb.load_balancer.arn_suffix
}

# Exposes the task-definition family of `aws_ecs_task_definition.app_task`
# so that other modules can reference it.
output "ecs_task_family" {
  description = "The family of the task definition"
  value       = aws_ecs_task_definition.app_task.family
}
42 changes: 29 additions & 13 deletions terraform/monitoring/dashboard.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ local vars = {
notifications: std.parseJson(std.extVar('notifications')),

ecs_service_name: std.extVar('ecs_service_name'),
ecs_task_family: std.extVar('ecs_task_family'),
load_balancer: std.extVar('load_balancer'),
target_group: std.extVar('target_group'),
docdb_cluster_id: std.extVar('docdb_cluster_id'),
Expand Down Expand Up @@ -50,23 +51,38 @@ dashboard.new(
)
.addPanels(grafana.layout.generate_grid([
row.new('Application'),
panels.app.cpu(ds, vars) { gridPos: pos._2 },
panels.app.memory(ds, vars) { gridPos: pos._2 },
panels.app.identity.resolved(ds, vars) { gridPos: pos._3 },
panels.app.identity.register(ds, vars) { gridPos: pos._3 },
panels.app.identity.unregister(ds, vars) { gridPos: pos._3 },

panels.app.identity.invalid_register_cacao(ds, vars) { gridPos: pos._2 },
panels.app.identity.invalid_unregister_jwt(ds, vars) { gridPos: pos._2 },

panels.app.invite.resolved(ds, vars) { gridPos: pos._3 },
panels.app.invite.register(ds, vars) { gridPos: pos._3 },
panels.app.invite.unregister(ds, vars) { gridPos: pos._3 },

panels.app.invite.invalid_register_jwt(ds, vars) { gridPos: pos._2 },
panels.app.invite.invalid_unregister_jwt(ds, vars) { gridPos: pos._2 },

row.new('ECS'),
panels.ecs.cpu(ds, vars) { gridPos: pos._2 },
panels.ecs.memory(ds, vars) { gridPos: pos._2 },

row.new('Load Balancer'),
panels.lb.active_connections(ds, vars) { gridPos: pos._2 },
panels.lb.healthy_hosts(ds, vars) { gridPos: pos._2 },
panels.lb.active_connections(ds, vars) { gridPos: pos._2 },
panels.lb.healthy_hosts(ds, vars) { gridPos: pos._2 },

panels.lb.requests(ds, vars) { gridPos: pos._3 },
panels.lb.error_4xx(ds, vars) { gridPos: pos._3 },
panels.lb.error_5xx(ds, vars) { gridPos: pos._3 },
panels.lb.requests(ds, vars) { gridPos: pos._3 },
panels.lb.error_4xx(ds, vars) { gridPos: pos._3 },
panels.lb.error_5xx(ds, vars) { gridPos: pos._3 },

row.new('DocumentDB'),
panels.docdb.cpu(ds, vars) { gridPos: pos._3 },
panels.docdb.available_memory(ds, vars) { gridPos: pos._3 },
panels.docdb.connections(ds, vars) { gridPos: pos._3 },
panels.docdb.cpu(ds, vars) { gridPos: pos._3 },
panels.docdb.available_memory(ds, vars) { gridPos: pos._3 },
panels.docdb.connections(ds, vars) { gridPos: pos._3 },

panels.docdb.low_mem_op_throttled(ds, vars) { gridPos: pos._3 },
panels.docdb.volume(ds, vars) { gridPos: pos._3 },
panels.docdb.buffer_cache_hit_ratio(ds, vars) { gridPos: pos._3 },
panels.docdb.low_mem_op_throttled(ds, vars) { gridPos: pos._3 },
panels.docdb.volume(ds, vars) { gridPos: pos._3 },
panels.docdb.buffer_cache_hit_ratio(ds, vars) { gridPos: pos._3 },
]))
1 change: 1 addition & 0 deletions terraform/monitoring/dashboard.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ data "jsonnet_file" "dashboard" {
notifications = jsonencode(var.notification_channels)

ecs_service_name = var.ecs_service_name
ecs_task_family = var.ecs_task_family
load_balancer = var.load_balancer_arn
target_group = var.ecs_target_group_arn
docdb_cluster_id = var.keystore_cluster_id
Expand Down
3 changes: 2 additions & 1 deletion terraform/monitoring/data_sources.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Instantiates the shared `monitoring-role` module (pinned to 1.0.2) from the
# `wallet-connect` registry namespace. It receives this module's context and
# the ARN supplied through `var.monitoring_role_arn`.
module "monitoring-role" {
  source  = "app.terraform.io/wallet-connect/monitoring-role/aws"
  version = "1.0.2"

  context         = module.this
  remote_role_arn = var.monitoring_role_arn
}

Expand All @@ -12,7 +13,7 @@ resource "grafana_data_source" "prometheus" {
json_data_encoded = jsonencode({
httpMethod = "GET"
sigV4Auth = true
sigV4AuthType = "workspace-iam-role"
sigV4AuthType = "ec2_iam_role"
sigV4Region = module.this.region
sigV4AssumeRoleArn = module.monitoring-role.iam_role_arn
})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Identity - Invalid CACAO during Registration' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of
  // `invalid_identity_register_cacao`, filtered on the `aws_ecs_task_family`
  // label.
  new(ds, vars)::
    local query = 'sum(rate(invalid_identity_register_cacao{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Identity - Invalid CACAO during Registration',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Identity - Invalid JWT during Unregistration' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of
  // `invalid_identity_unregister_jwt`, filtered on the `aws_ecs_task_family`
  // label.
  new(ds, vars)::
    local query = 'sum(rate(invalid_identity_unregister_jwt{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Identity - Invalid JWT during Unregistration',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
21 changes: 21 additions & 0 deletions terraform/monitoring/panels/app/identity/register.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Identity - Registrations' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of `identity_register`,
  // filtered on the `aws_ecs_task_family` label.
  new(ds, vars)::
    local query = 'sum(rate(identity_register{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Identity - Registrations',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
21 changes: 21 additions & 0 deletions terraform/monitoring/panels/app/identity/resolved.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Identity - Resolutions' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of `identity_resolved`,
  // filtered on the `aws_ecs_task_family` label.
  new(ds, vars)::
    local query = 'sum(rate(identity_resolved{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Identity - Resolutions',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
21 changes: 21 additions & 0 deletions terraform/monitoring/panels/app/identity/unregister.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Identity - Unregistrations' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of `identity_unregister`,
  // filtered on the `aws_ecs_task_family` label.
  new(ds, vars)::
    local query = 'sum(rate(identity_unregister{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Identity - Unregistrations',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Invite - Invalid JWT during Registration' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of
  // `invalid_invite_register_jwt`, filtered on the `aws_ecs_task_family`
  // label.
  new(ds, vars)::
    local query = 'sum(rate(invalid_invite_register_jwt{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Invite - Invalid JWT during Registration',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Invite - Invalid JWT during Unregistration' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of
  // `invalid_invite_unregister_jwt`, filtered on the `aws_ecs_task_family`
  // label.
  new(ds, vars)::
    local query = 'sum(rate(invalid_invite_unregister_jwt{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Invite - Invalid JWT during Unregistration',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
21 changes: 21 additions & 0 deletions terraform/monitoring/panels/app/invite/register.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Invite - Registrations' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of `invite_register`,
  // filtered on the `aws_ecs_task_family` label.
  new(ds, vars)::
    local query = 'sum(rate(invite_register{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Invite - Registrations',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
21 changes: 21 additions & 0 deletions terraform/monitoring/panels/app/invite/resolved.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Invite - Resolutions' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of `invite_resolved`,
  // filtered on the `aws_ecs_task_family` label.
  new(ds, vars)::
    local query = 'sum(rate(invite_resolved{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Invite - Resolutions',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
21 changes: 21 additions & 0 deletions terraform/monitoring/panels/app/invite/unregister.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
local grafana = import '../../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local defaults = import '../../defaults.libsonnet';

{
  // Builds the 'Invite - Unregistrations' timeseries panel.
  //
  // ds   - datasource bundle; only `ds.prometheus` is read here.
  // vars - dashboard variables; only `vars.ecs_task_family` is read here.
  //
  // The single target plots the summed 5m rate of `invite_unregister`,
  // filtered on the `aws_ecs_task_family` label.
  new(ds, vars)::
    local query = 'sum(rate(invite_unregister{aws_ecs_task_family="%s"}[5m]))' % vars.ecs_task_family;
    local target = targets.prometheus(
      datasource=ds.prometheus,
      expr=query,
      refId='sources',
      exemplar=true,
    );

    panels.timeseries(
      title='Invite - Unregistrations',
      datasource=ds.prometheus,
    )
    .configure(defaults.configuration.timeseries)
    .addTarget(target),
}
21 changes: 19 additions & 2 deletions terraform/monitoring/panels/panels.libsonnet
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
{
ecs: {
cpu: (import 'ecs/cpu.libsonnet').new,
memory: (import 'ecs/memory.libsonnet').new,
},

app: {
cpu: (import 'app/cpu.libsonnet').new,
memory: (import 'app/memory.libsonnet').new,
invite: {
register: (import 'app/invite/register.libsonnet').new,
resolved: (import 'app/invite/resolved.libsonnet').new,
unregister: (import 'app/invite/unregister.libsonnet').new,
invalid_register_jwt: (import 'app/invite/invalid_register_jwt.libsonnet').new,
invalid_unregister_jwt: (import 'app/invite/invalid_unregister_jwt.libsonnet').new,
},
identity: {
register: (import 'app/identity/register.libsonnet').new,
resolved: (import 'app/identity/resolved.libsonnet').new,
unregister: (import 'app/identity/unregister.libsonnet').new,
invalid_register_cacao: (import 'app/identity/invalid_register_cacao.libsonnet').new,
invalid_unregister_jwt: (import 'app/identity/invalid_unregister_jwt.libsonnet').new,
},
},

lb: {
Expand Down
5 changes: 5 additions & 0 deletions terraform/monitoring/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,8 @@ variable "monitoring_role_arn" {
description = "The ARN of the monitoring role."
type = string
}

# Input: ECS task family name, supplied by the caller of this module as a
# plain string.
variable "ecs_task_family" {
  description = "The name of the ECS task family."
  type        = string
}
1 change: 1 addition & 0 deletions terraform/res_monitoring.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ module "monitoring" {
load_balancer_arn = module.ecs.load_balancer_arn_suffix
keystore_cluster_id = module.keystore.cluster_id
monitoring_role_arn = data.terraform_remote_state.monitoring.outputs.grafana_workspaces.main.iam_role_arn
ecs_task_family = module.ecs.ecs_task_family
}

0 comments on commit 9e99609

Please sign in to comment.