Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add HTTP metrics to Grafana #116

Merged
merged 1 commit into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 24 additions & 20 deletions terraform/monitoring/dashboard.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ local grafana = import 'grafonnet-lib/grafana.libsonnet';
local panels = import 'panels/panels.libsonnet';

local dashboard = grafana.dashboard;
local row = grafana.row;

local ds = {
prometheus: {
Expand All @@ -14,10 +15,13 @@ local ds = {
}
};
local vars = {
notifications: std.parseJson(std.extVar('notifications')),
namespace: 'Keys',
environment: std.extVar('environment'),
notifications: std.parseJson(std.extVar('notifications')),

ecs_service_name: std.extVar('ecs_service_name'),
load_balancer: std.extVar('load_balancer'),
target_group: std.extVar('target_group'),
docdb_cluster_id: std.extVar('docdb_cluster_id'),
};

Expand All @@ -44,25 +48,25 @@ dashboard.new(
},
)
)
.addPanels(
grafana.layout.generate_grid([
panels.app.app_cpu_memory(ds, vars) { gridPos: pos._2 },
panels.app.healthy_hosts(ds, vars) { gridPos: pos._2 },
panels.app.active_nlb_flows(ds, vars) { gridPos: pos._2 },
panels.app.nlb_target_resets(ds, vars) { gridPos: pos._2 },
.addPanels(grafana.layout.generate_grid([
row.new('Application'),
panels.app.cpu(ds, vars) { gridPos: pos._2 },
panels.app.memory(ds, vars) { gridPos: pos._2 },

////////////////////////////////////////////////////////////////////////////
grafana.panels.text(
content = '# DocumentDB',
transparent = true
) { gridPos: pos.title },
row.new('Load Balancer'),
panels.lb.active_connections(ds, vars) { gridPos: pos._2 },
panels.lb.healthy_hosts(ds, vars) { gridPos: pos._2 },

panels.docdb.cpu(ds, vars) { gridPos: pos._3 },
panels.docdb.available_memory(ds, vars) { gridPos: pos._3 },
panels.docdb.connections(ds, vars) { gridPos: pos._3 },
panels.lb.requests(ds, vars) { gridPos: pos._3 },
panels.lb.error_4xx(ds, vars) { gridPos: pos._3 },
panels.lb.error_5xx(ds, vars) { gridPos: pos._3 },

panels.docdb.low_mem_op_throttled(ds, vars) { gridPos: pos._3 },
panels.docdb.volume(ds, vars) { gridPos: pos._3 },
panels.docdb.buffer_cache_hit_ratio(ds, vars) { gridPos: pos._3 },
])
)
row.new('DocumentDB'),
panels.docdb.cpu(ds, vars) { gridPos: pos._3 },
panels.docdb.available_memory(ds, vars) { gridPos: pos._3 },
panels.docdb.connections(ds, vars) { gridPos: pos._3 },

panels.docdb.low_mem_op_throttled(ds, vars) { gridPos: pos._3 },
panels.docdb.volume(ds, vars) { gridPos: pos._3 },
panels.docdb.buffer_cache_hit_ratio(ds, vars) { gridPos: pos._3 },
]))
7 changes: 4 additions & 3 deletions terraform/monitoring/dashboard.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ data "jsonnet_file" "dashboard" {
prometheus_uid = grafana_data_source.prometheus.uid
cloudwatch_uid = grafana_data_source.cloudwatch.uid

notifications = jsonencode(var.notification_channels)
environment = module.this.stage
environment = module.this.stage
notifications = jsonencode(var.notification_channels)

ecs_service_name = var.ecs_service_name
target_group = var.ecs_target_group_arn
load_balancer = var.load_balancer_arn
target_group = var.ecs_target_group_arn
docdb_cluster_id = var.keystore_cluster_id
}
}
Expand Down
95 changes: 0 additions & 95 deletions terraform/monitoring/panels/app/app_cpu_memory.libsonnet

This file was deleted.

44 changes: 44 additions & 0 deletions terraform/monitoring/panels/app/cpu.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local defaults = import '../../grafonnet-lib/defaults.libsonnet';

local panels = grafana.panels;
local targets = grafana.targets;
local overrides = defaults.overrides;

{
  // Builds the 'CPU Utilization' timeseries panel for the ECS service.
  //
  // Parameters:
  //   ds   - datasource map; only `ds.cloudwatch` is read here.
  //   vars - dashboard variables; reads `namespace`, `environment`,
  //          `notifications` and `ecs_service_name`.
  //
  // Returns the panel with the CPU override of the `timeseries_resource`
  // configuration applied, the shared CPU alert attached, and two
  // CloudWatch targets (Maximum and Average of AWS/ECS CPUUtilization).
  new(ds, vars)::
    local cw = ds.cloudwatch;

    // Both targets query the same metric; only the alias, the statistic
    // and the refId differ between them.
    // NOTE(review): AWS publishes ECS service metrics with both ClusterName
    // and ServiceName dimensions; only ServiceName is supplied here, so
    // confirm that targets.cloudwatch does not require an exact match.
    local cpu_target(label, stat, ref) = targets.cloudwatch(
      alias=label,
      datasource=cw,
      namespace='AWS/ECS',
      metricName='CPUUtilization',
      dimensions={ ServiceName: vars.ecs_service_name },
      statistic=stat,
      refId=ref,
    );

    // Alert definition comes from the shared defaults library.
    local cpu_alert = defaults.alerts.cpu(
      namespace=vars.namespace,
      title='ECS',
      env=vars.environment,
      notifications=vars.notifications,
    );

    local base_panel = panels.timeseries(
      title='CPU Utilization',
      datasource=cw,
    );

    base_panel
    .configure(overrides.cpu(defaults.configuration.timeseries_resource))
    .setAlert(cpu_alert)
    .addTarget(cpu_target('CPU (Max)', 'Maximum', 'CPU_Max'))
    .addTarget(cpu_target('CPU (Avg)', 'Average', 'CPU_Avg')),
}
44 changes: 44 additions & 0 deletions terraform/monitoring/panels/app/memory.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local defaults = import '../../grafonnet-lib/defaults.libsonnet';

local panels = grafana.panels;
local targets = grafana.targets;

{
  // Builds the 'Memory Utilization' timeseries panel for the ECS service.
  //
  // Parameters:
  //   ds   - datasource map; only `ds.cloudwatch` is read here.
  //   vars - dashboard variables; reads `namespace`, `environment`,
  //          `notifications` and `ecs_service_name`.
  //
  // Returns the panel with the memory override of the `timeseries_resource`
  // configuration applied, the shared memory alert attached, and two
  // CloudWatch targets (Maximum and Average of AWS/ECS MemoryUtilization).
  new(ds, vars)::
    local cw = ds.cloudwatch;

    // Both targets query the same metric; only the alias, the statistic
    // and the refId differ between them.
    // NOTE(review): AWS publishes ECS service metrics with both ClusterName
    // and ServiceName dimensions; only ServiceName is supplied here, so
    // confirm that targets.cloudwatch does not require an exact match.
    local memory_target(label, stat, ref) = targets.cloudwatch(
      alias=label,
      datasource=cw,
      namespace='AWS/ECS',
      metricName='MemoryUtilization',
      dimensions={ ServiceName: vars.ecs_service_name },
      statistic=stat,
      refId=ref,
    );

    // Alert definition comes from the shared defaults library.
    local memory_alert = defaults.alerts.memory(
      namespace=vars.namespace,
      title='ECS',
      env=vars.environment,
      notifications=vars.notifications,
    );

    local base_panel = panels.timeseries(
      title='Memory Utilization',
      datasource=cw,
    );

    base_panel
    .configure(defaults.overrides.memory(defaults.configuration.timeseries_resource))
    .setAlert(memory_alert)
    .addTarget(memory_target('Memory (Max)', 'Maximum', 'Mem_Max'))
    .addTarget(memory_target('Memory (Avg)', 'Average', 'Mem_Avg')),
}
33 changes: 0 additions & 33 deletions terraform/monitoring/panels/app/nlb_target_resets.libsonnet

This file was deleted.

15 changes: 8 additions & 7 deletions terraform/monitoring/panels/docdb/available_memory.libsonnet
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local defaults = import '../../grafonnet-lib/defaults.libsonnet';

local panels = grafana.panels;
local targets = grafana.targets;
local alert = grafana.alert;
local alertCondition = grafana.alertCondition;

local defaults = import '../defaults.libsonnet';

local mem_threshold = 4000000000; // 4GiB
local max_memory = 16000000000; // 16GiB (AWS DocDB max on db.r6g.large)

Expand Down Expand Up @@ -45,12 +45,13 @@ local _configuration = defaults.configuration.timeseries


local mem_alert(vars) = alert.new(
name = "%s Keys-Server DocumentDB Freeable Memory Alert" % vars.environment,
message = "%s Keys-Server DocumentDB Freeable Memory" % vars.environment,
period = '5m',
frequency = '1m',
namespace = vars.namespace,
name = "%s DocumentDB Freeable Memory Alert" % vars.environment,
message = "%s DocumentDB Freeable Memory" % vars.environment,
period = '5m',
frequency = '1m',
notifications = vars.notifications,
conditions = [
conditions = [
alertCondition.new(
evaluatorParams = [ mem_threshold ],
evaluatorType = 'lt',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local defaults = import '../../grafonnet-lib/defaults.libsonnet';

local defaults = import '../defaults.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;

local _configuration = defaults.configuration.timeseries
.withUnit('percent')
Expand Down
12 changes: 5 additions & 7 deletions terraform/monitoring/panels/docdb/connections.libsonnet
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local panels = grafana.panels;
local targets = grafana.targets;
local grafana = import '../../grafonnet-lib/grafana.libsonnet';
local defaults = import '../../grafonnet-lib/defaults.libsonnet';

local defaults = import '../defaults.libsonnet';

local _configuration = defaults.configuration.timeseries;
local panels = grafana.panels;
local targets = grafana.targets;

{
new(ds, vars)::
panels.timeseries(
title = 'Database Connections',
datasource = ds.cloudwatch,
)
.configure(_configuration)
.configure(defaults.configuration.timeseries)

.addTarget(targets.cloudwatch(
alias = 'Database Connections',
Expand Down
Loading