Skip to content

Commit

Permalink
Fix etcd health metric naming conflict (armadaproject#2939)
Browse files: browse the repository at this point in the history
* Fix metric naming conflict

* Fix metric names

* Fix metric prefix

* Fix label

Signed-off-by: Rich Scott <richscott@sent.com>
  • Loading branch information
severinson authored and richscott committed Aug 31, 2023
1 parent d6a86ff commit 72db539
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 7 deletions.
5 changes: 3 additions & 2 deletions internal/common/etcdhealth/etcdhealth.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,9 @@ func (srv *EtcdReplicaHealthMonitor) initialiseMetrics() {
nil,
)
srv.metricsCollectionDelayHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: srv.metricsPrefix + "etcd_replica_metrics_collection_delay_seconds",
Help: "Delay in seconds of collecting metrics from this etcd replica.",
Name: srv.metricsPrefix + "etcd_replica_metrics_collection_delay_seconds",
Help: "Delay in seconds of collecting metrics from this etcd replica.",
ConstLabels: prometheus.Labels{etcdMemberUrl: srv.name},
Buckets: prometheus.ExponentialBuckets(
srv.metricsCollectionDelayBucketsStart,
srv.metricsCollectionDelayBucketsFactor,
Expand Down
3 changes: 1 addition & 2 deletions internal/common/healthmonitor/multihealthmonitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func (srv *MultiHealthMonitor) initialiseMetrics() {
srv.healthPrometheusDesc = prometheus.NewDesc(
metricsPrefix+"_health",
fmt.Sprintf("Shows whether %s is healthy.", srv.name),
[]string{srv.name},
nil,
nil,
)
}
Expand Down Expand Up @@ -128,7 +128,6 @@ func (srv *MultiHealthMonitor) Collect(c chan<- prometheus.Metric) {
srv.healthPrometheusDesc,
prometheus.GaugeValue,
resultOfMostRecentHealthCheck,
srv.name,
)

for _, healthMonitor := range srv.healthMonitorsByName {
Expand Down
12 changes: 9 additions & 3 deletions internal/executor/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,25 @@ func StartUp(ctx context.Context, log *logrus.Entry, config configuration.Execut
etcdClusterHealthMonitoring.ScrapeDelayBucketsFactor,
etcdClusterHealthMonitoring.ScrapeDelayBucketsCount,
common_metrics.NewHttpMetricsProvider(metricsUrl, http.DefaultClient),
)
).WithMetricsPrefix(metrics.ArmadaExecutorMetricsPrefix)
}
etcdClusterHealthMonitoringByName[etcdClusterHealthMonitoring.Name] = healthmonitor.NewMultiHealthMonitor(
etcdClusterHealthMonitoring.Name,
etcdReplicaHealthMonitorsByUrl,
).WithMinimumReplicasAvailable(etcdClusterHealthMonitoring.MinimumReplicasAvailable)
).WithMinimumReplicasAvailable(
etcdClusterHealthMonitoring.MinimumReplicasAvailable,
).WithMetricsPrefix(
metrics.ArmadaExecutorMetricsPrefix,
)
}
var etcdClustersHealthMonitoring healthmonitor.HealthMonitor
if len(etcdClusterHealthMonitoringByName) > 0 {
log.Info("etcd URLs provided; monitoring etcd health enabled")
etcdClustersHealthMonitoring = healthmonitor.NewMultiHealthMonitor(
"etcd",
"overall_etcd",
etcdClusterHealthMonitoringByName,
).WithMetricsPrefix(
metrics.ArmadaExecutorMetricsPrefix,
)
g.Go(func() error { return etcdClustersHealthMonitoring.Run(ctx, log) })
prometheus.MustRegister(etcdClustersHealthMonitoring)
Expand Down

0 comments on commit 72db539

Please sign in to comment.