Skip to content

Commit

Permalink
Add flag for running in agent-only mode, to collect metrics about services registered only on the local agent
Browse files Browse the repository at this point in the history

Signed-off-by: n-marton <marton@natko.hu>
  • Loading branch information
n-marton committed Nov 17, 2023
1 parent 468c348 commit 1c68646
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 17 deletions.
1 change: 1 addition & 0 deletions cmd/consul_exporter/consul_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func main() {
kingpin.Flag("consul.timeout", "Timeout on HTTP requests to the Consul API.").Default("500ms").DurationVar(&opts.Timeout)
kingpin.Flag("consul.insecure", "Disable TLS host verification.").Default("false").BoolVar(&opts.Insecure)
kingpin.Flag("consul.request-limit", "Limit the maximum number of concurrent requests to consul, 0 means no limit.").Default("0").IntVar(&opts.RequestLimit)
kingpin.Flag("consul.agent-only", "Only export metrics about services registered on local agent").Default("false").BoolVar(&opts.AgentOnly)

// Query options.
kingpin.Flag("consul.allow_stale", "Allows any Consul server (non-leader) to service a read.").Default("true").BoolVar(&queryOptions.AllowStale)
Expand Down
76 changes: 59 additions & 17 deletions pkg/exporter/consul_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ type Exporter struct {
kvPrefix string
kvFilter *regexp.Regexp
healthSummary bool
agentOnly bool
logger log.Logger
requestLimitChan chan struct{}
}
Expand All @@ -125,6 +126,7 @@ type ConsulOpts struct {
Timeout time.Duration
Insecure bool
RequestLimit int
AgentOnly bool
}

// New returns an initialized Exporter.
Expand Down Expand Up @@ -182,6 +184,7 @@ func New(opts ConsulOpts, queryOptions consul_api.QueryOptions, kvPrefix, kvFilt
healthSummary: healthSummary,
logger: logger,
requestLimitChan: requestLimitChan,
agentOnly: opts.AgentOnly,
}, nil
}

Expand All @@ -206,14 +209,19 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
// Collect fetches the stats from configured Consul location and delivers them
// as Prometheus metrics. It implements prometheus.Collector.
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
ok := e.collectPeersMetric(ch)
ok = e.collectLeaderMetric(ch) && ok
ok = e.collectNodesMetric(ch) && ok
ok = e.collectMembersMetric(ch) && ok
ok = e.collectMembersWanMetric(ch) && ok
ok = e.collectServicesMetric(ch) && ok
ok = e.collectHealthStateMetric(ch) && ok
ok = e.collectKeyValues(ch) && ok
ok := false
if e.agentOnly {
ok = e.collectServicesMetric(ch)
} else {
ok = e.collectPeersMetric(ch)
ok = e.collectLeaderMetric(ch) && ok
ok = e.collectNodesMetric(ch) && ok
ok = e.collectMembersMetric(ch) && ok
ok = e.collectMembersWanMetric(ch) && ok
ok = e.collectServicesMetric(ch) && ok
ok = e.collectHealthStateMetric(ch) && ok
ok = e.collectKeyValues(ch) && ok
}

if ok {
ch <- prometheus.MustNewConstMetric(
Expand Down Expand Up @@ -297,11 +305,25 @@ func (e *Exporter) collectMembersWanMetric(ch chan<- prometheus.Metric) bool {
}

func (e *Exporter) collectServicesMetric(ch chan<- prometheus.Metric) bool {
serviceNames, _, err := e.client.Catalog().Services(&e.queryOptions)
if err != nil {
level.Error(e.logger).Log("msg", "Failed to query for services", "err", err)
return false
serviceNames := make(map[string][]string)
if e.agentOnly {
services, err := e.client.Agent().Services()
if err != nil {
level.Error(e.logger).Log("msg", "Failed to query for agent services", "err", err)
return false
}
for name, srv := range services {
serviceNames[name] = srv.Tags
}
} else {
services, _, err := e.client.Catalog().Services(&e.queryOptions)
if err != nil {
level.Error(e.logger).Log("msg", "Failed to query for services", "err", err)
return false
}
serviceNames = services
}

ch <- prometheus.MustNewConstMetric(
serviceCount, prometheus.GaugeValue, float64(len(serviceNames)),
)
Expand Down Expand Up @@ -401,13 +423,33 @@ func (e *Exporter) collectOneHealthSummary(ch chan<- prometheus.Metric, serviceN
}
level.Debug(e.logger).Log("msg", "Fetching health summary", "serviceName", serviceName)

service, _, err := e.client.Health().Service(serviceName, "", false, &e.queryOptions)
if err != nil {
level.Error(e.logger).Log("msg", "Failed to query service health", "err", err)
return false
var serviceEntries []*consul_api.ServiceEntry

if e.agentOnly {
nodeName, err := e.client.Agent().NodeName()
if err != nil {
level.Error(e.logger).Log("msg", "Failed to query agent node name", "err", err)
return false
}

_, agentServices, err := e.client.Agent().AgentHealthServiceByName(serviceName)
if err != nil {
level.Error(e.logger).Log("msg", "Failed to query agent service health", "err", err)
return false
}
for _, agentService := range agentServices {
serviceEntries = append(serviceEntries, &consul_api.ServiceEntry{Checks: agentService.Checks, Service: agentService.Service, Node: &consul_api.Node{Node: nodeName}})
}
} else {
service, _, err := e.client.Health().Service(serviceName, "", false, &e.queryOptions)
if err != nil {
level.Error(e.logger).Log("msg", "Failed to query service health", "err", err)
return false
}
serviceEntries = service
}

for _, entry := range service {
for _, entry := range serviceEntries {
// We have a Node, a Service, and one or more Checks. Our
// service-node combo is passing if all checks have a `status`
// of "passing."
Expand Down

0 comments on commit 1c68646

Please sign in to comment.