diff --git a/cmd/consul_exporter/consul_exporter.go b/cmd/consul_exporter/consul_exporter.go index 8a439c5..0f891cd 100644 --- a/cmd/consul_exporter/consul_exporter.go +++ b/cmd/consul_exporter/consul_exporter.go @@ -65,6 +65,7 @@ func main() { kingpin.Flag("consul.timeout", "Timeout on HTTP requests to the Consul API.").Default("500ms").DurationVar(&opts.Timeout) kingpin.Flag("consul.insecure", "Disable TLS host verification.").Default("false").BoolVar(&opts.Insecure) kingpin.Flag("consul.request-limit", "Limit the maximum number of concurrent requests to consul, 0 means no limit.").Default("0").IntVar(&opts.RequestLimit) + kingpin.Flag("consul.agent-only", "Only export metrics about services registered on local agent").Default("false").BoolVar(&opts.AgentOnly) // Query options. kingpin.Flag("consul.allow_stale", "Allows any Consul server (non-leader) to service a read.").Default("true").BoolVar(&queryOptions.AllowStale) diff --git a/pkg/exporter/consul_exporter.go b/pkg/exporter/consul_exporter.go index 1be53e7..c2f3864 100644 --- a/pkg/exporter/consul_exporter.go +++ b/pkg/exporter/consul_exporter.go @@ -111,6 +111,7 @@ type Exporter struct { kvPrefix string kvFilter *regexp.Regexp healthSummary bool + agentOnly bool logger log.Logger requestLimitChan chan struct{} } @@ -125,6 +126,7 @@ type ConsulOpts struct { Timeout time.Duration Insecure bool RequestLimit int + AgentOnly bool } // New returns an initialized Exporter. @@ -182,6 +184,7 @@ func New(opts ConsulOpts, queryOptions consul_api.QueryOptions, kvPrefix, kvFilt healthSummary: healthSummary, logger: logger, requestLimitChan: requestLimitChan, + agentOnly: opts.AgentOnly, }, nil } @@ -206,14 +209,16 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { // Collect fetches the stats from configured Consul location and delivers them // as Prometheus metrics. It implements prometheus.Collector. func (e *Exporter) Collect(ch chan<- prometheus.Metric) { - ok := e.collectPeersMetric(ch) - ok = e.collectLeaderMetric(ch) && ok - ok = e.collectNodesMetric(ch) && ok - ok = e.collectMembersMetric(ch) && ok - ok = e.collectMembersWanMetric(ch) && ok - ok = e.collectServicesMetric(ch) && ok - ok = e.collectHealthStateMetric(ch) && ok - ok = e.collectKeyValues(ch) && ok + ok := e.collectServicesMetric(ch) + if !e.agentOnly { + ok = e.collectPeersMetric(ch) && ok + ok = e.collectLeaderMetric(ch) && ok + ok = e.collectNodesMetric(ch) && ok + ok = e.collectMembersMetric(ch) && ok + ok = e.collectMembersWanMetric(ch) && ok + ok = e.collectHealthStateMetric(ch) && ok + ok = e.collectKeyValues(ch) && ok + } if ok { ch <- prometheus.MustNewConstMetric( @@ -297,11 +302,25 @@ func (e *Exporter) collectMembersWanMetric(ch chan<- prometheus.Metric) bool { } func (e *Exporter) collectServicesMetric(ch chan<- prometheus.Metric) bool { - serviceNames, _, err := e.client.Catalog().Services(&e.queryOptions) - if err != nil { - level.Error(e.logger).Log("msg", "Failed to query for services", "err", err) - return false + serviceNames := make(map[string][]string) + if e.agentOnly { + services, err := e.client.Agent().Services() + if err != nil { + level.Error(e.logger).Log("msg", "Failed to query for agent services", "err", err) + return false + } + for name, srv := range services { + serviceNames[name] = srv.Tags + } + } else { + services, _, err := e.client.Catalog().Services(&e.queryOptions) + if err != nil { + level.Error(e.logger).Log("msg", "Failed to query for services", "err", err) + return false + } + serviceNames = services } + ch <- prometheus.MustNewConstMetric( serviceCount, prometheus.GaugeValue, float64(len(serviceNames)), ) @@ -401,13 +420,33 @@ func (e *Exporter) collectOneHealthSummary(ch chan<- prometheus.Metric, serviceN } level.Debug(e.logger).Log("msg", "Fetching health summary", "serviceName", serviceName) - service, _, err := e.client.Health().Service(serviceName, "", false, &e.queryOptions) - if err != nil { - level.Error(e.logger).Log("msg", "Failed to query service health", "err", err) - return false + var serviceEntries []*consul_api.ServiceEntry + + if e.agentOnly { + nodeName, err := e.client.Agent().NodeName() + if err != nil { + level.Error(e.logger).Log("msg", "Failed to query agent node name", "err", err) + return false + } + + _, agentServices, err := e.client.Agent().AgentHealthServiceByName(serviceName) + if err != nil { + level.Error(e.logger).Log("msg", "Failed to query agent service health", "err", err) + return false + } + for _, agentService := range agentServices { + serviceEntries = append(serviceEntries, &consul_api.ServiceEntry{Checks: agentService.Checks, Service: agentService.Service, Node: &consul_api.Node{Node: nodeName}}) + } + } else { + service, _, err := e.client.Health().Service(serviceName, "", false, &e.queryOptions) + if err != nil { + level.Error(e.logger).Log("msg", "Failed to query service health", "err", err) + return false + } + serviceEntries = service } - for _, entry := range service { + for _, entry := range serviceEntries { // We have a Node, a Service, and one or more Checks. Our // service-node combo is passing if all checks have a `status` // of "passing."