diff --git a/docs/sources/configure/options.md b/docs/sources/configure/options.md index 6443931f2..677f1c723 100644 --- a/docs/sources/configure/options.md +++ b/docs/sources/configure/options.md @@ -785,7 +785,10 @@ of Beyla: application-level metrics or network metrics. process matching the entries in the `discovery` section. - If the list contains `application_span`, the Beyla OpenTelemetry exporter exports application-level trace span metrics; but only if there is defined an OpenTelemetry endpoint, and Beyla was able to discover any - process matching the entries in the `discovery` section. When this option is enabled, service graph metrics are also generated. + process matching the entries in the `discovery` section. +- If the list contains `application_service_graph`, the Beyla OpenTelemetry exporter exports application-level service graph metrics; + but only if an OpenTelemetry endpoint is defined, and Beyla was able to discover any + process matching the entries in the `discovery` section. For best experience with generating service graph metrics, use a DNS for service discovery and make sure the DNS names match the OpenTelemetry service names used in Beyla. In Kubernetes environments, the OpenTelemetry service name set by the service name discovery is the best choice for service graph metrics. @@ -1098,7 +1101,10 @@ of Beyla: application-level metrics or network metrics. process matching the entries in the `discovery` section. - If the list contains `application_span`, the Beyla Prometheus exporter exports application-level metrics in traces span metrics format; but only if the Prometheus `port` property is defined, and Beyla was able to discover any - process matching the entries in the `discovery` section. When this option is enabled, service graph metrics are also generated. + process matching the entries in the `discovery` section. 
+- If the list contains `application_service_graph`, the Beyla Prometheus exporter exports application-level service graph metrics; + but only if the Prometheus `port` property is defined, and Beyla was able to discover any + process matching the entries in the `discovery` section. For best experience with generating service graph metrics, use a DNS for service discovery and make sure the DNS names match the OpenTelemetry service names used in Beyla. In Kubernetes environments, the OpenTelemetry service name set by the service name discovery is the best choice for service graph metrics. diff --git a/pkg/internal/export/otel/metrics.go b/pkg/internal/export/otel/metrics.go index ba223308c..ef756c298 100644 --- a/pkg/internal/export/otel/metrics.go +++ b/pkg/internal/export/otel/metrics.go @@ -56,6 +56,7 @@ const ( FeatureNetwork = "network" FeatureApplication = "application" FeatureSpan = "application_span" + FeatureGraph = "application_service_graph" ) type MetricsConfig struct { @@ -132,12 +133,16 @@ func (m MetricsConfig) SpanMetricsEnabled() bool { return slices.Contains(m.Features, FeatureSpan) } +func (m MetricsConfig) ServiceGraphMetricsEnabled() bool { + return slices.Contains(m.Features, FeatureGraph) +} + func (m MetricsConfig) OTelMetricsEnabled() bool { return slices.Contains(m.Features, FeatureApplication) } func (m MetricsConfig) Enabled() bool { - return m.EndpointEnabled() && (m.OTelMetricsEnabled() || m.SpanMetricsEnabled()) + return m.EndpointEnabled() && (m.OTelMetricsEnabled() || m.SpanMetricsEnabled() || m.ServiceGraphMetricsEnabled()) } // MetricsReporter implements the graph node that receives request.Span @@ -248,6 +253,17 @@ func (mr *MetricsReporter) spanMetricOptions(mlog *slog.Logger) []metric.Option return []metric.Option{ metric.WithView(otelHistogramConfig(SpanMetricsLatency, mr.cfg.Buckets.DurationHistogram, useExponentialHistograms)), + } +} + +func (mr *MetricsReporter) graphMetricOptions(mlog *slog.Logger) []metric.Option { + if 
!mr.cfg.ServiceGraphMetricsEnabled() { + return []metric.Option{} + } + + useExponentialHistograms := isExponentialAggregation(mr.cfg, mlog) + + return []metric.Option{ metric.WithView(otelHistogramConfig(ServiceGraphClient, mr.cfg.Buckets.DurationHistogram, useExponentialHistograms)), metric.WithView(otelHistogramConfig(ServiceGraphServer, mr.cfg.Buckets.DurationHistogram, useExponentialHistograms)), } @@ -318,6 +334,16 @@ func (mr *MetricsReporter) setupSpanMeters(m *Metrics, meter instrument.Meter) e return fmt.Errorf("creating span metric traces target info: %w", err) } + return nil +} + +func (mr *MetricsReporter) setupGraphMeters(m *Metrics, meter instrument.Meter) error { + if !mr.cfg.ServiceGraphMetricsEnabled() { + return nil + } + + var err error + m.serviceGraphClient, err = meter.Float64Histogram(ServiceGraphClient, instrument.WithUnit("s")) if err != nil { return fmt.Errorf("creating service graph client histogram: %w", err) @@ -354,6 +380,7 @@ func (mr *MetricsReporter) newMetricSet(service svc.ID) (*Metrics, error) { opts = append(opts, mr.otelMetricOptions(mlog)...) opts = append(opts, mr.spanMetricOptions(mlog)...) + opts = append(opts, mr.graphMetricOptions(mlog)...) 
m := Metrics{ ctx: mr.ctx, @@ -383,6 +410,15 @@ func (mr *MetricsReporter) newMetricSet(service svc.ID) (*Metrics, error) { m.tracesTargetInfo.Add(mr.ctx, 1, attrOpt) } + if mr.cfg.ServiceGraphMetricsEnabled() { + err = mr.setupGraphMeters(&m, meter) + if err != nil { + return nil, err + } + attrOpt := instrument.WithAttributeSet(mr.metricResourceAttributes(service)) + m.tracesTargetInfo.Add(mr.ctx, 1, attrOpt) + } + return &m, nil } @@ -651,8 +687,10 @@ func (r *Metrics) record(span *request.Span, mr *MetricsReporter) { r.spanMetricsLatency.Record(r.ctx, duration, attrOpt) r.spanMetricsCallsTotal.Add(r.ctx, 1, attrOpt) r.spanMetricsSizeTotal.Add(r.ctx, float64(span.ContentLength), attrOpt) + } - attrOpt = instrument.WithAttributeSet(mr.serviceGraphAttributes(span)) + if mr.cfg.ServiceGraphMetricsEnabled() { + attrOpt := instrument.WithAttributeSet(mr.serviceGraphAttributes(span)) if span.IsClientSpan() { r.serviceGraphClient.Record(r.ctx, duration, attrOpt) } else { diff --git a/pkg/internal/export/prom/prom.go b/pkg/internal/export/prom/prom.go index 551ba15da..83fc64e08 100644 --- a/pkg/internal/export/prom/prom.go +++ b/pkg/internal/export/prom/prom.go @@ -137,9 +137,13 @@ func (p PrometheusConfig) OTelMetricsEnabled() bool { return slices.Contains(p.Features, otel.FeatureApplication) } +func (p PrometheusConfig) ServiceGraphMetricsEnabled() bool { + return slices.Contains(p.Features, otel.FeatureGraph) +} + // nolint:gocritic func (p PrometheusConfig) Enabled() bool { - return (p.Port != 0 || p.Registry != nil) && (p.OTelMetricsEnabled() || p.SpanMetricsEnabled()) + return (p.Port != 0 || p.Registry != nil) && (p.OTelMetricsEnabled() || p.SpanMetricsEnabled() || p.ServiceGraphMetricsEnabled()) } type metricsReporter struct { @@ -339,6 +343,11 @@ func newReporter(ctx context.Context, cfg *PrometheusConfig, ctxInfo *global.Con mr.spanMetricsCallsTotal, mr.spanMetricsSizeTotal, mr.tracesTargetInfo, + ) + } + + if cfg.ServiceGraphMetricsEnabled() { + 
registeredMetrics = append(registeredMetrics, mr.serviceGraphClient, mr.serviceGraphServer, mr.serviceGraphFailed, @@ -396,6 +405,15 @@ func (r *metricsReporter) observe(span *request.Span) { r.spanMetricsCallsTotal.WithLabelValues(lv...).Add(1) r.spanMetricsSizeTotal.WithLabelValues(lv...).Add(float64(span.ContentLength)) + _, ok := r.serviceCache.Get(span.ServiceID.UID) + if !ok { + r.serviceCache.Add(span.ServiceID.UID, span.ServiceID) + lv = r.labelValuesTargetInfo(span.ServiceID) + r.tracesTargetInfo.WithLabelValues(lv...).Add(1) + } + } + + if r.cfg.ServiceGraphMetricsEnabled() { lvg := r.labelValuesServiceGraph(span) if span.IsClientSpan() { r.serviceGraphClient.WithLabelValues(lvg...).Observe(duration) @@ -406,13 +424,6 @@ func (r *metricsReporter) observe(span *request.Span) { if otel.SpanStatusCode(span) == codes.Error { r.serviceGraphFailed.WithLabelValues(lvg...).Add(1) } - - _, ok := r.serviceCache.Get(span.ServiceID.UID) - if !ok { - r.serviceCache.Add(span.ServiceID.UID, span.ServiceID) - lv = r.labelValuesTargetInfo(span.ServiceID) - r.tracesTargetInfo.WithLabelValues(lv...).Add(1) - } } } diff --git a/test/integration/k8s/manifests/05-instrumented-service-otel.yml b/test/integration/k8s/manifests/05-instrumented-service-otel.yml index 4fd526426..a54c09d85 100644 --- a/test/integration/k8s/manifests/05-instrumented-service-otel.yml +++ b/test/integration/k8s/manifests/05-instrumented-service-otel.yml @@ -115,5 +115,5 @@ spec: - name: BEYLA_KUBE_METADATA_ENABLE value: "autodetect" - name: BEYLA_OTEL_METRIC_FEATURES - value: "application,application_span" + value: "application,application_span,application_service_graph" diff --git a/test/integration/k8s/manifests/05-instrumented-service-prometheus.yml b/test/integration/k8s/manifests/05-instrumented-service-prometheus.yml index e7d9d807b..135e76cbd 100644 --- a/test/integration/k8s/manifests/05-instrumented-service-prometheus.yml +++ 
b/test/integration/k8s/manifests/05-instrumented-service-prometheus.yml @@ -125,7 +125,7 @@ spec: - name: BEYLA_KUBE_METADATA_ENABLE value: "autodetect" - name: BEYLA_PROMETHEUS_FEATURES - value: "application,application_span" + value: "application,application_span,application_service_graph" ports: - containerPort: 8999 hostPort: 8999