diff --git a/docs/sources/configure/options.md b/docs/sources/configure/options.md index ada98c32c..5b6211b03 100644 --- a/docs/sources/configure/options.md +++ b/docs/sources/configure/options.md @@ -757,6 +757,17 @@ It is disabled by default to avoid cardinality explosion. The `buckets` object allows overriding the bucket boundaries of diverse histograms. See [Overriding histogram buckets](#overriding-histogram-buckets) section for more details. +| YAML | Environment variable | Type | Default | +|-------------------------|------------------------------------------------------------|----------|-----------------------------| +| `histogram_aggregation` | `OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION` | `string` | `explicit_bucket_histogram` | + +Specifies the default aggregation to use for histogram instruments. + +Accepted values are: + +* `explicit_bucket_histogram` (default): use [Explicit Bucket Histogram Aggregation](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#explicit-bucket-histogram-aggregation). +* `base2_exponential_bucket_histogram`: use [Base2 Exponential Bucket Histogram Aggregation](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#base2-exponential-bucket-histogram-aggregation). + ### Overriding histogram buckets For both OpenTelemetry and Prometheus metrics exporters, you can override the histogram bucket @@ -800,14 +811,14 @@ conventions recommend a different set of bucket boundaries. ### Use native histograms and exponential histograms -For Prometheus [native histograms](https://prometheus.io/docs/concepts/metric_types/#histogram) are enabled if you have the `native-histograms` feature enabled. - -For OpenTelemetry you can use [exponential histograms](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram) for the predefined histograms instead of defining the buckets manually. 
- -| YAML | Environment variable | Type | Default | -| ---------- | ------------------------------------------------------------------------- | ---- | ------- | -| `use_exponential_histograms` | `BEYLA_OTEL_USE_EXPONENTIAL_HISTOGRAMS` | `boolean` | `false` | +For Prometheus, [native histograms](https://prometheus.io/docs/concepts/metric_types/#histogram) are enabled if you +[enable the `native-histograms` feature in your Prometheus collector](https://prometheus.io/docs/prometheus/latest/feature_flags/#native-histograms). +For OpenTelemetry, you can use [exponential histograms](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram) +for the predefined histograms instead of defining the buckets manually. To enable them, set the standard +[OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION](https://opentelemetry.io/docs/specs/otel/metrics/sdk_exporters/otlp/#additional-configuration) +environment variable to `base2_exponential_bucket_histogram`. See the `histogram_aggregation` option in the [OTEL metrics exporter](#otel-metrics-exporter) section +for more information. 
## OTEL traces exporter diff --git a/pkg/beyla/config.go b/pkg/beyla/config.go index 9805fc16d..4f3d49062 100644 --- a/pkg/beyla/config.go +++ b/pkg/beyla/config.go @@ -43,11 +43,12 @@ var defaultConfig = Config{ }, }, Metrics: otel.MetricsConfig{ - Protocol: otel.ProtocolUnset, - MetricsProtocol: otel.ProtocolUnset, - Interval: 5 * time.Second, - Buckets: otel.DefaultBuckets, - ReportersCacheLen: ReporterLRUSize, + Protocol: otel.ProtocolUnset, + MetricsProtocol: otel.ProtocolUnset, + Interval: 5 * time.Second, + Buckets: otel.DefaultBuckets, + ReportersCacheLen: ReporterLRUSize, + HistogramAggregation: otel.AggregationExplicit, }, Traces: otel.TracesConfig{ Protocol: otel.ProtocolUnset, diff --git a/pkg/beyla/config_test.go b/pkg/beyla/config_test.go index bdaa8e6c7..6be54c8c7 100644 --- a/pkg/beyla/config_test.go +++ b/pkg/beyla/config_test.go @@ -29,6 +29,7 @@ otel_metrics_export: endpoint: localhost:3030 buckets: duration_histogram: [0, 1, 2] + histogram_aggregation: base2_exponential_bucket_histogram prometheus_export: buckets: request_size_histogram: [0, 10, 20, 22] @@ -104,6 +105,7 @@ network: DurationHistogram: []float64{0, 1, 2}, RequestSizeHistogram: otel.DefaultBuckets.RequestSizeHistogram, }, + HistogramAggregation: "base2_exponential_bucket_histogram", }, Traces: otel.TracesConfig{ Protocol: otel.ProtocolUnset, diff --git a/pkg/internal/export/otel/metrics.go b/pkg/internal/export/otel/metrics.go index b90ddd590..6c5e01d19 100644 --- a/pkg/internal/export/otel/metrics.go +++ b/pkg/internal/export/otel/metrics.go @@ -39,6 +39,9 @@ const ( UsualPortGRPC = "4317" UsualPortHTTP = "4318" + + AggregationExplicit = "explicit_bucket_histogram" + AggregationExponential = "base2_exponential_bucket_histogram" ) type MetricsConfig struct { @@ -59,8 +62,8 @@ type MetricsConfig struct { ReportTarget bool `yaml:"report_target" env:"BEYLA_METRICS_REPORT_TARGET"` ReportPeerInfo bool `yaml:"report_peer" env:"BEYLA_METRICS_REPORT_PEER"` - Buckets Buckets 
`yaml:"buckets"` - UseExponentialHistograms bool `yaml:"use_exponential_histograms" env:"BEYLA_OTEL_USE_EXPONENTIAL_HISTOGRAMS"` + Buckets Buckets `yaml:"buckets"` + HistogramAggregation string `yaml:"histogram_aggregation" env:"OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION"` ReportersCacheLen int `yaml:"reporters_cache_len" env:"BEYLA_METRICS_REPORT_CACHE_LEN"` @@ -171,7 +174,9 @@ func newMetricsReporter(ctx context.Context, cfg *MetricsConfig, ctxInfo *global } func (mr *MetricsReporter) newMetricSet(service svc.ID) (*Metrics, error) { - mlog().Debug("creating new Metrics reporter", "service", service) + mlog := mlog().With("service", service) + mlog.Debug("creating new Metrics reporter") + useExponentialHistograms := isExponentialAggregation(mr.cfg, mlog) resources := otelResource(service) m := Metrics{ ctx: mr.ctx, @@ -179,13 +184,13 @@ func (mr *MetricsReporter) newMetricSet(service svc.ID) (*Metrics, error) { metric.WithResource(resources), metric.WithReader(metric.NewPeriodicReader(mr.exporter, metric.WithInterval(mr.cfg.Interval))), - metric.WithView(otelHistogramConfig(HTTPServerDuration, mr.cfg.Buckets.DurationHistogram, mr.cfg.UseExponentialHistograms)), - metric.WithView(otelHistogramConfig(HTTPClientDuration, mr.cfg.Buckets.DurationHistogram, mr.cfg.UseExponentialHistograms)), - metric.WithView(otelHistogramConfig(RPCServerDuration, mr.cfg.Buckets.DurationHistogram, mr.cfg.UseExponentialHistograms)), - metric.WithView(otelHistogramConfig(RPCClientDuration, mr.cfg.Buckets.DurationHistogram, mr.cfg.UseExponentialHistograms)), - metric.WithView(otelHistogramConfig(SQLClientDuration, mr.cfg.Buckets.DurationHistogram, mr.cfg.UseExponentialHistograms)), - metric.WithView(otelHistogramConfig(HTTPServerRequestSize, mr.cfg.Buckets.RequestSizeHistogram, mr.cfg.UseExponentialHistograms)), - metric.WithView(otelHistogramConfig(HTTPClientRequestSize, mr.cfg.Buckets.RequestSizeHistogram, mr.cfg.UseExponentialHistograms)), + 
metric.WithView(otelHistogramConfig(HTTPServerDuration, mr.cfg.Buckets.DurationHistogram, useExponentialHistograms)), + metric.WithView(otelHistogramConfig(HTTPClientDuration, mr.cfg.Buckets.DurationHistogram, useExponentialHistograms)), + metric.WithView(otelHistogramConfig(RPCServerDuration, mr.cfg.Buckets.DurationHistogram, useExponentialHistograms)), + metric.WithView(otelHistogramConfig(RPCClientDuration, mr.cfg.Buckets.DurationHistogram, useExponentialHistograms)), + metric.WithView(otelHistogramConfig(SQLClientDuration, mr.cfg.Buckets.DurationHistogram, useExponentialHistograms)), + metric.WithView(otelHistogramConfig(HTTPServerRequestSize, mr.cfg.Buckets.RequestSizeHistogram, useExponentialHistograms)), + metric.WithView(otelHistogramConfig(HTTPClientRequestSize, mr.cfg.Buckets.RequestSizeHistogram, useExponentialHistograms)), ), } // time units for HTTP and GRPC durations are in seconds, according to the OTEL specification: @@ -224,6 +229,20 @@ func (mr *MetricsReporter) newMetricSet(service svc.ID) (*Metrics, error) { return &m, nil } +func isExponentialAggregation(mc *MetricsConfig, mlog *slog.Logger) bool { + switch mc.HistogramAggregation { + case AggregationExponential: + return true + case AggregationExplicit: + // do nothing + default: + mlog.Warn("invalid value for histogram aggregation. Accepted values are: "+ + AggregationExponential+", "+AggregationExplicit+" (default). Using default", + "value", mc.HistogramAggregation) + } + return false +} + // TODO: restore as private func InstantiateMetricsExporter(ctx context.Context, cfg *MetricsConfig, log *slog.Logger) (metric.Exporter, error) { var err error