From 91caf4b5fc30b237d54500f82c1bf5006c9b14f3 Mon Sep 17 00:00:00 2001 From: Nikola Grcevski <6207777+grcevski@users.noreply.github.com> Date: Fri, 12 Jan 2024 09:15:07 -0500 Subject: [PATCH] Document distributed traces (#541) Co-authored-by: Sean Packham --- README.md | 3 +- docs/sources/_index.md | 3 +- docs/sources/distributed-traces.md | 62 ++++++++++++++++++++++ docs/sources/requesttime.md | 2 +- docs/sources/stability.md | 2 +- docs/sources/tutorial/index.md | 7 +-- examples/greeting-apps/docker-compose.yaml | 3 ++ pkg/internal/ebpf/common/common.go | 2 + pkg/internal/ebpf/grpc/grpc.go | 2 +- pkg/internal/ebpf/nethttp/nethttp.go | 8 ++- pkg/internal/ebpf/tracer_linux.go | 40 +++++++++++--- 11 files changed, 117 insertions(+), 17 deletions(-) create mode 100644 docs/sources/distributed-traces.md diff --git a/README.md b/README.md index d5daede1e..ddfd6e3b2 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,8 @@ First, download and unpack the latest release from the [GitHub releases page](ht The release should contain the `./beyla` executable. Beyla supports multiple ways to find the service to be instrumented (by network port, executable name, process ID), -and multiple exposition formats (Prometheus, OpenTelemetry metrics, Single Span traces). +and multiple exposition formats (Prometheus, OpenTelemetry metrics, Distributed Traces for Go, Single Span traces for +other languages). For getting started, we'll tell Beyla to instrument the service running on port 8080 (our example service) and expose metrics in Prometheus format on port 9400. diff --git a/docs/sources/_index.md b/docs/sources/_index.md index ee53e7c4b..74136718f 100644 --- a/docs/sources/_index.md +++ b/docs/sources/_index.md @@ -23,13 +23,14 @@ aliases: Instrumenting an application to obtain metrics and traces typically requires adding a language agent to the application deployment/packages. In some compiled languages like Go or Rust, tracepoints have to be manually added to the code. 
In both cases, the instrumented version of the application must be redeployed to the staging/production servers. -Grafana Beyla is an eBPF-based application auto-instrumentation tool to easily get started with Application Observability. eBPF is used to automatically inspect application executables and the OS networking layer and capture basic trace spans related to web transactions and Rate-Errors-Duration (RED) metrics for Linux HTTP/S and gRPC services. All data capture occurs without any modifications to application code or configuration. +Grafana Beyla is an eBPF-based application auto-instrumentation tool to easily get started with Application Observability. eBPF is used to automatically inspect application executables and the OS networking layer and capture trace spans related to web transactions and Rate-Errors-Duration (RED) metrics for Linux HTTP/S and gRPC services. All data capture occurs without any modifications to application code or configuration. Beyla offers the following features: - auto-instrument applications written in various programming languages, for example: Go, C/C++, Rust, Python, Ruby, Java (including GraalVM Native), NodeJS, .NET, and others - efficient instrumentation and the low-overhead data capture with natively compiled code even for interpreted languages - vendor agnostic data exports in the OpenTelemetry format and as native Prometheus metrics +- distributed traces for Go services - runs in any Linux environment - listen to the Kubernetes API to decorate metrics and traces with Pods and Services metadata - simple setup for Grafana customers already using Grafana Agent diff --git a/docs/sources/distributed-traces.md b/docs/sources/distributed-traces.md new file mode 100644 index 000000000..497ab1636 --- /dev/null +++ b/docs/sources/distributed-traces.md @@ -0,0 +1,62 @@ +--- +title: Distributed traces with Beyla +menuTitle: Distributed traces +description: Learn about Beyla's distributed traces support. 
+weight: 5 +keywords: + - Beyla + - eBPF + - distributed traces +aliases: + - /docs/grafana-cloud/monitor-applications/beyla/distributed-traces/ +--- + +# Distributed traces with Beyla + +## Introduction + +Beyla currently supports distributed traces for Go applications, both HTTP/S and gRPC, with some limitations and version restrictions. + +Go distributed tracing is implemented through the propagation of the [W3C `traceparent`](https://www.w3.org/TR/trace-context/) header value. `traceparent` context propagation is automatic and it doesn't require any action or configuration. + +Beyla will read any incoming trace context header values, track the Go program execution flow and propagate the trace context by automatically adding the `traceparent` field in outgoing HTTP/gRPC requests. If an application already adds the `traceparent` field in outgoing requests, Beyla will use that value for tracing instead of its own generated trace context. If Beyla cannot find an incoming `traceparent` context value, it will generate one according to the W3C specification. + +## Limitations + +### Kernel integrity mode limitations + +In order to write the `traceparent` value in outgoing HTTP/gRPC request headers, Beyla needs to write to the process memory using the [bpf_probe_write_user](https://www.man7.org/linux/man-pages/man7/bpf-helpers.7.html) eBPF helper. Since kernel 5.15 this helper is protected (and unavailable to BPF programs) if the Linux Kernel is running in `integrity` lockdown mode. Kernel integrity mode is typically enabled by default if the Kernel has [Secure Boot](https://wiki.debian.org/SecureBoot) enabled, but it can also be enabled manually. + +Beyla will automatically check if it can use the `bpf_probe_write_user` helper, and enable context propagation only if it's allowed by the kernel configuration. 
Verify the Linux Kernel lockdown mode by running the following command: + +```shell +cat /sys/kernel/security/lockdown +``` + +If that file exists and the mode is anything other than `[none]`, Beyla will not be able to perform context propagation and distributed tracing will be disabled. + +### Configuring distributed tracing for containerized environments (including Kubernetes) + +Because of the Kernel lockdown mode restrictions, Docker and Kubernetes configuration files should mount the `/sys/kernel/security/` volume for the **Beyla docker container** from the host system. This way Beyla can correctly determine the Linux Kernel lockdown mode. Here's an example Docker compose configuration, which ensures Beyla has sufficient information to determine the lockdown mode: + +```yaml +version: '3.8' + +services: + ... + beyla: + image: grafana/beyla:latest + command: + - /beyla + - --config=/configs/beyla-config.yml + volumes: + - /sys/kernel/security:/sys/kernel/security +``` + +If the volume is not mounted, Beyla will assume that the Linux Kernel is not running in integrity mode. + +### Linux Kernel version limitations for gRPC context propagation (5.17+) + +The gRPC protocol headers are typically compressed and encoded with the `hpack` algorithm. Uncompressed headers are also supported, however gRPC buffers in Go are typically small and writing uncompressed values is not feasible. We've implemented `hpack` encoding in eBPF to support context propagation in Go gRPC, by utilizing the eBPF helper [bpf_loop](https://www.man7.org/linux/man-pages/man7/bpf-helpers.7.html). + +The gRPC context propagation for Go requires Linux Kernel versions equal to or later than **5.17**, because the `bpf_loop` helper is only available in those kernel versions. 
diff --git a/docs/sources/requesttime.md b/docs/sources/requesttime.md index 2ed417509..223ea408e 100644 --- a/docs/sources/requesttime.md +++ b/docs/sources/requesttime.md @@ -2,7 +2,7 @@ title: Measuring total request times, instead of service times menuTitle: Measuring total request times description: Measure request time instead of service time, for your server-side application code. -weight: 5 +weight: 6 keywords: - Beyla - eBPF diff --git a/docs/sources/stability.md b/docs/sources/stability.md index 57434630c..d437d0388 100644 --- a/docs/sources/stability.md +++ b/docs/sources/stability.md @@ -2,7 +2,7 @@ title: Stability Guarantees menuTitle: Stability Guarantees description: This section covers the major version stability guarantees for Beyla. -weight: 6 +weight: 7 keywords: - Beyla - Stability diff --git a/docs/sources/tutorial/index.md b/docs/sources/tutorial/index.md index 4fff4cf3f..083046c80 100644 --- a/docs/sources/tutorial/index.md +++ b/docs/sources/tutorial/index.md @@ -18,7 +18,7 @@ aliases: # Beyla quick start tutorial -To reduce the time it takes to instrument an application and improve the adoption of Application Observability, Grafana built Beyla, an eBPF auto-instrumentation tool, that is able to report basic transactions span information, as well as [RED metrics](/blog/2018/08/02/the-red-method-how-to-instrument-your-services/) for Linux HTTP/S and gRPC services, without any application code or configuration changes. +To reduce the time it takes to instrument an application and improve the adoption of Application Observability, Grafana built Beyla, an eBPF auto-instrumentation tool, that is able to report transactions span information, as well as [RED metrics](/blog/2018/08/02/the-red-method-how-to-instrument-your-services/) for Linux HTTP/S and gRPC services, without any application code or configuration changes. 
## eBPF overview @@ -283,7 +283,7 @@ OpenTelemetry traces]({{< relref "../configure/options.md#otel-traces-exporter" but this functionality is not explained in this tutorial). Beyla has its limitations too. It only provides generic metrics and -single spans trace information (no distributed traces, yet). Language agents and manual +transaction level trace span information. Language agents and manual instrumentation is still recommended, so that you can specify the granularity of each part of the code to be instrumented, putting the focus on your critical operations. @@ -295,7 +295,8 @@ has to be privileged. In the future, we plan to add metrics about other well-established protocols, like database or message queuing connections. -Distributed tracing is also on our road-map. With distributed tracing we will be able to correlate +Distributed tracing is currently only supported for Go services, while other programming language +support remains on our road-map. With distributed tracing we will be able to correlate requests from multiple services (web, database, messaging...). One complexity of distributed tracing is the injection of client-side headers and matching them to the context of the server-side requests. 
We are making progressive advances towards this goal with each diff --git a/examples/greeting-apps/docker-compose.yaml b/examples/greeting-apps/docker-compose.yaml index 36e8fa1f1..724984634 100644 --- a/examples/greeting-apps/docker-compose.yaml +++ b/examples/greeting-apps/docker-compose.yaml @@ -24,6 +24,7 @@ services: - --config=/configs/beyla-config.yml volumes: - ./configs/:/configs + - ./system/sys/kernel/security:/sys/kernel/security container_name: demo-nginxbeyla privileged: true network_mode: "service:nginx" @@ -55,6 +56,7 @@ services: - --config=/configs/beyla-config.yml volumes: - ./configs/:/configs + - ./system/sys/kernel/security:/sys/kernel/security container_name: demo-gobeyla privileged: true network_mode: "service:gotestserver" @@ -118,6 +120,7 @@ services: - --config=/configs/beyla-config.yml volumes: - ./configs/:/configs + - ./system/sys/kernel/security:/sys/kernel/security container_name: demo-rustbeyla privileged: true network_mode: "service:rusttestserver" diff --git a/pkg/internal/ebpf/common/common.go b/pkg/internal/ebpf/common/common.go index fc231c88f..691d2ad40 100644 --- a/pkg/internal/ebpf/common/common.go +++ b/pkg/internal/ebpf/common/common.go @@ -26,6 +26,8 @@ type SQLRequestTrace bpfSqlRequestTrace const EventTypeSQL = 5 // EVENT_SQL_CLIENT +var IntegrityModeOverride = false + // TracerConfig configuration for eBPF programs type TracerConfig struct { BpfDebug bool `yaml:"bfp_debug" env:"BEYLA_BPF_DEBUG"` diff --git a/pkg/internal/ebpf/grpc/grpc.go b/pkg/internal/ebpf/grpc/grpc.go index ad4073c36..35b9ed161 100644 --- a/pkg/internal/ebpf/grpc/grpc.go +++ b/pkg/internal/ebpf/grpc/grpc.go @@ -64,7 +64,7 @@ func (p *Tracer) BlockPID(pid uint32) { } func (p *Tracer) supportsContextPropagation() bool { - return ebpfcommon.SupportsContextPropagation(p.log) && ebpfcommon.SupportsEBPFLoops() + return !ebpfcommon.IntegrityModeOverride && ebpfcommon.SupportsContextPropagation(p.log) && ebpfcommon.SupportsEBPFLoops() } func (p *Tracer) 
Load() (*ebpf.CollectionSpec, error) { diff --git a/pkg/internal/ebpf/nethttp/nethttp.go b/pkg/internal/ebpf/nethttp/nethttp.go index ff79a31ea..458a18800 100644 --- a/pkg/internal/ebpf/nethttp/nethttp.go +++ b/pkg/internal/ebpf/nethttp/nethttp.go @@ -62,13 +62,17 @@ func (p *Tracer) BlockPID(pid uint32) { p.pidsFilter.BlockPID(pid) } +func (p *Tracer) supportsContextPropagation() bool { + return !ebpfcommon.IntegrityModeOverride && ebpfcommon.SupportsContextPropagation(p.log) +} + func (p *Tracer) Load() (*ebpf.CollectionSpec, error) { loader := loadBpf if p.cfg.BpfDebug { loader = loadBpf_debug } - if ebpfcommon.SupportsContextPropagation(p.log) { + if p.supportsContextPropagation() { loader = loadBpf_tp if p.cfg.BpfDebug { loader = loadBpf_tp_debug @@ -132,7 +136,7 @@ func (p *Tracer) GoProbes() map[string]ebpfcommon.FunctionPrograms { }, } - if ebpfcommon.SupportsContextPropagation(p.log) { + if p.supportsContextPropagation() { m["net/http.Header.writeSubset"] = ebpfcommon.FunctionPrograms{ Start: p.bpfObjects.UprobeWriteSubset, } diff --git a/pkg/internal/ebpf/tracer_linux.go b/pkg/internal/ebpf/tracer_linux.go index d7be77c58..6423c13dc 100644 --- a/pkg/internal/ebpf/tracer_linux.go +++ b/pkg/internal/ebpf/tracer_linux.go @@ -12,6 +12,7 @@ import ( "github.com/cilium/ebpf" + common "github.com/grafana/beyla/pkg/internal/ebpf/common" "github.com/grafana/beyla/pkg/internal/request" ) @@ -49,6 +50,18 @@ func (pt *ProcessTracer) Run(ctx context.Context, out chan<- []request.Span) { }() } +func (pt *ProcessTracer) loadSpec(p Tracer) (*ebpf.CollectionSpec, error) { + spec, err := p.Load() + if err != nil { + return nil, fmt.Errorf("loading eBPF program: %w", err) + } + if err := spec.RewriteConstants(p.Constants(pt.ELFInfo, pt.Goffsets)); err != nil { + return nil, fmt.Errorf("rewriting BPF constants definition: %w", err) + } + + return spec, nil +} + // tracers returns Tracer implementer for each discovered eBPF traceable source: GRPC, HTTP... 
func (pt *ProcessTracer) tracers() ([]Tracer, error) { loadMux.Lock() @@ -61,19 +74,32 @@ func (pt *ProcessTracer) tracers() ([]Tracer, error) { for _, p := range pt.Programs { plog := log.With("program", reflect.TypeOf(p)) plog.Debug("loading eBPF program", "PinPath", pt.PinPath, "pid", pt.ELFInfo.Pid, "cmd", pt.ELFInfo.CmdExePath) - spec, err := p.Load() + spec, err := pt.loadSpec(p) if err != nil { - return nil, fmt.Errorf("loading eBPF program: %w", err) - } - if err := spec.RewriteConstants(p.Constants(pt.ELFInfo, pt.Goffsets)); err != nil { - return nil, fmt.Errorf("rewriting BPF constants definition: %w", err) + return nil, err } if err := spec.LoadAndAssign(p.BpfObjects(), &ebpf.CollectionOptions{ Maps: ebpf.MapOptions{ PinPath: pt.PinPath, }}); err != nil { - printVerifierErrorInfo(err) - return nil, fmt.Errorf("loading and assigning BPF objects: %w", err) + if strings.Contains(err.Error(), "unknown func bpf_probe_write_user") { + plog.Warn("Failed to enable distributed tracing context-propagation on a Linux Kernel without write memory support. " + + "To avoid seeing this message, please ensure you have correctly mounted /sys/kernel/security. " + + "For more details set BEYLA_LOG_LEVEL=DEBUG.") + + common.IntegrityModeOverride = true + spec, err = pt.loadSpec(p) + if err == nil { + err = spec.LoadAndAssign(p.BpfObjects(), &ebpf.CollectionOptions{ + Maps: ebpf.MapOptions{ + PinPath: pt.PinPath, + }}) + } + } + if err != nil { + printVerifierErrorInfo(err) + return nil, fmt.Errorf("loading and assigning BPF objects: %w", err) + } } i := instrumenter{ exe: pt.Exe,