Skip to content

Commit

Permalink
Merge branch 'main' into broken-links
Browse files Browse the repository at this point in the history
  • Loading branch information
javiermolinar authored Aug 16, 2024
2 parents 3d993aa + 3e4019f commit bdb6294
Show file tree
Hide file tree
Showing 38 changed files with 314 additions and 107 deletions.
2 changes: 1 addition & 1 deletion .drone/drone.jsonnet
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
local apps = ['tempo', 'tempo-vulture', 'tempo-query'];
local apps = ['tempo', 'tempo-vulture', 'tempo-query', 'tempo-cli'];
local archs = ['amd64', 'arm64'];

//# Building blocks ##
Expand Down
37 changes: 36 additions & 1 deletion .drone/drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ steps:
- COMPONENT=tempo GOARCH=amd64 make exe
- COMPONENT=tempo-vulture GOARCH=amd64 make exe
- COMPONENT=tempo-query GOARCH=amd64 make exe
- COMPONENT=tempo-cli GOARCH=amd64 make exe
image: golang:1.22-alpine
name: build-tempo-binaries
- image: plugins/docker
Expand Down Expand Up @@ -55,6 +56,18 @@ steps:
repo: grafana/tempo-query
username:
from_secret: docker_username
- image: plugins/docker
name: build-tempo-cli-image
settings:
build_args:
- TARGETARCH=amd64
dockerfile: cmd/tempo-cli/Dockerfile
password:
from_secret: docker_password
platform: linux/amd64
repo: grafana/tempo-cli
username:
from_secret: docker_username
trigger:
ref:
- refs/heads/main
Expand All @@ -81,6 +94,7 @@ steps:
- COMPONENT=tempo GOARCH=arm64 make exe
- COMPONENT=tempo-vulture GOARCH=arm64 make exe
- COMPONENT=tempo-query GOARCH=arm64 make exe
- COMPONENT=tempo-cli GOARCH=arm64 make exe
image: golang:1.22-alpine
name: build-tempo-binaries
- image: plugins/docker
Expand Down Expand Up @@ -119,6 +133,18 @@ steps:
repo: grafana/tempo-query
username:
from_secret: docker_username
- image: plugins/docker
name: build-tempo-cli-image
settings:
build_args:
- TARGETARCH=arm64
dockerfile: cmd/tempo-cli/Dockerfile
password:
from_secret: docker_password
platform: linux/arm64
repo: grafana/tempo-cli
username:
from_secret: docker_username
trigger:
ref:
- refs/heads/main
Expand Down Expand Up @@ -169,6 +195,15 @@ steps:
target: tempo-query
username:
from_secret: docker_username
- image: plugins/manifest:1.4.0
name: manifest-tempo-cli
settings:
password:
from_secret: docker_password
spec: .drone/docker-manifest.tmpl
target: tempo-cli
username:
from_secret: docker_username
trigger:
ref:
- refs/heads/main
Expand Down Expand Up @@ -530,6 +565,6 @@ kind: secret
name: gpg_passphrase
---
kind: signature
hmac: 6f0a771ee87cefa9d1ef3622fc936adaae60a19e5f285acc4f7310244c0f899c
hmac: bee5601dffa0f46559f5d8734ebda1261ec9171a3dca7add1a23188f6f162945

...
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@
* [ENHANCEMENT] Added new Traces api V2[#3912](https://github.com/grafana/tempo/pull/3912) (@javiermolinar)
* [ENHANCEMENT] Update to the latest dskit [#3915](https://github.com/grafana/tempo/pull/3915) (@andreasgerstmayr)
* [ENHANCEMENT] Reduce allocs building queriers sharded requests [#3932](https://github.com/grafana/tempo/pull/3932) (@javiermolinar)
* [ENHANCEMENT] Allow compaction disablement per-tenant [#3965](https://github.com/grafana/tempo/pull/3965) (@zalegrala)
* [ENHANCEMENT] Implement polling tenants concurrently [#3647](https://github.com/grafana/tempo/pull/3647) (@zalegrala)
* [ENHANCEMENT] Added new middleware to block matching urls [#3963](https://github.com/grafana/tempo/pull/3963) (@javiermolinar)
* [BUGFIX] Fix panic in certain metrics queries using `rate()` with `by` [#3847](https://github.com/grafana/tempo/pull/3847) (@stoewer)
* [BUGFIX] Fix double appending the primary iterator on second pass with event iterator [#3903](https://github.com/grafana/tempo/pull/3903) (@ie-pham)
* [BUGFIX] Fix metrics queries when grouping by attributes that may not exist [#3734](https://github.com/grafana/tempo/pull/3734) (@mdisibio)
Expand All @@ -66,6 +68,9 @@
* [BUGFIX] **BREAKING CHANGE** Remove unused properties from the WAL configuration [#3911](https://github.com/grafana/tempo/pull/3911) (@javiermolinar)
* [BUGFIX] Bring back OTEL receiver metrics [#3917](https://github.com/grafana/tempo/pull/3917) (@javiermolinar)
* [BUGFIX] Correct block end time when the ingested traces are outside the ingestion slack [#3954](https://github.com/grafana/tempo/pull/3954) (@javiermolinar)
* [BUGFIX] Fix race condition where a streaming response could be marshalled while being modified in the combiner resulting in a panic. [#3961](https://github.com/grafana/tempo/pull/3961) (@joe-elliott)
* [BUGFIX] Pass search options to the backend for SearchTagValuesBlocksV2 requests [#3971](https://github.com/grafana/tempo/pull/3971) (@javiermolinar)


## v2.5.0

Expand Down
10 changes: 5 additions & 5 deletions cmd/tempo/app/overrides_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"golang.org/x/exp/slices"

"github.com/grafana/tempo/modules/generator"
"github.com/grafana/tempo/modules/generator/registry"
"github.com/grafana/tempo/modules/overrides"
"github.com/grafana/tempo/modules/overrides/userconfigurable/api"
"github.com/grafana/tempo/modules/overrides/userconfigurable/client"
Expand All @@ -33,11 +34,10 @@ func (r *runtimeConfigValidator) Validate(config *overrides.Overrides) error {
}
}

if config.MetricsGenerator.GenerateNativeHistograms != "classic" &&
config.MetricsGenerator.GenerateNativeHistograms != "native" &&
config.MetricsGenerator.GenerateNativeHistograms != "both" &&
config.MetricsGenerator.GenerateNativeHistograms != "" {
return fmt.Errorf("metrics_generator.generate_native_histograms \"%s\" is not a valid value, valid values: classic, native, both", config.MetricsGenerator.GenerateNativeHistograms)
if _, ok := registry.HistogramModeToValue[string(config.MetricsGenerator.GenerateNativeHistograms)]; !ok {
if config.MetricsGenerator.GenerateNativeHistograms != "" {
return fmt.Errorf("metrics_generator.generate_native_histograms \"%s\" is not a valid value, valid values: classic, native, both", config.MetricsGenerator.GenerateNativeHistograms)
}
}

return nil
Expand Down
3 changes: 3 additions & 0 deletions docs/sources/tempo/configuration/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,9 @@ query_frontend:
# (default: 0)
[api_timeout: <duration>]

# A list of regular expressions for refusing matching requests, these will apply for every request regardless of the endpoint.
[url_deny_list: <list of strings> | default = <empty list>]]

search:

# The number of concurrent jobs to execute when searching the backend.
Expand Down
13 changes: 7 additions & 6 deletions docs/sources/tempo/shared/best-practices-traces.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ labels:

## Span and resource attributes

[Traces]({{< relref "../introduction" >}}) are built from spans, which denote units of work such as a call to, or from, an upstream service. Spans are constructed primarily of span and resource attributes.
[Traces](https://grafana.com/docs/tempo/<TEMPO_VERSION>/introduction/) are built from spans, which denote units of work such as a call to, or from, an upstream service. Spans are constructed primarily of span and resource attributes.
Spans also have a hierarchy, where parent spans can have children or siblings.

In the screenshot below, the left side of the screen (1) shows the list of results for the query. The right side (2) lists each span that makes up the selected trace.
Expand Down Expand Up @@ -63,7 +63,8 @@ In general, consider the following guidelines:
- Don't use redundant attributes.
- When determining which attributes to add, consider an application's service flow, and execution in the context of only the current span.

The OpenTelemetry project does not specify a maximum number of attributes that a span can have. However, the default limits for the number of attributes per span is [128 entries](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#attribute-limits), so you will have to adjust that. There are also default limits on attribute value and name character lengths.
The OpenTelemetry project doesn't specify a maximum number of attributes that a span can have.
However, the default limits for the number of attributes per span is [128 entries](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#attribute-limits), so you will have to adjust that. There are also default limits on attribute value and name character lengths.

## Determining where to add spans

Expand All @@ -78,13 +79,13 @@ However, adding a span for each method or function call in that loop might not,

## Span length

While there are some (high) default limits to the length that a span (and by definition, the traces they belong to) can be, these can be adjusted by [these configurations]({{< relref "../configuration#ingestion-limits" >}}).
While there are some (high) default limits to the length that a span (and by definition, the traces they belong to) can be, these can be adjusted by [these configurations](https://grafana.com/docs/tempo/<TEMPO_VERSION>/configuration/#ingestion-limits).
Traces that include a large number of spans and/or long-running spans can have an impact on the time taken to query them once stored.
Cloud Traces users should contact Grafana Support to modify overrides
Cloud Traces users should contact Grafana Support to modify overrides.

For long-running spans and traces, the best way to see this impact on requests is to send a few test cases and see what the performance looks like (and evaluate the trace size).
For long-running spans and traces, the best way to see this impact on requests is to send a few test cases and see what the performance looks like and to evaluate the trace size.

From there, you can tweak the configuration for Tempo or determine ways to re-architect how the trace is being produced.
From there, you can modify the configuration for Tempo or determine ways to re-architect how the trace is produced.

You can consider breaking up the spans in several ways:
- Decompose the query
Expand Down
5 changes: 5 additions & 0 deletions modules/compactor/compactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,11 @@ func (c *Compactor) BlockRetentionForTenant(tenantID string) time.Duration {
return c.overrides.BlockRetention(tenantID)
}

// CompactionDisabledForTenant implements CompactorOverrides
func (c *Compactor) CompactionDisabledForTenant(tenantID string) bool {
return c.overrides.CompactionDisabled(tenantID)
}

func (c *Compactor) MaxBytesPerTraceForTenant(tenantID string) int {
return c.overrides.MaxBytesPerTrace(tenantID)
}
Expand Down
8 changes: 6 additions & 2 deletions modules/frontend/combiner/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,9 @@ func (c *genericCombiner[T]) GRPCFinal() (T, error) {
return empty, err
}

return final, nil
// clone the final response to prevent race conditions with marshalling this data
finalClone := proto.Clone(final).(T)
return finalClone, nil
}

func (c *genericCombiner[T]) GRPCDiff() (T, error) {
Expand All @@ -194,7 +196,9 @@ func (c *genericCombiner[T]) GRPCDiff() (T, error) {
return empty, err
}

return diff, nil
// clone the diff to prevent race conditions with marshalling this data
diffClone := proto.Clone(diff)
return diffClone.(T), nil
}

func (c *genericCombiner[T]) erroredResponse() (*http.Response, error) {
Expand Down
3 changes: 3 additions & 0 deletions modules/frontend/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ type Config struct {
// traceql, tag search, tag value search, trace by id and all streaming gRPC endpoints.
// 0 disables
APITimeout time.Duration `yaml:"api_timeout,omitempty"`

// A list of regexes for black listing requests, these will apply for every request regardless the endpoint
URLDenyList []string `yaml:"url_deny_list,omitempty"`
}

type SearchConfig struct {
Expand Down
8 changes: 8 additions & 0 deletions modules/frontend/frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,15 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo
}

retryWare := pipeline.NewRetryWare(cfg.MaxRetries, registerer)

cacheWare := pipeline.NewCachingWare(cacheProvider, cache.RoleFrontendSearch, logger)
statusCodeWare := pipeline.NewStatusCodeAdjustWare()
traceIDStatusCodeWare := pipeline.NewStatusCodeAdjustWareWithAllowedCode(http.StatusNotFound)
urlDenyListWare := pipeline.NewURLDenyListWare(cfg.URLDenyList)

tracePipeline := pipeline.Build(
[]pipeline.AsyncMiddleware[combiner.PipelineResponse]{
urlDenyListWare,
multiTenantMiddleware(cfg, logger),
newAsyncTraceIDSharder(&cfg.TraceByID, logger),
},
Expand All @@ -102,6 +105,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo

searchPipeline := pipeline.Build(
[]pipeline.AsyncMiddleware[combiner.PipelineResponse]{
urlDenyListWare,
multiTenantMiddleware(cfg, logger),
newAsyncSearchSharder(reader, o, cfg.Search.Sharder, logger),
},
Expand All @@ -110,6 +114,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo

searchTagsPipeline := pipeline.Build(
[]pipeline.AsyncMiddleware[combiner.PipelineResponse]{
urlDenyListWare,
multiTenantMiddleware(cfg, logger),
newAsyncTagSharder(reader, o, cfg.Search.Sharder, parseTagsRequest, logger),
},
Expand All @@ -118,6 +123,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo

searchTagValuesPipeline := pipeline.Build(
[]pipeline.AsyncMiddleware[combiner.PipelineResponse]{
urlDenyListWare,
multiTenantMiddleware(cfg, logger),
newAsyncTagSharder(reader, o, cfg.Search.Sharder, parseTagValuesRequest, logger),
},
Expand All @@ -127,6 +133,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo
// metrics summary
metricsPipeline := pipeline.Build(
[]pipeline.AsyncMiddleware[combiner.PipelineResponse]{
urlDenyListWare,
multiTenantUnsupportedMiddleware(cfg, logger),
},
[]pipeline.Middleware{statusCodeWare, retryWare},
Expand All @@ -135,6 +142,7 @@ func New(cfg Config, next http.RoundTripper, o overrides.Interface, reader tempo
// traceql metrics
queryRangePipeline := pipeline.Build(
[]pipeline.AsyncMiddleware[combiner.PipelineResponse]{
urlDenyListWare,
multiTenantMiddleware(cfg, logger),
newAsyncQueryRangeSharder(reader, o, cfg.Metrics.Sharder, logger),
},
Expand Down
52 changes: 52 additions & 0 deletions modules/frontend/pipeline/deny_list_middleware.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package pipeline

import (
"fmt"
"regexp"

"github.com/grafana/tempo/modules/frontend/combiner"
)

type urlDenylistWare struct {
denyList []*regexp.Regexp
next AsyncRoundTripper[combiner.PipelineResponse]
}

func NewURLDenyListWare(denyList []string) AsyncMiddleware[combiner.PipelineResponse] {
compiledDenylist := make([]*regexp.Regexp, 0)
for _, v := range denyList {
r, err := regexp.Compile(v)
if err == nil {
compiledDenylist = append(compiledDenylist, r)
} else {
panic(fmt.Sprintf("error compiling query frontend deny list regex: %s", err))
}
}

return AsyncMiddlewareFunc[combiner.PipelineResponse](func(next AsyncRoundTripper[combiner.PipelineResponse]) AsyncRoundTripper[combiner.PipelineResponse] {
return &urlDenylistWare{
next: next,
denyList: compiledDenylist,
}
})
}

func (c urlDenylistWare) RoundTrip(req Request) (Responses[combiner.PipelineResponse], error) {
if len(c.denyList) != 0 {
err := c.validateRequest(req.HTTPRequest().URL.String())
if err != nil {
return NewBadRequest(err), nil
}
}

return c.next.RoundTrip(req)
}

func (c urlDenylistWare) validateRequest(url string) error {
for _, v := range c.denyList {
if v.MatchString(url) {
return fmt.Errorf("Invalid request %s. This query has been identified as one that destabilizes our system. Contact your system administrator for more information", url)
}
}
return nil
}
55 changes: 55 additions & 0 deletions modules/frontend/pipeline/deny_list_middleware_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package pipeline

import (
"bytes"
"context"
"io"
"net/http"
"testing"

"github.com/grafana/tempo/modules/frontend/combiner"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

var next = AsyncRoundTripperFunc[combiner.PipelineResponse](func(_ Request) (Responses[combiner.PipelineResponse], error) {
return NewHTTPToAsyncResponse(&http.Response{
StatusCode: 200,
Body: io.NopCloser(bytes.NewReader([]byte{})),
}), nil
})

func TestURLBlackListMiddlewareForEmptyBlackList(t *testing.T) {
regexes := []string{}
roundTrip := NewURLDenyListWare(regexes).Wrap(next)
statusCode := DoRequest(t, "http://localhost:8080/api/v2/traces/123345", roundTrip)
assert.Equal(t, 200, statusCode)
}

func TestURLBlackListMiddlewarePanicsOnSyntacticallyIncorrectRegex(t *testing.T) {
regexes := []string{"qr/^(.*\\.traces\\/[a-f0-9]{32}$/"}
assert.Panics(t, func() {
NewURLDenyListWare(regexes).Wrap(next)
})
}

func TestURLBlackListMiddleware(t *testing.T) {
regexes := []string{
"^.*v2.*",
}
roundTrip := NewURLDenyListWare(regexes).Wrap(next)
statusCode := DoRequest(t, "http://localhost:9000?param1=a&param2=b", roundTrip)
assert.Equal(t, 200, statusCode)

// Blacklisted url
statusCode = DoRequest(t, "http://localhost:8080/api/v2/traces/123345", roundTrip)
assert.Equal(t, 400, statusCode)
}

func DoRequest(t *testing.T, url string, rt AsyncRoundTripper[combiner.PipelineResponse]) int {
req, _ := http.NewRequest(http.MethodGet, url, nil)
resp, _ := rt.RoundTrip(NewHTTPRequest(req))
httpResponse, _, err := resp.Next(context.Background())
require.NoError(t, err)
return httpResponse.HTTPResponse().StatusCode
}
4 changes: 2 additions & 2 deletions modules/generator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ func (cfg *ProcessorConfig) copyWithOverrides(o metricsGeneratorOverrides, userI
}

if histograms := o.MetricsGeneratorGenerateNativeHistograms(userID); histograms != "" {
copyCfg.ServiceGraphs.HistogramOverride = histograms
copyCfg.SpanMetrics.HistogramOverride = histograms
copyCfg.ServiceGraphs.HistogramOverride = registry.HistogramModeToValue[string(histograms)]
copyCfg.SpanMetrics.HistogramOverride = registry.HistogramModeToValue[string(histograms)]
}

copyCfg.SpanMetrics.DimensionMappings = o.MetricsGeneratorProcessorSpanMetricsDimensionMappings(userID)
Expand Down
Loading

0 comments on commit bdb6294

Please sign in to comment.