diff --git a/Makefile b/Makefile index 192ed39c..5baa4d74 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ $(GOLANGCI_LINT): ## Download Go linter .PHONY: lint lint: $(GOLANGCI_LINT) ## Run Go linter - $(GOLANGCI_LINT) run -v --fix -c .golangci.yml ./... + $(GOLANGCI_LINT) run -v -c .golangci.yml ./... .PHONY: test test: ## Run unit tests and measure code coverage @@ -24,7 +24,7 @@ test: ## Run unit tests and measure code coverage .PHONY: bench bench: ## Run Go benchmarks - go test ./... -bench . -benchtime 5s -timeout 0 -run=XXX -benchmem + go test ./... -bench . -benchtime 5s -timeout 0 -run='^$$' -benchmem .PHONY: docker docker: ## Build Docker image diff --git a/algorithms.go b/algorithms.go index f2ed4a82..c9231610 100644 --- a/algorithms.go +++ b/algorithms.go @@ -34,8 +34,7 @@ import ( // with 100 emails and the request will succeed. You can override this default behavior with `DRAIN_OVER_LIMIT` // Implements token bucket algorithm for rate limiting. https://en.wikipedia.org/wiki/Token_bucket -func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - +func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { tokenBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("tokenBucket")) defer tokenBucketTimer.ObserveDuration() @@ -100,7 +99,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * s.Remove(ctx, hashKey) } - return tokenBucketNewItem(ctx, s, c, r) + return tokenBucketNewItem(ctx, s, c, r, reqState) } // Update the limit if it changed. @@ -133,12 +132,12 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } // If our new duration means we are currently expired. - now := MillisecondNow() - if expire <= now { + createdAt := *r.CreatedAt + if expire <= createdAt { // Renew item. span.AddEvent("Limit has expired") - expire = now + r.Duration - t.CreatedAt = now + expire = createdAt + r.Duration + t.CreatedAt = createdAt t.Remaining = t.Limit } @@ -147,7 +146,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * rl.ResetTime = expire } - if s != nil { + if s != nil && reqState.IsOwner { defer func() { s.OnChange(ctx, r, item) }() @@ -162,7 +161,9 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // If we are already at the limit. if rl.Remaining == 0 && r.Hits > 0 { trace.SpanFromContext(ctx).AddEvent("Already over the limit") - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT t.Status = rl.Status return rl, nil @@ -180,7 +181,9 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // without updating the cache. if r.Hits > t.Remaining { trace.SpanFromContext(ctx).AddEvent("Over the limit") - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT if HasBehavior(r.Behavior, Behavior_DRAIN_OVER_LIMIT) { // DRAIN_OVER_LIMIT behavior drains the remaining counter. @@ -196,19 +199,19 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } // Item is not found in cache or store, create new. - return tokenBucketNewItem(ctx, s, c, r) + return tokenBucketNewItem(ctx, s, c, r, reqState) } // Called by tokenBucket() when adding a new item in the store. 
-func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - now := MillisecondNow() - expire := now + r.Duration +func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { + createdAt := *r.CreatedAt + expire := createdAt + r.Duration t := &TokenBucketItem{ Limit: r.Limit, Duration: r.Duration, Remaining: r.Limit - r.Hits, - CreatedAt: now, + CreatedAt: createdAt, } // Add a new rate limit to the cache. @@ -236,7 +239,9 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) // Client could be requesting that we always return OVER_LIMIT. if r.Hits > r.Limit { trace.SpanFromContext(ctx).AddEvent("Over the limit") - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT rl.Remaining = r.Limit t.Remaining = r.Limit @@ -244,7 +249,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) c.Add(item) - if s != nil { + if s != nil && reqState.IsOwner { s.OnChange(ctx, r, item) } @@ -252,7 +257,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) } // Implements leaky bucket algorithm for rate limiting https://en.wikipedia.org/wiki/Leaky_bucket -func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { +func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { leakyBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getRateLimit_leakyBucket")) defer leakyBucketTimer.ObserveDuration() @@ -260,7 +265,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * r.Burst = r.Limit } - now := MillisecondNow() + createdAt := *r.CreatedAt // Get rate limit from cache. hashKey := r.HashKey() @@ -309,7 +314,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * s.Remove(ctx, hashKey) } - return leakyBucketNewItem(ctx, s, c, r) + return leakyBucketNewItem(ctx, s, c, r, reqState) } if HasBehavior(r.Behavior, Behavior_RESET_REMAINING) { @@ -349,16 +354,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } if r.Hits != 0 { - c.UpdateExpiration(r.HashKey(), now+duration) + c.UpdateExpiration(r.HashKey(), createdAt+duration) } // Calculate how much leaked out of the bucket since the last time we leaked a hit - elapsed := now - b.UpdatedAt + elapsed := createdAt - b.UpdatedAt leak := float64(elapsed) / rate if int64(leak) > 0 { b.Remaining += leak - b.UpdatedAt = now + b.UpdatedAt = createdAt } if int64(b.Remaining) > b.Burst { @@ -369,12 +374,12 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * Limit: b.Limit, Remaining: int64(b.Remaining), Status: Status_UNDER_LIMIT, - ResetTime: now + (b.Limit-int64(b.Remaining))*int64(rate), + ResetTime: createdAt + (b.Limit-int64(b.Remaining))*int64(rate), } // TODO: Feature missing: check for Duration change between item/request. 
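// Not part of the patch: a minimal, self-contained sketch of the leak arithmetic
// implemented in the hunk above. The central change in this file is that the bucket
// math now uses the request's CreatedAt timestamp (stamped once and carried with the
// request) instead of wall-clock "now", so an owner peer and a non-owner peer that
// process the same request compute the same remaining count. All names below are
// illustrative only and are not the gubernator API.
package main

import "fmt"

// leakedRemaining refills remaining based on the time elapsed between the bucket's
// last update and the request's CreatedAt, capped at the configured burst.
func leakedRemaining(remaining float64, burst, limit, durationMs, updatedAt, createdAt int64) float64 {
	rate := float64(durationMs) / float64(limit) // milliseconds per leaked token
	elapsed := createdAt - updatedAt             // elapsed time in request-timestamp terms
	leak := float64(elapsed) / rate              // tokens leaked over that interval
	if int64(leak) > 0 {
		remaining += leak
	}
	if int64(remaining) > burst {
		remaining = float64(burst)
	}
	return remaining
}

func main() {
	// A 100-hit / 60s bucket that is half drained, 30s after its last update:
	// it refills completely and is capped at the burst of 100.
	fmt.Println(leakedRemaining(50, 100, 100, 60_000, 0, 30_000))
}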
- if s != nil { + if s != nil && reqState.IsOwner { defer func() { s.OnChange(ctx, r, item) }() @@ -382,7 +387,9 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // If we are already at the limit if int64(b.Remaining) == 0 && r.Hits > 0 { - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT return rl, nil } @@ -391,14 +398,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * if int64(b.Remaining) == r.Hits { b.Remaining = 0 rl.Remaining = int64(b.Remaining) - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) return rl, nil } // If requested is more than available, then return over the limit // without updating the bucket, unless `DRAIN_OVER_LIMIT` is set. if r.Hits > int64(b.Remaining) { - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT // DRAIN_OVER_LIMIT behavior drains the remaining counter. @@ -417,16 +426,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * b.Remaining -= float64(r.Hits) rl.Remaining = int64(b.Remaining) - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) return rl, nil } - return leakyBucketNewItem(ctx, s, c, r) + return leakyBucketNewItem(ctx, s, c, r, reqState) } // Called by leakyBucket() when adding a new item in the store. -func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - now := MillisecondNow() +func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { + createdAt := *r.CreatedAt duration := r.Duration rate := float64(duration) / float64(r.Limit) if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { @@ -445,7 +454,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) Remaining: float64(r.Burst - r.Hits), Limit: r.Limit, Duration: duration, - UpdatedAt: now, + UpdatedAt: createdAt, Burst: r.Burst, } @@ -453,20 +462,22 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) Status: Status_UNDER_LIMIT, Limit: b.Limit, Remaining: r.Burst - r.Hits, - ResetTime: now + (b.Limit-(r.Burst-r.Hits))*int64(rate), + ResetTime: createdAt + (b.Limit-(r.Burst-r.Hits))*int64(rate), } // Client could be requesting that we start with the bucket OVER_LIMIT if r.Hits > r.Burst { - metricOverLimitCounter.Add(1) + if reqState.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT rl.Remaining = 0 - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) b.Remaining = 0 } item := &CacheItem{ - ExpireAt: now + duration, + ExpireAt: createdAt + duration, Algorithm: r.Algorithm, Key: r.HashKey(), Value: &b, @@ -474,7 +485,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) c.Add(item) - if s != nil { + if s != nil && reqState.IsOwner { s.OnChange(ctx, r, item) } diff --git a/benchmark_test.go b/benchmark_test.go index 5a383761..9673cf2b 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -22,6 +22,7 @@ import ( guber "github.com/mailgun/gubernator/v2" "github.com/mailgun/gubernator/v2/cluster" + "github.com/mailgun/holster/v4/clock" 
"github.com/mailgun/holster/v4/syncutil" "github.com/stretchr/testify/require" ) @@ -31,8 +32,9 @@ func BenchmarkServer(b *testing.B) { conf := guber.Config{} err := conf.SetDefaults() require.NoError(b, err, "Error in conf.SetDefaults") + createdAt := epochMillis(clock.Now()) - b.Run("GetPeerRateLimit() with no batching", func(b *testing.B) { + b.Run("GetPeerRateLimit", func(b *testing.B) { client, err := guber.NewPeerClient(guber.PeerConfig{ Info: cluster.GetRandomPeer(cluster.DataCenterNone), Behavior: conf.Behaviors, @@ -40,17 +42,17 @@ func BenchmarkServer(b *testing.B) { if err != nil { b.Errorf("Error building client: %s", err) } - b.ResetTimer() for n := 0; n < b.N; n++ { - _, err := client.GetPeerRateLimit(context.Background(), &guber.RateLimitReq{ - Name: "get_peer_rate_limits_benchmark", + _, err := client.GetPeerRateLimit(ctx, &guber.RateLimitReq{ + Name: b.Name(), UniqueKey: guber.RandomString(10), - Behavior: guber.Behavior_NO_BATCHING, + // Behavior: guber.Behavior_NO_BATCHING, Limit: 10, Duration: 5, Hits: 1, + CreatedAt: &createdAt, }) if err != nil { b.Errorf("Error in client.GetPeerRateLimit: %s", err) @@ -58,17 +60,16 @@ func BenchmarkServer(b *testing.B) { } }) - b.Run("GetRateLimit()", func(b *testing.B) { + b.Run("GetRateLimits batching", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: guber.RandomString(10), Limit: 10, Duration: guber.Second * 5, @@ -82,17 +83,16 @@ func BenchmarkServer(b *testing.B) { } }) - b.Run("GetRateLimitGlobal()", func(b *testing.B) { + b.Run("GetRateLimits global", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { - _, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ + _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: guber.RandomString(10), Behavior: guber.Behavior_GLOBAL, Limit: 10, @@ -110,11 +110,10 @@ func BenchmarkServer(b *testing.B) { b.Run("HealthCheck", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { - if _, err := client.HealthCheck(context.Background(), &guber.HealthCheckReq{}); err != nil { + if _, err := client.HealthCheck(ctx, &guber.HealthCheckReq{}); err != nil { b.Errorf("Error in client.HealthCheck: %s", err) } } @@ -123,17 +122,15 @@ func BenchmarkServer(b *testing.B) { b.Run("Thundering herd", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() - fan := syncutil.NewFanOut(100) for n := 0; n < b.N; n++ { fan.Run(func(o interface{}) error { - _, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ + _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: 
guber.RandomString(10), Limit: 10, Duration: guber.Second * 5, diff --git a/cache.go b/cache.go index 163627d2..0fd431a5 100644 --- a/cache.go +++ b/cache.go @@ -39,3 +39,19 @@ type CacheItem struct { // for the latest rate limit data. InvalidAt int64 } + +func (item *CacheItem) IsExpired() bool { + now := MillisecondNow() + + // If the entry is invalidated + if item.InvalidAt != 0 && item.InvalidAt < now { + return true + } + + // If the entry has expired, remove it from the cache + if item.ExpireAt < now { + return true + } + + return false +} diff --git a/functional_test.go b/functional_test.go index 654342b7..400137b2 100644 --- a/functional_test.go +++ b/functional_test.go @@ -24,18 +24,24 @@ import ( "math/rand" "net/http" "os" + "sort" "strings" + "sync" + "sync/atomic" "testing" "time" + "github.com/mailgun/errors" guber "github.com/mailgun/gubernator/v2" "github.com/mailgun/gubernator/v2/cluster" "github.com/mailgun/holster/v4/clock" + "github.com/mailgun/holster/v4/syncutil" "github.com/mailgun/holster/v4/testutil" "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" json "google.golang.org/protobuf/encoding/protojson" @@ -43,23 +49,12 @@ import ( // Setup and shutdown the mock gubernator cluster for the entire test suite func TestMain(m *testing.M) { - if err := cluster.StartWith([]guber.PeerInfo{ - {GRPCAddress: "127.0.0.1:9990", HTTPAddress: "127.0.0.1:9980", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9991", HTTPAddress: "127.0.0.1:9981", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9992", HTTPAddress: "127.0.0.1:9982", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9993", HTTPAddress: "127.0.0.1:9983", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9994", HTTPAddress: "127.0.0.1:9984", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9995", HTTPAddress: "127.0.0.1:9985", DataCenter: cluster.DataCenterNone}, - - // DataCenterOne - {GRPCAddress: "127.0.0.1:9890", HTTPAddress: "127.0.0.1:9880", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9891", HTTPAddress: "127.0.0.1:9881", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9892", HTTPAddress: "127.0.0.1:9882", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9893", HTTPAddress: "127.0.0.1:9883", DataCenter: cluster.DataCenterOne}, - }); err != nil { + err := startGubernator() + if err != nil { fmt.Println(err) os.Exit(1) } + code := m.Run() cluster.Stop() @@ -68,8 +63,8 @@ func TestMain(m *testing.M) { } func TestOverTheLimit(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -103,7 +98,7 @@ func TestOverTheLimit(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -123,7 +118,7 @@ func TestMultipleAsync(t *testing.T) { t.Logf("Asking Peer: %s", cluster.GetPeers()[0].GRPCAddress) client, errs := guber.DialV1Server(cluster.GetPeers()[0].GRPCAddress, nil) - require.Nil(t, errs) + require.NoError(t, errs) resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: 
[]*guber.RateLimitReq{ @@ -147,7 +142,7 @@ func TestMultipleAsync(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) require.Len(t, resp.Responses, 2) @@ -166,8 +161,8 @@ func TestTokenBucket(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() addr := cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress - client, errs := guber.DialV1Server(addr, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(addr, nil) + require.NoError(t, err) tests := []struct { name string @@ -209,7 +204,7 @@ func TestTokenBucket(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -226,8 +221,8 @@ func TestTokenBucket(t *testing.T) { func TestTokenBucketGregorian(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -284,7 +279,7 @@ func TestTokenBucketGregorian(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -302,8 +297,8 @@ func TestTokenBucketNegativeHits(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() addr := cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress - client, errs := guber.DialV1Server(addr, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(addr, nil) + require.NoError(t, err) tests := []struct { name string @@ -356,7 +351,7 @@ func TestTokenBucketNegativeHits(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -372,8 +367,8 @@ func TestTokenBucketNegativeHits(t *testing.T) { func TestDrainOverLimit(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -482,8 +477,8 @@ func TestTokenBucketRequestMoreThanAvailable(t *testing.T) { func TestLeakyBucket(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -609,8 +604,8 @@ func TestLeakyBucket(t *testing.T) { func TestLeakyBucketWithBurst(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -716,8 +711,8 @@ func TestLeakyBucketWithBurst(t *testing.T) { func TestLeakyBucketGregorian(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -753,14 +748,16 @@ func TestLeakyBucketGregorian(t *testing.T) { now = now.Truncate(1 * time.Minute) // So we don't start on the minute boundary now = now.Add(time.Millisecond * 100) + name := t.Name() + key := guber.RandomString(10) for _, test := range tests { t.Run(test.Name, func(t *testing.T) { 
resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_greg", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Behavior: guber.Behavior_DURATION_IS_GREGORIAN, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.GregorianMinutes, @@ -769,15 +766,13 @@ func TestLeakyBucketGregorian(t *testing.T) { }, }, }) - clock.Freeze(clock.Now()) require.NoError(t, err) rl := resp.Responses[0] - assert.Equal(t, test.Status, rl.Status) assert.Equal(t, test.Remaining, rl.Remaining) assert.Equal(t, int64(60), rl.Limit) - assert.True(t, rl.ResetTime > now.Unix()) + assert.Greater(t, rl.ResetTime, now.Unix()) clock.Advance(test.Sleep) }) } @@ -786,8 +781,8 @@ func TestLeakyBucketGregorian(t *testing.T) { func TestLeakyBucketNegativeHits(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -899,8 +894,8 @@ func TestLeakyBucketRequestMoreThanAvailable(t *testing.T) { } func TestMissingFields(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Req *guber.RateLimitReq @@ -955,29 +950,29 @@ func TestMissingFields(t *testing.T) { resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{test.Req}, }) - require.Nil(t, err) + require.NoError(t, err) assert.Equal(t, test.Error, resp.Responses[0].Error, i) assert.Equal(t, test.Status, resp.Responses[0].Status, i) } } func TestGlobalRateLimits(t *testing.T) { - const ( - name = "test_global" - key = "account:12345" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) + var firstResetTime int64 - sendHit := func(client guber.V1Client, status guber.Status, hits int64, remain int64) { + sendHit := func(client guber.V1Client, status guber.Status, hits, remain int64) { ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) defer cancel() resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_global", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, Duration: guber.Minute * 3, @@ -987,11 +982,24 @@ func TestGlobalRateLimits(t *testing.T) { }, }) require.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].Error) - assert.Equal(t, remain, resp.Responses[0].Remaining) - assert.Equal(t, status, resp.Responses[0].Status) - assert.Equal(t, int64(5), resp.Responses[0].Limit) + item := resp.Responses[0] + assert.Equal(t, "", item.Error) + assert.Equal(t, remain, item.Remaining) + assert.Equal(t, status, item.Status) + assert.Equal(t, int64(5), item.Limit) + + // ResetTime should not change during test. 
+ if firstResetTime == 0 { + firstResetTime = item.ResetTime + } + assert.Equal(t, firstResetTime, item.ResetTime) + + // ensure that we have a canonical host + assert.NotEmpty(t, item.Metadata["owner"]) } + + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + // Our first hit should create the request on the peer and queue for async forward sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1, 4) @@ -1005,8 +1013,6 @@ func TestGlobalRateLimits(t *testing.T) { assert.NoError(t, err) assert.Equal(t, 1, int(m.Value)) }) - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) @@ -1027,18 +1033,15 @@ func TestGlobalRateLimits(t *testing.T) { // either owner or non-owner peer. func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { ctx := context.Background() - const name = "test_global" - key := fmt.Sprintf("key:%016x", rand.Int()) + name := t.Name() + key := guber.RandomString(10) // Determine owner and non-owner peers. - ownerPeerInfo, err := cluster.FindOwningPeer(name, key) + owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) - owner := ownerPeerInfo.GRPCAddress - nonOwner := cluster.PeerAt(0).GRPCAddress - if nonOwner == owner { - nonOwner = cluster.PeerAt(1).GRPCAddress - } - require.NotEqual(t, owner, nonOwner) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + nonOwner := peers[0] // Connect to owner and non-owner peers in round robin. dialOpts := []grpc.DialOption{ @@ -1046,22 +1049,22 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`), } - address := fmt.Sprintf("static:///%s,%s", owner, nonOwner) + address := fmt.Sprintf("static:///%s,%s", owner.PeerInfo.GRPCAddress, nonOwner.PeerInfo.GRPCAddress) conn, err := grpc.DialContext(ctx, address, dialOpts...) require.NoError(t, err) client := guber.NewV1Client(conn) - sendHit := func(status guber.Status, i int) { - ctx, cancel := context.WithTimeout(ctx, 10*clock.Second) + sendHit := func(client guber.V1Client, status guber.Status, i int) { + ctx, cancel := context.WithTimeout(context.Background(), 10*clock.Second) defer cancel() resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { Name: name, UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, + Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, + Duration: 5 * guber.Minute, Hits: 1, Limit: 2, }, @@ -1069,35 +1072,34 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { }) require.NoError(t, err, i) item := resp.Responses[0] - assert.Equal(t, "", item.GetError(), fmt.Sprintf("mismatch error, iteration %d", i)) - assert.Equal(t, status, item.GetStatus(), fmt.Sprintf("mismatch status, iteration %d", i)) + assert.Equal(t, "", item.Error, fmt.Sprintf("unexpected error, iteration %d", i)) + assert.Equal(t, status, item.Status, fmt.Sprintf("mismatch status, iteration %d", i)) } + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + // Send two hits that should be processed by the owner and non-owner and // deplete the limit consistently. - sendHit(guber.Status_UNDER_LIMIT, 1) - sendHit(guber.Status_UNDER_LIMIT, 2) - - // Sleep to ensure the global broadcast occurs (every 100ms). 
- time.Sleep(150 * time.Millisecond) + sendHit(client, guber.Status_UNDER_LIMIT, 1) + sendHit(client, guber.Status_UNDER_LIMIT, 2) + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 1)) // All successive hits should return OVER_LIMIT. for i := 2; i <= 10; i++ { - sendHit(guber.Status_OVER_LIMIT, i) + sendHit(client, guber.Status_OVER_LIMIT, i) } } func TestGlobalRateLimitsPeerOverLimit(t *testing.T) { - const ( - name = "test_global_token_limit" - key = "account:12345" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) - sendHit := func(expectedStatus guber.Status, hits int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + sendHit := func(expectedStatus guber.Status, hits, expectedRemaining int64) { + ctx, cancel := context.WithTimeout(context.Background(), 10*clock.Second) defer cancel() resp, err := peers[0].MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ @@ -1106,80 +1108,44 @@ func TestGlobalRateLimitsPeerOverLimit(t *testing.T) { UniqueKey: key, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, + Duration: 5 * guber.Minute, Hits: hits, Limit: 2, }, }, }) assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) + item := resp.Responses[0] + assert.Equal(t, "", item.Error, "unexpected error") + assert.Equal(t, expectedStatus, item.Status, "mismatch status") + assert.Equal(t, expectedRemaining, item.Remaining, "mismatch remaining") } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - // Send two hits that should be processed by the owner and the broadcast to peer, depleting the remaining - sendHit(guber.Status_UNDER_LIMIT, 1) - sendHit(guber.Status_UNDER_LIMIT, 1) - // Wait for the broadcast from the owner to the peer - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) - // Since the remainder is 0, the peer should set OVER_LIMIT instead of waiting for the owner - // to respond with OVER_LIMIT. - sendHit(guber.Status_OVER_LIMIT, 1) - // Wait for the broadcast from the owner to the peer - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 2)) - // The status should still be OVER_LIMIT - sendHit(guber.Status_OVER_LIMIT, 0) -} + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) -func TestGlobalRateLimitsPeerOverLimitLeaky(t *testing.T) { - const ( - name = "test_global_token_limit_leaky" - key = "account:12345" - ) + // Send two hits that should be processed by the owner and the broadcast to + // peer, depleting the remaining. 
+ sendHit(guber.Status_UNDER_LIMIT, 1, 1) + sendHit(guber.Status_UNDER_LIMIT, 1, 0) - peers, err := cluster.ListNonOwningDaemons(name, key) - require.NoError(t, err) + // Wait for the broadcast from the owner to the peer + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 1)) - sendHit := func(client guber.V1Client, expectedStatus guber.Status, hits int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) - defer cancel() - resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, - Hits: hits, - Limit: 2, - }, - }, - }) - assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) - } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) + // Since the remainder is 0, the peer should return OVER_LIMIT on next hit. + sendHit(guber.Status_OVER_LIMIT, 1, 0) - // Send two hits that should be processed by the owner and the broadcast to peer, depleting the remaining - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1) - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1) - // Wait for the broadcast from the owner to the peers - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) - // Ask a different peer if the status is over the limit - sendHit(peers[1].MustClient(), guber.Status_OVER_LIMIT, 1) + // Wait for the broadcast from the owner to the peer. + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 2)) + + // The status should still be OVER_LIMIT. + sendHit(guber.Status_OVER_LIMIT, 0, 0) } func TestGlobalRequestMoreThanAvailable(t *testing.T) { - const ( - name = "test_global_more_than_available" - key = "account:123456" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) @@ -1203,11 +1169,9 @@ func TestGlobalRequestMoreThanAvailable(t *testing.T) { assert.Equal(t, "", resp.Responses[0].GetError()) assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - prev, err := getBroadcastCount(owner) - require.NoError(t, err) + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") // Ensure GRPC has connections to each peer before we start, as we want // the actual test requests to happen quite fast. 
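// Not part of the patch: the TestGlobalBehavior test added further below calls a
// package-level sendHit(t, daemon, req, expectStatus, expectRemaining) helper that is
// defined elsewhere in this change. A minimal sketch of what such a helper could look
// like, using only APIs already visible in this diff (Daemon.MustClient, GetRateLimits);
// treating a negative expected remaining as "skip the check" is an assumption, as is
// the exact signature.
func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatus guber.Status, expectRemaining int64) {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 10*clock.Second)
	defer cancel()
	resp, err := d.MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{
		Requests: []*guber.RateLimitReq{req},
	})
	require.NoError(t, err)
	item := resp.Responses[0]
	assert.Equal(t, "", item.Error, "unexpected error")
	assert.Equal(t, expectStatus, item.Status, "mismatch status")
	if expectRemaining >= 0 {
		assert.Equal(t, expectRemaining, item.Remaining, "mismatch remaining")
	}
}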
@@ -1238,11 +1202,10 @@ func TestGlobalRequestMoreThanAvailable(t *testing.T) { } func TestGlobalNegativeHits(t *testing.T) { - const ( - name = "test_global_negative_hits" - key = "account:12345" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) @@ -1267,9 +1230,10 @@ func TestGlobalNegativeHits(t *testing.T) { assert.Equal(t, status, resp.Responses[0].GetStatus()) assert.Equal(t, remaining, resp.Responses[0].Remaining) } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - prev, err := getBroadcastCount(owner) + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") require.NoError(t, err) // Send a negative hit on a rate limit with no hits @@ -1292,11 +1256,10 @@ func TestGlobalNegativeHits(t *testing.T) { } func TestGlobalResetRemaining(t *testing.T) { - const ( - name = "test_global_reset" - key = "account:123456" - ) - + name := t.Name() + key := guber.RandomString(10) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) @@ -1321,9 +1284,10 @@ func TestGlobalResetRemaining(t *testing.T) { assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) assert.Equal(t, remaining, resp.Responses[0].Remaining) } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - prev, err := getBroadcastCount(owner) + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") require.NoError(t, err) for _, p := range peers { @@ -1374,21 +1338,11 @@ func TestGlobalResetRemaining(t *testing.T) { }) require.NoError(t, err) assert.NotEqual(t, 100, resp.Responses[0].Remaining) - -} - -func getMetricRequest(url string, name string) (*model.Sample, error) { - resp, err := http.Get(url) - if err != nil { - return nil, err - } - defer resp.Body.Close() - return getMetric(resp.Body, name) } func TestChangeLimit(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -1469,7 +1423,7 @@ func TestChangeLimit(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -1482,8 +1436,8 @@ func TestChangeLimit(t *testing.T) { } func TestResetRemaining(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -1542,7 +1496,7 @@ func TestResetRemaining(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -1554,93 +1508,42 @@ func TestResetRemaining(t *testing.T) { } func TestHealthCheck(t *testing.T) { - client, err := guber.DialV1Server(cluster.DaemonAt(0).GRPCListeners[0].Addr().String(), nil) - require.NoError(t, err) - - // Check that the cluster is healthy to start with - healthResp, err := client.HealthCheck(context.Background(), 
&guber.HealthCheckReq{}) - require.NoError(t, err) - - require.Equal(t, "healthy", healthResp.GetStatus()) - - // Create a global rate limit that will need to be sent to all peers in the cluster - _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: "test_health_check", - UniqueKey: "account:12345", - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_BATCHING, - Duration: guber.Second * 3, - Hits: 1, - Limit: 5, - }, - }, - }) - require.Nil(t, err) - - // Stop the rest of the cluster to ensure errors occur on our instance - for i := 1; i < cluster.NumOfDaemons(); i++ { - d := cluster.DaemonAt(i) - require.NotNil(t, d) - d.Close() + // Check that the cluster is healthy to start with. + for _, peer := range cluster.GetDaemons() { + healthResp, err := peer.MustClient().HealthCheck(context.Background(), &guber.HealthCheckReq{}) + require.NoError(t, err) + assert.Equal(t, "healthy", healthResp.Status) } - // Hit the global rate limit again this time causing a connection error - _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: "test_health_check", - UniqueKey: "account:12345", - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Second * 3, - Hits: 1, - Limit: 5, - }, - }, - }) - require.Nil(t, err) + // Stop the cluster to ensure errors occur on our instance. + cluster.Stop() - testutil.UntilPass(t, 20, clock.Millisecond*300, func(t testutil.TestingT) { - // Check the health again to get back the connection error - healthResp, err = client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - if assert.Nil(t, err) { - return + // Check the health again to get back the connection error. + testutil.UntilPass(t, 20, 300*clock.Millisecond, func(t testutil.TestingT) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + for _, peer := range cluster.GetDaemons() { + _, err := peer.MustClient().HealthCheck(ctx, &guber.HealthCheckReq{}) + assert.Error(t, err, "connect: connection refused") } - - assert.Equal(t, "unhealthy", healthResp.GetStatus()) - assert.Contains(t, healthResp.GetMessage(), "connect: connection refused") }) - // Restart stopped instances - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*15) - defer cancel() - require.NoError(t, cluster.Restart(ctx)) - - // wait for every peer instance to come back online - for _, peer := range cluster.GetPeers() { - peerClient, err := guber.DialV1Server(peer.GRPCAddress, nil) - require.NoError(t, err) - testutil.UntilPass(t, 10, clock.Millisecond*300, func(t testutil.TestingT) { - healthResp, err = peerClient.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - assert.Equal(t, "healthy", healthResp.GetStatus()) - }) - } + // Restart so cluster is ready for next test. 
+ require.NoError(t, startGubernator()) } func TestLeakyBucketDivBug(t *testing.T) { - // Freeze time so we don't leak during the test defer clock.Freeze(clock.Now()).Unfreeze() - + name := t.Name() + key := guber.RandomString(10) client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(t, err) resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_div", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.Millisecond * 1000, Hits: 1, @@ -1658,8 +1561,8 @@ func TestLeakyBucketDivBug(t *testing.T) { resp, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_div", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.Millisecond * 1000, Hits: 100, @@ -1683,6 +1586,8 @@ func TestMultiRegion(t *testing.T) { } func TestGRPCGateway(t *testing.T) { + name := t.Name() + key := guber.RandomString(10) address := cluster.GetRandomPeer(cluster.DataCenterNone).HTTPAddress resp, err := http.DefaultClient.Get("http://" + address + "/v1/HealthCheck") require.NoError(t, err) @@ -1702,8 +1607,8 @@ func TestGRPCGateway(t *testing.T) { payload, err := json.Marshal(&guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "requests_per_sec", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Duration: guber.Millisecond * 1000, Hits: 1, Limit: 10, @@ -1731,6 +1636,7 @@ func TestGRPCGateway(t *testing.T) { } func TestGetPeerRateLimits(t *testing.T) { + name := t.Name() ctx := context.Background() peerClient, err := guber.NewPeerClient(guber.PeerConfig{ Info: cluster.GetRandomPeer(cluster.DataCenterNone), @@ -1740,6 +1646,7 @@ func TestGetPeerRateLimits(t *testing.T) { t.Run("Stable rate check request order", func(t *testing.T) { // Ensure response order matches rate check request order. // Try various batch sizes. 
+ createdAt := epochMillis(clock.Now()) testCases := []int{1, 2, 5, 10, 100, 1000} for _, n := range testCases { @@ -1750,13 +1657,14 @@ func TestGetPeerRateLimits(t *testing.T) { } for i := 0; i < n; i++ { req.Requests[i] = &guber.RateLimitReq{ - Name: "Foobar", - UniqueKey: fmt.Sprintf("%08x", i), + Name: name, + UniqueKey: guber.RandomString(10), Hits: 0, Limit: 1000 + int64(i), Duration: 1000, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_BATCHING, + CreatedAt: &createdAt, } } @@ -1779,6 +1687,468 @@ func TestGetPeerRateLimits(t *testing.T) { // TODO: Add a test for sending no rate limits RateLimitReqList.RateLimits = nil +func TestGlobalBehavior(t *testing.T) { + const limit = 1000 + broadcastTimeout := 400 * time.Millisecond + createdAt := epochMillis(clock.Now()) + + makeReq := func(name, key string, hits int64) *guber.RateLimitReq { + return &guber.RateLimitReq{ + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_TOKEN_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 3, + Hits: hits, + Limit: limit, + CreatedAt: &createdAt, + } + } + + t.Run("Hits on owner peer", func(t *testing.T) { + testCases := []struct { + Name string + Hits int64 + }{ + {Name: "Single hit", Hits: 1}, + {Name: "Multiple hits", Hits: 10}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + t.Logf("Owner peer: %s", owner.InstanceID) + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + + // When + for i := int64(0); i < testCase.Hits; i++ { + sendHit(t, owner, makeReq(name, key, 1), guber.Status_UNDER_LIMIT, 999-i) + } + + // Then + // Expect a single global broadcast to all non-owner peers. + t.Log("Waiting for global broadcasts") + var wg sync.WaitGroup + var didOwnerBroadcast, didNonOwnerBroadcast int + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + didOwnerBroadcast++ + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + didNonOwnerBroadcast++ + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, 1, didOwnerBroadcast) + assert.Zero(t, didNonOwnerBroadcast) + + // Check for global hits update from non-owner to owner peer. + // Expect no global hits update because the hits were given + // directly to the owner peer. 
+ t.Log("Waiting for global broadcasts") + var didOwnerUpdate, didNonOwnerUpdate int + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + didOwnerUpdate++ + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + didNonOwnerUpdate++ + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Zero(t, didNonOwnerUpdate) + + // Assert UpdatePeerGlobals endpoint called once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.Equal(t, expected, upgCounters2[peer.InstanceID]) + } + + // Assert PeerGetRateLimits endpoint not called. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + assert.Equal(t, expected, gprlCounters2[peer.InstanceID]) + } + + // Verify all peers report consistent remaining value value. + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, limit-testCase.Hits) + } + }) + } + }) + + t.Run("Hits on non-owner peer", func(t *testing.T) { + testCases := []struct { + Name string + Hits int64 + }{ + {Name: "Single hit", Hits: 1}, + {Name: "Multiple htis", Hits: 10}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + t.Logf("Owner peer: %s", owner.InstanceID) + + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + + // When + for i := int64(0); i < testCase.Hits; i++ { + sendHit(t, peers[0], makeReq(name, key, 1), guber.Status_UNDER_LIMIT, 999-i) + } + + // Then + // Check for global hits update from non-owner to owner peer. + // Expect single global hits update from non-owner peer that received hits. 
+ t.Log("Waiting for global hits updates") + var wg sync.WaitGroup + var didOwnerUpdate int + var didNonOwnerUpdate []string + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + didOwnerUpdate++ + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + didNonOwnerUpdate = append(didNonOwnerUpdate, peer.InstanceID) + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Len(t, didNonOwnerUpdate, 1) + assert.Equal(t, []string{peers[0].InstanceID}, didNonOwnerUpdate) + + // Expect a single global broadcast to all non-owner peers. + t.Log("Waiting for global broadcasts") + var didOwnerBroadcast, didNonOwnerBroadcast int + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + didOwnerBroadcast++ + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + didNonOwnerBroadcast++ + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, 1, didOwnerBroadcast) + assert.Empty(t, didNonOwnerBroadcast) + + // Assert UpdatePeerGlobals endpoint called once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.Equal(t, expected, upgCounters2[peer.InstanceID], "upgCounter %s", peer.InstanceID) + } + + // Assert PeerGetRateLimits endpoint called once on owner. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + if peer.InstanceID == owner.InstanceID { + expected++ + } + assert.Equal(t, expected, gprlCounters2[peer.InstanceID], "gprlCounter %s", peer.InstanceID) + } + + // Verify all peers report consistent remaining value value. 
+ for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, limit-testCase.Hits) + } + }) + } + }) + + t.Run("Distributed hits", func(t *testing.T) { + testCases := []struct { + Name string + Hits int + }{ + {Name: "2 hits", Hits: 2}, + {Name: "10 hits", Hits: 10}, + {Name: "100 hits", Hits: 100}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + var localPeers []*guber.Daemon + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + localPeers = append(localPeers, peer) + } + } + t.Logf("Owner peer: %s", owner.InstanceID) + + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + expectUpdate := make(map[string]struct{}) + var wg sync.WaitGroup + var mutex sync.Mutex + + // When + wg.Add(testCase.Hits) + for i := 0; i < testCase.Hits; i++ { + peer := localPeers[i%len(localPeers)] + go func(peer *guber.Daemon) { + sendHit(t, peer, makeReq(name, key, 1), guber.Status_UNDER_LIMIT, -1) + if peer.InstanceID != owner.InstanceID { + mutex.Lock() + expectUpdate[peer.InstanceID] = struct{}{} + mutex.Unlock() + } + wg.Done() + }(peer) + } + wg.Wait() + + // Then + // Check for global hits update from non-owner to owner peer. + // Expect single update from each non-owner peer that received + // hits. + t.Log("Waiting for global hits updates") + var didOwnerUpdate int64 + var didNonOwnerUpdate []string + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + atomic.AddInt64(&didOwnerUpdate, 1) + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + mutex.Lock() + didNonOwnerUpdate = append(didNonOwnerUpdate, peer.InstanceID) + mutex.Unlock() + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Len(t, didNonOwnerUpdate, len(expectUpdate)) + expectedNonOwnerUpdate := maps.Keys(expectUpdate) + sort.Strings(expectedNonOwnerUpdate) + sort.Strings(didNonOwnerUpdate) + assert.Equal(t, expectedNonOwnerUpdate, didNonOwnerUpdate) + + // Expect a single global broadcast to all non-owner peers. 
+ t.Log("Waiting for global broadcasts") + var didOwnerBroadcast, didNonOwnerBroadcast int64 + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + atomic.AddInt64(&didOwnerBroadcast, 1) + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + atomic.AddInt64(&didNonOwnerBroadcast, 1) + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, int64(1), didOwnerBroadcast) + assert.Empty(t, didNonOwnerBroadcast) + + // Assert UpdatePeerGlobals endpoint called at least + // once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.GreaterOrEqual(t, upgCounters2[peer.InstanceID], expected, "upgCounter %s", peer.InstanceID) + } + + // Assert PeerGetRateLimits endpoint called on owner + // for each non-owner that received hits. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + if peer.InstanceID == owner.InstanceID { + expected += float64(len(expectUpdate)) + } + assert.Equal(t, expected, gprlCounters2[peer.InstanceID], "gprlCounter %s", peer.InstanceID) + } + + // Verify all peers report consistent remaining value value. + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, int64(limit-testCase.Hits)) + } + }) + } + }) +} + +// Request metrics and parse into map. +// Optionally pass names to filter metrics by name. 
+func getMetrics(HTTPAddr string, names ...string) (map[string]*model.Sample, error) { + url := fmt.Sprintf("http://%s/metrics", HTTPAddr) + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP error requesting metrics: %s", resp.Status) + } + decoder := expfmt.SampleDecoder{ + Dec: expfmt.NewDecoder(resp.Body, expfmt.FmtText), + Opts: &expfmt.DecodeOptions{ + Timestamp: model.Now(), + }, + } + nameSet := make(map[string]struct{}) + for _, name := range names { + nameSet[name] = struct{}{} + } + metrics := make(map[string]*model.Sample) + + for { + var smpls model.Vector + err := decoder.Decode(&smpls) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + for _, smpl := range smpls { + name := smpl.Metric.String() + if _, ok := nameSet[name]; ok || len(nameSet) == 0 { + metrics[name] = smpl + } + } + } + + return metrics, nil +} + +func getMetricRequest(url string, name string) (*model.Sample, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + return getMetric(resp.Body, name) +} + func getMetric(in io.Reader, name string) (*model.Sample, error) { dec := expfmt.SampleDecoder{ Dec: expfmt.NewDecoder(in, expfmt.FmtText), @@ -1808,44 +2178,172 @@ func getMetric(in io.Reader, name string) (*model.Sample, error) { return nil, nil } -// getBroadcastCount returns the current broadcast count for use with waitForBroadcast() -// TODO: Replace this with something else, we can call and reset via HTTP/GRPC calls in gubernator v3 -func getBroadcastCount(d *guber.Daemon) (int, error) { - m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), - "gubernator_broadcast_duration_count") - if err != nil { - return 0, err - } +// waitForBroadcast waits until the broadcast count for the daemon changes to +// at least the expected value and the broadcast queue is empty. +// Returns an error if timeout waiting for conditions to be met. +func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect float64) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + for { + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_broadcast_duration_count", "gubernator_global_queue_length") + if err != nil { + return err + } + gbdc := metrics["gubernator_broadcast_duration_count"] + ggql := metrics["gubernator_global_queue_length"] + + // It's possible a broadcast occurred twice if waiting for multiple + // peers to forward updates to non-owners. + if float64(gbdc.Value) >= expect && ggql.Value == 0 { + return nil + } - return int(m.Value), nil + select { + case <-clock.After(100 * clock.Millisecond): + case <-ctx.Done(): + return ctx.Err() + } + } } -// waitForBroadcast waits until the broadcast count for the daemon passed -// changes to the expected value. Returns an error if the expected value is -// not found before the context is cancelled. -func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error { +// waitForUpdate waits until the global hits update count for the daemon +// changes to at least the expected value and the global update queue is empty. +// Returns an error if timeout waiting for conditions to be met. 
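+//
+// A usage sketch following the pattern used in the global behavior tests
+// above (peer, name, key and broadcastTimeout come from the calling test):
+// snapshot the counter, send hits, then wait for the owner-bound update to
+// flush:
+//
+//	before := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count")
+//	sendHit(t, peer, makeReq(name, key, 1), guber.Status_UNDER_LIMIT, -1)
+//	err := waitForUpdate(broadcastTimeout, peer, before[peer.InstanceID]+1)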
+func waitForUpdate(timeout clock.Duration, d *guber.Daemon, expect float64) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() for { - m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), - "gubernator_broadcast_duration_count") + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_global_send_duration_count", "gubernator_global_send_queue_length") if err != nil { return err } + gsdc := metrics["gubernator_global_send_duration_count"] + gsql := metrics["gubernator_global_send_queue_length"] - // It's possible a broadcast occurred twice if waiting for multiple peer to + // It's possible a hit occurred twice if waiting for multiple peers to // forward updates to the owner. - if int(m.Value) >= expect { - // Give the nodes some time to process the broadcasts - clock.Sleep(clock.Millisecond * 500) + if float64(gsdc.Value) >= expect && gsql.Value == 0 { return nil } select { - case <-clock.After(time.Millisecond * 800): + case <-clock.After(100 * clock.Millisecond): case <-ctx.Done(): return ctx.Err() } } } + +// waitForIdle waits until both global broadcast and global hits queues are +// empty. +func waitForIdle(timeout clock.Duration, daemons ...*guber.Daemon) error { + var wg syncutil.WaitGroup + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + for _, d := range daemons { + wg.Run(func(raw any) error { + d := raw.(*guber.Daemon) + for { + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_global_queue_length", "gubernator_global_send_queue_length") + if err != nil { + return err + } + ggql := metrics["gubernator_global_queue_length"] + gsql := metrics["gubernator_global_send_queue_length"] + + if ggql.Value == 0 && gsql.Value == 0 { + return nil + } + + select { + case <-clock.After(100 * clock.Millisecond): + case <-ctx.Done(): + return ctx.Err() + } + } + }, d) + } + errs := wg.Wait() + if len(errs) > 0 { + return errs[0] + } + return nil +} + +func getMetricValue(t *testing.T, d *guber.Daemon, name string) float64 { + m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), + name) + require.NoError(t, err) + if m == nil { + return 0 + } + return float64(m.Value) +} + +// Get metric counter values on each peer. 
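+//
+// For example, to snapshot broadcast counts before asserting on them (as the
+// global behavior tests above do):
+//
+//	before := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count")
+//	// ... send hits ...
+//	err := waitForBroadcast(broadcastTimeout, owner, before[owner.InstanceID]+1)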
+func getPeerCounters(t *testing.T, peers []*guber.Daemon, name string) map[string]float64 { + counters := make(map[string]float64) + for _, peer := range peers { + counters[peer.InstanceID] = getMetricValue(t, peer, name) + } + return counters +} + +func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatus guber.Status, expectRemaining int64) { + if req.Hits != 0 { + t.Logf("Sending %d hits to peer %s", req.Hits, d.InstanceID) + } + client := d.MustClient() + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{req}, + }) + require.NoError(t, err) + item := resp.Responses[0] + assert.Equal(t, "", item.Error) + if expectRemaining >= 0 { + assert.Equal(t, expectRemaining, item.Remaining) + } + assert.Equal(t, expectStatus, item.Status) + assert.Equal(t, req.Limit, item.Limit) +} + +func epochMillis(t time.Time) int64 { + return t.UnixNano() / 1_000_000 +} + +func startGubernator() error { + err := cluster.StartWith([]guber.PeerInfo{ + {GRPCAddress: "127.0.0.1:9990", HTTPAddress: "127.0.0.1:9980", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9991", HTTPAddress: "127.0.0.1:9981", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9992", HTTPAddress: "127.0.0.1:9982", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9993", HTTPAddress: "127.0.0.1:9983", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9994", HTTPAddress: "127.0.0.1:9984", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9995", HTTPAddress: "127.0.0.1:9985", DataCenter: cluster.DataCenterNone}, + + // DataCenterOne + {GRPCAddress: "127.0.0.1:9890", HTTPAddress: "127.0.0.1:9880", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9891", HTTPAddress: "127.0.0.1:9881", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9892", HTTPAddress: "127.0.0.1:9882", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9893", HTTPAddress: "127.0.0.1:9883", DataCenter: cluster.DataCenterOne}, + }) + if err != nil { + return errors.Wrap(err, "while starting cluster") + } + + // Populate peer clients. Avoids data races when goroutines conflict trying + // to instantiate client singletons. + for _, peer := range cluster.GetDaemons() { + _, err = peer.Client() + if err != nil { + return errors.Wrap(err, "while connecting client") + } + } + return nil +} diff --git a/global.go b/global.go index bd0c1e7c..c5fe1676 100644 --- a/global.go +++ b/global.go @@ -22,28 +22,29 @@ import ( "github.com/mailgun/holster/v4/syncutil" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "google.golang.org/protobuf/proto" ) // globalManager manages async hit queue and updates peers in // the cluster periodically when a global rate limit we own updates. type globalManager struct { - hitsQueue chan *RateLimitReq - broadcastQueue chan *UpdatePeerGlobal - wg syncutil.WaitGroup - conf BehaviorConfig - log FieldLogger - instance *V1Instance // TODO circular import? V1Instance also holds a reference to globalManager - metricGlobalSendDuration prometheus.Summary - metricBroadcastDuration prometheus.Summary - metricBroadcastCounter *prometheus.CounterVec - metricGlobalQueueLength prometheus.Gauge + hitsQueue chan *RateLimitReq + broadcastQueue chan *RateLimitReq + wg syncutil.WaitGroup + conf BehaviorConfig + log FieldLogger + instance *V1Instance // TODO circular import? 
V1Instance also holds a reference to globalManager + metricGlobalSendDuration prometheus.Summary + metricGlobalSendQueueLength prometheus.Gauge + metricBroadcastDuration prometheus.Summary + metricGlobalQueueLength prometheus.Gauge } func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager { gm := globalManager{ log: instance.log, hitsQueue: make(chan *RateLimitReq, conf.GlobalBatchLimit), - broadcastQueue: make(chan *UpdatePeerGlobal, conf.GlobalBatchLimit), + broadcastQueue: make(chan *RateLimitReq, conf.GlobalBatchLimit), instance: instance, conf: conf, metricGlobalSendDuration: prometheus.NewSummary(prometheus.SummaryOpts{ @@ -51,15 +52,15 @@ func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager Help: "The duration of GLOBAL async sends in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.99: 0.001}, }), + metricGlobalSendQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "gubernator_global_send_queue_length", + Help: "The count of requests queued up for global broadcast. This is only used for GetRateLimit requests using global behavior.", + }), metricBroadcastDuration: prometheus.NewSummary(prometheus.SummaryOpts{ Name: "gubernator_broadcast_duration", Help: "The duration of GLOBAL broadcasts to peers in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.99: 0.001}, }), - metricBroadcastCounter: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "gubernator_broadcast_counter", - Help: "The count of broadcasts.", - }, []string{"condition"}), metricGlobalQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{ Name: "gubernator_global_queue_length", Help: "The count of requests queued up for global broadcast. This is only used for GetRateLimit requests using global behavior.", @@ -71,14 +72,14 @@ func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager } func (gm *globalManager) QueueHit(r *RateLimitReq) { - gm.hitsQueue <- r + if r.Hits != 0 { + gm.hitsQueue <- r + } } -func (gm *globalManager) QueueUpdate(req *RateLimitReq, resp *RateLimitResp) { - gm.broadcastQueue <- &UpdatePeerGlobal{ - Key: req.HashKey(), - Algorithm: req.Algorithm, - Status: resp, +func (gm *globalManager) QueueUpdate(req *RateLimitReq) { + if req.Hits != 0 { + gm.broadcastQueue <- req } } @@ -108,11 +109,13 @@ func (gm *globalManager) runAsyncHits() { } else { hits[key] = r } + gm.metricGlobalSendQueueLength.Set(float64(len(hits))) // Send the hits if we reached our batch limit if len(hits) == gm.conf.GlobalBatchLimit { gm.sendHits(hits) hits = make(map[string]*RateLimitReq) + gm.metricGlobalSendQueueLength.Set(0) return true } @@ -126,6 +129,7 @@ func (gm *globalManager) runAsyncHits() { if len(hits) != 0 { gm.sendHits(hits) hits = make(map[string]*RateLimitReq) + gm.metricGlobalSendQueueLength.Set(0) } case <-done: interval.Stop() @@ -188,18 +192,19 @@ func (gm *globalManager) sendHits(hits map[string]*RateLimitReq) { // and in a periodic frequency determined by GlobalSyncWait. 
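+// Updates are queued as *RateLimitReq via QueueUpdate and de-duplicated by
+// HashKey, so a key that is hit many times between syncs is broadcast once
+// per interval; the state sent to peers is looked up at broadcast time in
+// broadcastPeers.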
func (gm *globalManager) runBroadcasts() { var interval = NewInterval(gm.conf.GlobalSyncWait) - updates := make(map[string]*UpdatePeerGlobal) + updates := make(map[string]*RateLimitReq) gm.wg.Until(func(done chan struct{}) bool { select { - case updateReq := <-gm.broadcastQueue: - updates[updateReq.Key] = updateReq + case update := <-gm.broadcastQueue: + updates[update.HashKey()] = update + gm.metricGlobalQueueLength.Set(float64(len(updates))) // Send the hits if we reached our batch limit if len(updates) >= gm.conf.GlobalBatchLimit { - gm.metricBroadcastCounter.WithLabelValues("queue_full").Inc() gm.broadcastPeers(context.Background(), updates) - updates = make(map[string]*UpdatePeerGlobal) + updates = make(map[string]*RateLimitReq) + gm.metricGlobalQueueLength.Set(0) return true } @@ -210,13 +215,13 @@ func (gm *globalManager) runBroadcasts() { } case <-interval.C: - if len(updates) != 0 { - gm.metricBroadcastCounter.WithLabelValues("timer").Inc() - gm.broadcastPeers(context.Background(), updates) - updates = make(map[string]*UpdatePeerGlobal) - } else { - gm.metricGlobalQueueLength.Set(0) + if len(updates) == 0 { + break } + gm.broadcastPeers(context.Background(), updates) + updates = make(map[string]*RateLimitReq) + gm.metricGlobalQueueLength.Set(0) + case <-done: interval.Stop() return false @@ -226,14 +231,30 @@ func (gm *globalManager) runBroadcasts() { } // broadcastPeers broadcasts global rate limit statuses to all other peers -func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]*UpdatePeerGlobal) { +func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]*RateLimitReq) { defer prometheus.NewTimer(gm.metricBroadcastDuration).ObserveDuration() var req UpdatePeerGlobalsReq + reqState := RateLimitReqState{IsOwner: false} gm.metricGlobalQueueLength.Set(float64(len(updates))) - for _, r := range updates { - req.Globals = append(req.Globals, r) + for _, update := range updates { + // Get current rate limit state. 
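+		// A zero-hit clone is used so the lookup below only reads the current
+		// bucket state without consuming tokens before it is packaged into the
+		// UpdatePeerGlobal sent to the other peers.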
+ grlReq := proto.Clone(update).(*RateLimitReq) + grlReq.Hits = 0 + status, err := gm.instance.workerPool.GetRateLimit(ctx, grlReq, reqState) + if err != nil { + gm.log.WithError(err).Error("while retrieving rate limit status") + continue + } + updateReq := &UpdatePeerGlobal{ + Key: update.HashKey(), + Algorithm: update.Algorithm, + Duration: update.Duration, + Status: status, + CreatedAt: *update.CreatedAt, + } + req.Globals = append(req.Globals, updateReq) } fan := syncutil.NewFanOut(gm.conf.GlobalPeerRequestsConcurrency) diff --git a/go.mod b/go.mod index 93080b32..cb0f9886 100644 --- a/go.mod +++ b/go.mod @@ -23,8 +23,9 @@ require ( go.opentelemetry.io/otel/sdk v1.21.0 go.opentelemetry.io/otel/trace v1.21.0 go.uber.org/goleak v1.3.0 - golang.org/x/net v0.18.0 - golang.org/x/sync v0.3.0 + golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 + golang.org/x/net v0.22.0 + golang.org/x/sync v0.6.0 golang.org/x/time v0.3.0 google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b google.golang.org/grpc v1.59.0 @@ -81,12 +82,12 @@ require ( go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.8.0 // indirect go.uber.org/zap v1.21.0 // indirect - golang.org/x/mod v0.8.0 // indirect + golang.org/x/mod v0.15.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sys v0.14.0 // indirect - golang.org/x/term v0.14.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.6.0 // indirect + golang.org/x/tools v0.18.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20231012201019-e917dd12ba7a // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20231016165738-49dd2c1f3d0b // indirect diff --git a/go.sum b/go.sum index fea9ef4c..7b2a2004 100644 --- a/go.sum +++ b/go.sum @@ -478,6 +478,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -503,8 +505,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -550,8 +552,8 @@ golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= -golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= +golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= +golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -579,8 +581,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -643,13 +645,13 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= -golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= -golang.org/x/term v0.14.0/go.mod 
h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -721,8 +723,8 @@ golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ= +golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/gubernator.go b/gubernator.go index 7ec9a96a..ff6812ae 100644 --- a/gubernator.go +++ b/gubernator.go @@ -21,8 +21,10 @@ import ( "fmt" "strings" "sync" + "time" "github.com/mailgun/errors" + "github.com/mailgun/holster/v4/clock" "github.com/mailgun/holster/v4/syncutil" "github.com/mailgun/holster/v4/tracing" "github.com/prometheus/client_golang/prometheus" @@ -51,6 +53,10 @@ type V1Instance struct { workerPool *WorkerPool } +type RateLimitReqState struct { + IsOwner bool +} + var ( metricGetRateLimitCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "gubernator_getratelimit_counter", @@ -186,6 +192,7 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G "Requests.RateLimits list too large; max size is '%d'", maxBatchSize) } + createdAt := epochMillis(clock.Now()) resp := GetRateLimitsResp{ Responses: make([]*RateLimitResp, len(r.Requests)), } @@ -198,17 +205,19 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G var peer *PeerClient var err error - if len(req.UniqueKey) == 0 { + if req.UniqueKey == "" { metricCheckErrorCounter.WithLabelValues("Invalid request").Inc() resp.Responses[i] = &RateLimitResp{Error: "field 'unique_key' cannot be empty"} continue } - - if len(req.Name) == 0 { + if req.Name == "" { metricCheckErrorCounter.WithLabelValues("Invalid request").Inc() resp.Responses[i] = &RateLimitResp{Error: "field 'namespace' cannot be empty"} continue } + if req.CreatedAt == nil || *req.CreatedAt == 0 { + req.CreatedAt = &createdAt + } if ctx.Err() != nil { err = errors.Wrap(ctx.Err(), "Error while iterating request items") @@ -235,9 +244,10 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G } // If our server instance is the owner of this rate limit - if peer.Info().IsOwner { + reqState := RateLimitReqState{IsOwner: peer.Info().IsOwner} + if reqState.IsOwner { // Apply our rate limit algorithm to the request - resp.Responses[i], err = s.getLocalRateLimit(ctx, req) + 
resp.Responses[i], err = s.getLocalRateLimit(ctx, req, reqState) if err != nil { err = errors.Wrapf(err, "Error while apply rate limit for '%s'", key) span := trace.SpanFromContext(ctx) @@ -308,6 +318,7 @@ func (s *V1Instance) asyncRequest(ctx context.Context, req *AsyncReq) { funcTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.asyncRequest")) defer funcTimer.ObserveDuration() + reqState := RateLimitReqState{IsOwner: req.Peer.Info().IsOwner} resp := AsyncResp{ Idx: req.Idx, } @@ -326,8 +337,8 @@ func (s *V1Instance) asyncRequest(ctx context.Context, req *AsyncReq) { // If we are attempting again, the owner of this rate limit might have changed to us! if attempts != 0 { - if req.Peer.Info().IsOwner { - resp.Resp, err = s.getLocalRateLimit(ctx, req.Req) + if reqState.IsOwner { + resp.Resp, err = s.getLocalRateLimit(ctx, req.Req, reqState) if err != nil { s.log.WithContext(ctx). WithError(err). @@ -394,12 +405,13 @@ func (s *V1Instance) getGlobalRateLimit(ctx context.Context, req *RateLimitReq) tracing.EndScope(ctx, err) }() - cpy := proto.Clone(req).(*RateLimitReq) - SetBehavior(&cpy.Behavior, Behavior_NO_BATCHING, true) - SetBehavior(&cpy.Behavior, Behavior_GLOBAL, false) + req2 := proto.Clone(req).(*RateLimitReq) + SetBehavior(&req2.Behavior, Behavior_NO_BATCHING, true) + SetBehavior(&req2.Behavior, Behavior_GLOBAL, false) + reqState := RateLimitReqState{IsOwner: false} // Process the rate limit like we own it - resp, err = s.getLocalRateLimit(ctx, cpy) + resp, err = s.getLocalRateLimit(ctx, req2, reqState) if err != nil { return nil, errors.Wrap(err, "during in getLocalRateLimit") } @@ -411,6 +423,7 @@ func (s *V1Instance) getGlobalRateLimit(ctx context.Context, req *RateLimitReq) // UpdatePeerGlobals updates the local cache with a list of global rate limits. This method should only // be called by a peer who is the owner of a global rate limit. func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobalsReq) (*UpdatePeerGlobalsResp, error) { + defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.UpdatePeerGlobals")).ObserveDuration() now := MillisecondNow() for _, g := range r.Globals { item := &CacheItem{ @@ -423,6 +436,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals item.Value = &LeakyBucketItem{ Remaining: float64(g.Status.Remaining), Limit: g.Status.Limit, + Duration: g.Duration, Burst: g.Status.Limit, UpdatedAt: now, } @@ -430,6 +444,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals item.Value = &TokenBucketItem{ Status: g.Status.Status, Limit: g.Status.Limit, + Duration: g.Duration, Remaining: g.Status.Remaining, CreatedAt: now, } @@ -445,6 +460,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals // GetPeerRateLimits is called by other peers to get the rate limits owned by this peer. 
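+// Requests that arrive without a CreatedAt value (for example from older
+// peers) are assigned one on receipt, mirroring the defaulting done in
+// GetRateLimits.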
func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimitsReq) (resp *GetPeerRateLimitsResp, err error) { + defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.GetPeerRateLimits")).ObserveDuration() if len(r.Requests) > maxBatchSize { err := fmt.Errorf("'PeerRequest.rate_limits' list too large; max size is '%d'", maxBatchSize) metricCheckErrorCounter.WithLabelValues("Request too large").Inc() @@ -467,6 +483,7 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits respChan := make(chan respOut) var respWg sync.WaitGroup respWg.Add(1) + reqState := RateLimitReqState{IsOwner: true} go func() { // Capture each response and return in the same order @@ -494,7 +511,13 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits SetBehavior(&rin.req.Behavior, Behavior_DRAIN_OVER_LIMIT, true) } - rl, err := s.getLocalRateLimit(ctx, rin.req) + // Assign default to CreatedAt for backwards compatibility. + if rin.req.CreatedAt == nil || *rin.req.CreatedAt == 0 { + createdAt := epochMillis(clock.Now()) + rin.req.CreatedAt = &createdAt + } + + rl, err := s.getLocalRateLimit(ctx, rin.req, reqState) if err != nil { // Return the error for this request err = errors.Wrap(err, "Error in getLocalRateLimit") @@ -562,7 +585,7 @@ func (s *V1Instance) HealthCheck(ctx context.Context, r *HealthCheckReq) (health return health, nil } -func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ *RateLimitResp, err error) { +func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, reqState RateLimitReqState) (_ *RateLimitResp, err error) { ctx = tracing.StartNamedScope(ctx, "V1Instance.getLocalRateLimit", trace.WithAttributes( attribute.String("ratelimit.key", r.UniqueKey), attribute.String("ratelimit.name", r.Name), @@ -572,17 +595,19 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ defer func() { tracing.EndScope(ctx, err) }() defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getLocalRateLimit")).ObserveDuration() - resp, err := s.workerPool.GetRateLimit(ctx, r) + resp, err := s.workerPool.GetRateLimit(ctx, r, reqState) if err != nil { return nil, errors.Wrap(err, "during workerPool.GetRateLimit") } - metricGetRateLimitCounter.WithLabelValues("local").Inc() // If global behavior, then broadcast update to all peers. 
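+	// Only the request itself is queued here; the current status is read back
+	// at broadcast time by globalManager.broadcastPeers.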
if HasBehavior(r.Behavior, Behavior_GLOBAL) { - s.global.QueueUpdate(r, resp) + s.global.QueueUpdate(r) } + if reqState.IsOwner { + metricGetRateLimitCounter.WithLabelValues("local").Inc() + } return resp, nil } @@ -723,10 +748,10 @@ func (s *V1Instance) Describe(ch chan<- *prometheus.Desc) { metricGetRateLimitCounter.Describe(ch) metricOverLimitCounter.Describe(ch) metricWorkerQueue.Describe(ch) - s.global.metricBroadcastCounter.Describe(ch) s.global.metricBroadcastDuration.Describe(ch) s.global.metricGlobalQueueLength.Describe(ch) s.global.metricGlobalSendDuration.Describe(ch) + s.global.metricGlobalSendQueueLength.Describe(ch) } // Collect fetches metrics from the server for use by prometheus @@ -741,10 +766,10 @@ func (s *V1Instance) Collect(ch chan<- prometheus.Metric) { metricGetRateLimitCounter.Collect(ch) metricOverLimitCounter.Collect(ch) metricWorkerQueue.Collect(ch) - s.global.metricBroadcastCounter.Collect(ch) s.global.metricBroadcastDuration.Collect(ch) s.global.metricGlobalQueueLength.Collect(ch) s.global.metricGlobalSendDuration.Collect(ch) + s.global.metricGlobalSendQueueLength.Collect(ch) } // HasBehavior returns true if the provided behavior is set @@ -785,3 +810,7 @@ func isDeadlineExceeded(err error) bool { } return errors.Is(err, context.DeadlineExceeded) } + +func epochMillis(t time.Time) int64 { + return t.UnixNano() / 1_000_000 +} diff --git a/gubernator.pb.go b/gubernator.pb.go index 808a8814..3b54288d 100644 --- a/gubernator.pb.go +++ b/gubernator.pb.go @@ -374,6 +374,17 @@ type RateLimitReq struct { // this to pass trace context to other peers. Might be useful for future clients to pass along // trace information to gubernator. Metadata map[string]string `protobuf:"bytes,9,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // The exact time this request was created in Epoch milliseconds. Due to + // time drift between systems, it may be advantageous for a client to set the + // exact time the request was created. It possible the system clock for the + // client has drifted from the system clock where gubernator daemon is + // running. + // + // The created time is used by gubernator to calculate the reset time for + // both token and leaky algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request. 
+ CreatedAt *int64 `protobuf:"varint,10,opt,name=created_at,json=createdAt,proto3,oneof" json:"created_at,omitempty"` } func (x *RateLimitReq) Reset() { @@ -471,6 +482,13 @@ func (x *RateLimitReq) GetMetadata() map[string]string { return nil } +func (x *RateLimitReq) GetCreatedAt() int64 { + if x != nil && x.CreatedAt != nil { + return *x.CreatedAt + } + return 0 +} + type RateLimitResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -684,7 +702,7 @@ var file_gubernator_proto_rawDesc = []byte{ 0x70, 0x12, 0x3a, 0x0a, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, - 0x73, 0x70, 0x52, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x22, 0x8e, 0x03, + 0x73, 0x70, 0x52, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x22, 0xc1, 0x03, 0x0a, 0x0c, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x75, 0x6e, 0x69, 0x71, 0x75, 0x65, 0x5f, 0x6b, 0x65, 0x79, @@ -706,68 +724,71 @@ var file_gubernator_proto_rawDesc = []byte{ 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xac, - 0x02, 0x0a, 0x0d, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, - 0x12, 0x2d, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, - 0x32, 0x15, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x14, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, - 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, 0x69, - 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, - 0x69, 0x6e, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, 0x74, 0x69, 0x6d, - 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x73, 0x65, 0x74, 0x54, 0x69, - 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x46, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x70, 0x62, 0x2e, - 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, - 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 
0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x10, 0x0a, - 0x0e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x22, - 0x62, 0x0a, 0x0f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, - 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x70, 0x65, 0x65, 0x72, 0x43, 0x6f, - 0x75, 0x6e, 0x74, 0x2a, 0x2f, 0x0a, 0x09, 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, - 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x4f, 0x4b, 0x45, 0x4e, 0x5f, 0x42, 0x55, 0x43, 0x4b, 0x45, 0x54, - 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x4c, 0x45, 0x41, 0x4b, 0x59, 0x5f, 0x42, 0x55, 0x43, 0x4b, - 0x45, 0x54, 0x10, 0x01, 0x2a, 0x8d, 0x01, 0x0a, 0x08, 0x42, 0x65, 0x68, 0x61, 0x76, 0x69, 0x6f, - 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x00, 0x12, - 0x0f, 0x0a, 0x0b, 0x4e, 0x4f, 0x5f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x01, - 0x12, 0x0a, 0x0a, 0x06, 0x47, 0x4c, 0x4f, 0x42, 0x41, 0x4c, 0x10, 0x02, 0x12, 0x19, 0x0a, 0x15, - 0x44, 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x49, 0x53, 0x5f, 0x47, 0x52, 0x45, 0x47, - 0x4f, 0x52, 0x49, 0x41, 0x4e, 0x10, 0x04, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, 0x53, 0x45, 0x54, - 0x5f, 0x52, 0x45, 0x4d, 0x41, 0x49, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x08, 0x12, 0x10, 0x0a, 0x0c, - 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x52, 0x45, 0x47, 0x49, 0x4f, 0x4e, 0x10, 0x10, 0x12, 0x14, - 0x0a, 0x10, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, - 0x49, 0x54, 0x10, 0x20, 0x2a, 0x29, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0f, - 0x0a, 0x0b, 0x55, 0x4e, 0x44, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x00, 0x12, - 0x0e, 0x0a, 0x0a, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x01, 0x32, - 0xdd, 0x01, 0x0a, 0x02, 0x56, 0x31, 0x12, 0x70, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, - 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, - 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, - 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x20, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, - 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, - 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x1c, 0x82, 0xd3, 0xe4, 0x93, - 0x02, 0x16, 0x3a, 0x01, 0x2a, 0x22, 0x11, 0x2f, 0x76, 0x31, 0x2f, 0x47, 0x65, 0x74, 0x52, 0x61, - 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x65, 0x0a, 0x0b, 0x48, 0x65, 0x61, 0x6c, - 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x12, 0x1d, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, - 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, - 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x1a, 0x1e, 0x2e, 0x70, 0x62, 
0x2e, 0x67, 0x75, 0x62, 0x65, - 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, - 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x22, 0x17, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x11, 0x12, 0x0f, - 0x2f, 0x76, 0x31, 0x2f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x42, - 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, - 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x61, 0x12, 0x22, 0x0a, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, + 0x0a, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, + 0x41, 0x74, 0x88, 0x01, 0x01, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, + 0x38, 0x01, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, + 0x74, 0x22, 0xac, 0x02, 0x0a, 0x0d, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x2d, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x15, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, + 0x74, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x6d, 0x61, + 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x6d, + 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, + 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x73, 0x65, + 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x46, 0x0a, 0x08, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, + 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, + 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, + 0x22, 0x10, 0x0a, 0x0e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, + 0x65, 0x71, 0x22, 0x62, 0x0a, 0x0f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, + 0x07, 0x6d, 0x65, 0x73, 
0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x65, 0x65, 0x72, 0x5f, + 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x70, 0x65, 0x65, + 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x2a, 0x2f, 0x0a, 0x09, 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, + 0x74, 0x68, 0x6d, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x4f, 0x4b, 0x45, 0x4e, 0x5f, 0x42, 0x55, 0x43, + 0x4b, 0x45, 0x54, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x4c, 0x45, 0x41, 0x4b, 0x59, 0x5f, 0x42, + 0x55, 0x43, 0x4b, 0x45, 0x54, 0x10, 0x01, 0x2a, 0x8d, 0x01, 0x0a, 0x08, 0x42, 0x65, 0x68, 0x61, + 0x76, 0x69, 0x6f, 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, + 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x4e, 0x4f, 0x5f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, + 0x47, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x47, 0x4c, 0x4f, 0x42, 0x41, 0x4c, 0x10, 0x02, 0x12, + 0x19, 0x0a, 0x15, 0x44, 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x49, 0x53, 0x5f, 0x47, + 0x52, 0x45, 0x47, 0x4f, 0x52, 0x49, 0x41, 0x4e, 0x10, 0x04, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, + 0x53, 0x45, 0x54, 0x5f, 0x52, 0x45, 0x4d, 0x41, 0x49, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x08, 0x12, + 0x10, 0x0a, 0x0c, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x52, 0x45, 0x47, 0x49, 0x4f, 0x4e, 0x10, + 0x10, 0x12, 0x14, 0x0a, 0x10, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x5f, + 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x20, 0x2a, 0x29, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x12, 0x0f, 0x0a, 0x0b, 0x55, 0x4e, 0x44, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, + 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, + 0x10, 0x01, 0x32, 0xdd, 0x01, 0x0a, 0x02, 0x56, 0x31, 0x12, 0x70, 0x0a, 0x0d, 0x47, 0x65, 0x74, + 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x70, 0x62, 0x2e, + 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, + 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x20, 0x2e, 0x70, 0x62, + 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, + 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x1c, 0x82, + 0xd3, 0xe4, 0x93, 0x02, 0x16, 0x3a, 0x01, 0x2a, 0x22, 0x11, 0x2f, 0x76, 0x31, 0x2f, 0x47, 0x65, + 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x65, 0x0a, 0x0b, 0x48, + 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x12, 0x1d, 0x2e, 0x70, 0x62, 0x2e, + 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, + 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x1a, 0x1e, 0x2e, 0x70, 0x62, 0x2e, 0x67, + 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x22, 0x17, 0x82, 0xd3, 0xe4, 0x93, 0x02, + 0x11, 0x12, 0x0f, 0x2f, 0x76, 0x31, 0x2f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, + 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -895,6 +916,7 @@ func file_gubernator_proto_init() { } } } + file_gubernator_proto_msgTypes[2].OneofWrappers = []interface{}{} type x struct{} out := 
protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/gubernator.proto b/gubernator.proto index fea99a22..52d5e65f 100644 --- a/gubernator.proto +++ b/gubernator.proto @@ -168,6 +168,18 @@ message RateLimitReq { // this to pass trace context to other peers. Might be useful for future clients to pass along // trace information to gubernator. map metadata = 9; + + // The exact time this request was created in Epoch milliseconds. Due to + // time drift between systems, it may be advantageous for a client to set the + // exact time the request was created. It possible the system clock for the + // client has drifted from the system clock where gubernator daemon is + // running. + // + // The created time is used by gubernator to calculate the reset time for + // both token and leaky algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request. + optional int64 created_at = 10; } enum Status { diff --git a/interval_test.go b/interval_test.go index 68c8b40d..d01d86f3 100644 --- a/interval_test.go +++ b/interval_test.go @@ -19,7 +19,7 @@ package gubernator_test import ( "testing" - "github.com/mailgun/gubernator/v2" + gubernator "github.com/mailgun/gubernator/v2" "github.com/mailgun/holster/v4/clock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" diff --git a/lrucache.go b/lrucache.go index 09bc36ba..03867209 100644 --- a/lrucache.go +++ b/lrucache.go @@ -112,16 +112,7 @@ func (c *LRUCache) GetItem(key string) (item *CacheItem, ok bool) { if ele, hit := c.cache[key]; hit { entry := ele.Value.(*CacheItem) - now := MillisecondNow() - // If the entry is invalidated - if entry.InvalidAt != 0 && entry.InvalidAt < now { - c.removeElement(ele) - metricCacheAccess.WithLabelValues("miss").Add(1) - return - } - - // If the entry has expired, remove it from the cache - if entry.ExpireAt < now { + if entry.IsExpired() { c.removeElement(ele) metricCacheAccess.WithLabelValues("miss").Add(1) return diff --git a/peer_client.go b/peer_client.go index 39c13c14..5e2fef15 100644 --- a/peer_client.go +++ b/peer_client.go @@ -66,9 +66,10 @@ type response struct { } type request struct { - request *RateLimitReq - resp chan *response - ctx context.Context + request *RateLimitReq + reqState RateLimitReqState + resp chan *response + ctx context.Context } type PeerConfig struct { diff --git a/peer_client_test.go b/peer_client_test.go index d739f40a..5f0bc016 100644 --- a/peer_client_test.go +++ b/peer_client_test.go @@ -37,6 +37,7 @@ func TestPeerClientShutdown(t *testing.T) { } const threads = 10 + createdAt := epochMillis(clock.Now()) cases := []test{ {"No batching", gubernator.Behavior_NO_BATCHING}, @@ -71,9 +72,10 @@ func TestPeerClientShutdown(t *testing.T) { wg.Go(func() error { ctx := context.Background() _, err := client.GetPeerRateLimit(ctx, &gubernator.RateLimitReq{ - Hits: 1, - Limit: 100, - Behavior: c.Behavior, + Hits: 1, + Limit: 100, + Behavior: c.Behavior, + CreatedAt: &createdAt, }) if err != nil { diff --git a/peers.pb.go b/peers.pb.go index a805b29a..e69e6fe2 100644 --- a/peers.pb.go +++ b/peers.pb.go @@ -185,9 +185,25 @@ type UpdatePeerGlobal struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` - Status *RateLimitResp `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` - Algorithm Algorithm `protobuf:"varint,3,opt,name=algorithm,proto3,enum=pb.gubernator.Algorithm" 
json:"algorithm,omitempty"` + // Uniquely identifies this rate limit IE: 'ip:10.2.10.7' or 'account:123445' + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Status *RateLimitResp `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` + // The algorithm used to calculate the rate limit. The algorithm may change on + // subsequent requests, when this occurs any previous rate limit hit counts are reset. + Algorithm Algorithm `protobuf:"varint,3,opt,name=algorithm,proto3,enum=pb.gubernator.Algorithm" json:"algorithm,omitempty"` + // The duration of the rate limit in milliseconds + Duration int64 `protobuf:"varint,4,opt,name=duration,proto3" json:"duration,omitempty"` + // The exact time the original request was created in Epoch milliseconds. + // Due to time drift between systems, it may be advantageous for a client to + // set the exact time the request was created. It possible the system clock + // for the client has drifted from the system clock where gubernator daemon + // is running. + // + // The created time is used by gubernator to calculate the reset time for + // both token and leaky algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request. + CreatedAt int64 `protobuf:"varint,5,opt,name=created_at,json=createdAt,proto3" json:"created_at,omitempty"` } func (x *UpdatePeerGlobal) Reset() { @@ -243,6 +259,20 @@ func (x *UpdatePeerGlobal) GetAlgorithm() Algorithm { return Algorithm_TOKEN_BUCKET } +func (x *UpdatePeerGlobal) GetDuration() int64 { + if x != nil { + return x.Duration + } + return 0 +} + +func (x *UpdatePeerGlobal) GetCreatedAt() int64 { + if x != nil { + return x.CreatedAt + } + return 0 +} + type UpdatePeerGlobalsResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -302,7 +332,7 @@ var file_peers_proto_rawDesc = []byte{ 0x39, 0x0a, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, - 0x6c, 0x52, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x22, 0x92, 0x01, 0x0a, 0x10, 0x55, + 0x6c, 0x52, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x22, 0xcd, 0x01, 0x0a, 0x10, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x34, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, @@ -311,25 +341,28 @@ var file_peers_proto_rawDesc = []byte{ 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x36, 0x0a, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x41, 0x6c, 0x67, 0x6f, 0x72, - 0x69, 0x74, 0x68, 0x6d, 0x52, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x22, - 0x17, 0x0a, 0x15, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, - 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x32, 0xcd, 0x01, 0x0a, 0x07, 0x50, 0x65, 0x65, - 0x72, 0x73, 0x56, 0x31, 0x12, 0x60, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, - 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, + 0x69, 0x74, 0x68, 0x6d, 0x52, 0x09, 
0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x12,
+	0x1a, 0x0a, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28,
+	0x03, 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1d, 0x0a, 0x0a, 0x63,
+	0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52,
+	0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x22, 0x17, 0x0a, 0x15, 0x55, 0x70,
+	0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52,
+	0x65, 0x73, 0x70, 0x32, 0xcd, 0x01, 0x0a, 0x07, 0x50, 0x65, 0x65, 0x72, 0x73, 0x56, 0x31, 0x12,
+	0x60, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69,
+	0x6d, 0x69, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e,
+	0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65,
+	0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67,
 	0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65,
-	0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24,
-	0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47,
-	0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73,
-	0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x60, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65,
-	0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62,
+	0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22,
+	0x00, 0x12, 0x60, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47,
+	0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65,
+	0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65,
+	0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, 0x2e, 0x70, 0x62,
 	0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61,
-	0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x71,
-	0x1a, 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72,
-	0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61,
-	0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68,
-	0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67,
-	0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72,
-	0x6f, 0x74, 0x6f, 0x33,
+	0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x73,
+	0x70, 0x22, 0x00, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f,
+	0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e,
+	0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
 }
 
 var (
diff --git a/peers.proto b/peers.proto
index 1ce2a431..0dad87d4 100644
--- a/peers.proto
+++ b/peers.proto
@@ -26,32 +26,48 @@ import "gubernator.proto";
 
 // NOTE: For use by gubernator peers only
 service PeersV1 {
-    // Used by peers to relay batches of requests to an owner peer
-    rpc GetPeerRateLimits (GetPeerRateLimitsReq) returns (GetPeerRateLimitsResp) {}
+  // Used by peers to relay batches of requests to an owner peer
+  rpc GetPeerRateLimits (GetPeerRateLimitsReq) returns (GetPeerRateLimitsResp) {}
 
-    // Used by owner peers to send global rate limit updates to non-owner peers
-    rpc UpdatePeerGlobals (UpdatePeerGlobalsReq) returns (UpdatePeerGlobalsResp) {}
+  // Used by owner peers to send global rate limit updates to non-owner peers
+  rpc UpdatePeerGlobals (UpdatePeerGlobalsReq) returns (UpdatePeerGlobalsResp) {}
 }
 
 message GetPeerRateLimitsReq {
-    // Must specify at least one RateLimit. The peer that recives this request MUST be authoritative for
-    // each rate_limit[x].unique_key provided, as the peer will not forward the request to any other peers
-    repeated RateLimitReq requests = 1;
+  // Must specify at least one RateLimit. The peer that receives this request MUST be authoritative for
+  // each rate_limit[x].unique_key provided, as the peer will not forward the request to any other peers
+  repeated RateLimitReq requests = 1;
 }
 
 message GetPeerRateLimitsResp {
-    // Responses are in the same order as they appeared in the PeerRateLimitRequests
-    repeated RateLimitResp rate_limits = 1;
+  // Responses are in the same order as they appeared in the PeerRateLimitRequests
+  repeated RateLimitResp rate_limits = 1;
 }
 
 message UpdatePeerGlobalsReq {
-    // Must specify at least one RateLimit
-    repeated UpdatePeerGlobal globals = 1;
+  // Must specify at least one RateLimit
+  repeated UpdatePeerGlobal globals = 1;
 }
 
 message UpdatePeerGlobal {
-    string key = 1;
-    RateLimitResp status = 2;
-    Algorithm algorithm = 3;
+  // Uniquely identifies this rate limit IE: 'ip:10.2.10.7' or 'account:123445'
+  string key = 1;
+  RateLimitResp status = 2;
+  // The algorithm used to calculate the rate limit. The algorithm may change on
+  // subsequent requests; when this occurs, any previous rate limit hit counts are reset.
+  Algorithm algorithm = 3;
+  // The duration of the rate limit in milliseconds
+  int64 duration = 4;
+  // The exact time the original request was created in Epoch milliseconds.
+  // Due to time drift between systems, it may be advantageous for a client to
+  // set the exact time the request was created. It is possible the system clock
+  // for the client has drifted from the system clock where the gubernator daemon
+  // is running.
+  //
+  // The created time is used by gubernator to calculate the reset time for
+  // both token and leaky algorithms. If it is not set by the client,
+  // gubernator will set the created time when it receives the rate limit
+  // request.
+  int64 created_at = 5;
 }
 
 message UpdatePeerGlobalsResp {}
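The created_at comment above is easiest to read from the client's perspective: a caller may stamp the request with its own clock so that gubernator computes reset times from the caller's notion of "now" rather than the daemon's. Below is a minimal Go sketch under those assumptions. RateLimitReq, GetRateLimitsReq, V1Client, and MillisecondNow come from the generated client and the surrounding codebase; the package name, import path, and the checkLimit wrapper are illustrative only and may need adjusting to the actual module version.

package example

import (
	"context"

	// Module path assumed; adjust to the gubernator version in use.
	guber "github.com/mailgun/gubernator/v2"
)

// checkLimit consumes one hit against a hypothetical per-account limit and
// pins the request to the caller's clock via the new created_at field.
func checkLimit(ctx context.Context, client guber.V1Client) (*guber.RateLimitResp, error) {
	createdAt := guber.MillisecondNow() // client-side timestamp, Epoch milliseconds

	resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{
		Requests: []*guber.RateLimitReq{{
			Name:      "requests_per_sec",
			UniqueKey: "account:12345",
			Hits:      1,
			Limit:     100,
			Duration:  1000, // milliseconds
			Algorithm: guber.Algorithm_TOKEN_BUCKET,
			// created_at is optional; if left nil, gubernator stamps the
			// request with its own clock when the request arrives.
			CreatedAt: &createdAt,
		}},
	})
	if err != nil {
		return nil, err
	}
	return resp.Responses[0], nil
}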
diff --git a/python/gubernator/gubernator_pb2.py b/python/gubernator/gubernator_pb2.py
index 17351bb6..f1369bd5 100644
--- a/python/gubernator/gubernator_pb2.py
+++ b/python/gubernator/gubernator_pb2.py
@@ -15,7 +15,7 @@
 from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gubernator.proto\x12\rpb.gubernator\x1a\x1cgoogle/api/annotations.proto\"K\n\x10GetRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"O\n\x11GetRateLimitsResp\x12:\n\tresponses\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\tresponses\"\x8e\x03\n\x0cRateLimitReq\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n\nunique_key\x18\x02 \x01(\tR\tuniqueKey\x12\x12\n\x04hits\x18\x03 \x01(\x03R\x04hits\x12\x14\n\x05limit\x18\x04 \x01(\x03R\x05limit\x12\x1a\n\x08\x64uration\x18\x05 \x01(\x03R\x08\x64uration\x12\x36\n\talgorithm\x18\x06 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x33\n\x08\x62\x65havior\x18\x07 \x01(\x0e\x32\x17.pb.gubernator.BehaviorR\x08\x62\x65havior\x12\x14\n\x05\x62urst\x18\x08 \x01(\x03R\x05\x62urst\x12\x45\n\x08metadata\x18\t \x03(\x0b\x32).pb.gubernator.RateLimitReq.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\xac\x02\n\rRateLimitResp\x12-\n\x06status\x18\x01 \x01(\x0e\x32\x15.pb.gubernator.StatusR\x06status\x12\x14\n\x05limit\x18\x02 \x01(\x03R\x05limit\x12\x1c\n\tremaining\x18\x03 \x01(\x03R\tremaining\x12\x1d\n\nreset_time\x18\x04 \x01(\x03R\tresetTime\x12\x14\n\x05\x65rror\x18\x05 \x01(\tR\x05\x65rror\x12\x46\n\x08metadata\x18\x06 \x03(\x0b\x32*.pb.gubernator.RateLimitResp.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x10\n\x0eHealthCheckReq\"b\n\x0fHealthCheckResp\x12\x16\n\x06status\x18\x01 \x01(\tR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1d\n\npeer_count\x18\x03 \x01(\x05R\tpeerCount*/\n\tAlgorithm\x12\x10\n\x0cTOKEN_BUCKET\x10\x00\x12\x10\n\x0cLEAKY_BUCKET\x10\x01*\x8d\x01\n\x08\x42\x65havior\x12\x0c\n\x08\x42\x41TCHING\x10\x00\x12\x0f\n\x0bNO_BATCHING\x10\x01\x12\n\n\x06GLOBAL\x10\x02\x12\x19\n\x15\x44URATION_IS_GREGORIAN\x10\x04\x12\x13\n\x0fRESET_REMAINING\x10\x08\x12\x10\n\x0cMULTI_REGION\x10\x10\x12\x14\n\x10\x44RAIN_OVER_LIMIT\x10 *)\n\x06Status\x12\x0f\n\x0bUNDER_LIMIT\x10\x00\x12\x0e\n\nOVER_LIMIT\x10\x01\x32\xdd\x01\n\x02V1\x12p\n\rGetRateLimits\x12\x1f.pb.gubernator.GetRateLimitsReq\x1a .pb.gubernator.GetRateLimitsResp\"\x1c\x82\xd3\xe4\x93\x02\x16\"\x11/v1/GetRateLimits:\x01*\x12\x65\n\x0bHealthCheck\x12\x1d.pb.gubernator.HealthCheckReq\x1a\x1e.pb.gubernator.HealthCheckResp\"\x17\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1/HealthCheckB\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gubernator.proto\x12\rpb.gubernator\x1a\x1cgoogle/api/annotations.proto\"K\n\x10GetRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"O\n\x11GetRateLimitsResp\x12:\n\tresponses\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\tresponses\"\xc1\x03\n\x0cRateLimitReq\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n\nunique_key\x18\x02 \x01(\tR\tuniqueKey\x12\x12\n\x04hits\x18\x03 \x01(\x03R\x04hits\x12\x14\n\x05limit\x18\x04 \x01(\x03R\x05limit\x12\x1a\n\x08\x64uration\x18\x05 \x01(\x03R\x08\x64uration\x12\x36\n\talgorithm\x18\x06 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x33\n\x08\x62\x65havior\x18\x07 \x01(\x0e\x32\x17.pb.gubernator.BehaviorR\x08\x62\x65havior\x12\x14\n\x05\x62urst\x18\x08 \x01(\x03R\x05\x62urst\x12\x45\n\x08metadata\x18\t \x03(\x0b\x32).pb.gubernator.RateLimitReq.MetadataEntryR\x08metadata\x12\"\n\ncreated_at\x18\n \x01(\x03H\x00R\tcreatedAt\x88\x01\x01\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\r\n\x0b_created_at\"\xac\x02\n\rRateLimitResp\x12-\n\x06status\x18\x01 \x01(\x0e\x32\x15.pb.gubernator.StatusR\x06status\x12\x14\n\x05limit\x18\x02 \x01(\x03R\x05limit\x12\x1c\n\tremaining\x18\x03 \x01(\x03R\tremaining\x12\x1d\n\nreset_time\x18\x04 \x01(\x03R\tresetTime\x12\x14\n\x05\x65rror\x18\x05 \x01(\tR\x05\x65rror\x12\x46\n\x08metadata\x18\x06 \x03(\x0b\x32*.pb.gubernator.RateLimitResp.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x10\n\x0eHealthCheckReq\"b\n\x0fHealthCheckResp\x12\x16\n\x06status\x18\x01 \x01(\tR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1d\n\npeer_count\x18\x03 \x01(\x05R\tpeerCount*/\n\tAlgorithm\x12\x10\n\x0cTOKEN_BUCKET\x10\x00\x12\x10\n\x0cLEAKY_BUCKET\x10\x01*\x8d\x01\n\x08\x42\x65havior\x12\x0c\n\x08\x42\x41TCHING\x10\x00\x12\x0f\n\x0bNO_BATCHING\x10\x01\x12\n\n\x06GLOBAL\x10\x02\x12\x19\n\x15\x44URATION_IS_GREGORIAN\x10\x04\x12\x13\n\x0fRESET_REMAINING\x10\x08\x12\x10\n\x0cMULTI_REGION\x10\x10\x12\x14\n\x10\x44RAIN_OVER_LIMIT\x10 *)\n\x06Status\x12\x0f\n\x0bUNDER_LIMIT\x10\x00\x12\x0e\n\nOVER_LIMIT\x10\x01\x32\xdd\x01\n\x02V1\x12p\n\rGetRateLimits\x12\x1f.pb.gubernator.GetRateLimitsReq\x1a .pb.gubernator.GetRateLimitsResp\"\x1c\x82\xd3\xe4\x93\x02\x16\"\x11/v1/GetRateLimits:\x01*\x12\x65\n\x0bHealthCheck\x12\x1d.pb.gubernator.HealthCheckReq\x1a\x1e.pb.gubernator.HealthCheckResp\"\x17\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1/HealthCheckB\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3')
 
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -31,28 +31,28 @@
   _globals['_V1'].methods_by_name['GetRateLimits']._serialized_options = b'\202\323\344\223\002\026\"\021/v1/GetRateLimits:\001*'
   _globals['_V1'].methods_by_name['HealthCheck']._options = None
   _globals['_V1'].methods_by_name['HealthCheck']._serialized_options = b'\202\323\344\223\002\021\022\017/v1/HealthCheck'
-  _globals['_ALGORITHM']._serialized_start=1045
-  _globals['_ALGORITHM']._serialized_end=1092
-  _globals['_BEHAVIOR']._serialized_start=1095
-  _globals['_BEHAVIOR']._serialized_end=1236
-  _globals['_STATUS']._serialized_start=1238
-  _globals['_STATUS']._serialized_end=1279
+  _globals['_ALGORITHM']._serialized_start=1096
+  _globals['_ALGORITHM']._serialized_end=1143
+  _globals['_BEHAVIOR']._serialized_start=1146
+  _globals['_BEHAVIOR']._serialized_end=1287
+  _globals['_STATUS']._serialized_start=1289
+  _globals['_STATUS']._serialized_end=1330
   _globals['_GETRATELIMITSREQ']._serialized_start=65
   _globals['_GETRATELIMITSREQ']._serialized_end=140
   _globals['_GETRATELIMITSRESP']._serialized_start=142
   _globals['_GETRATELIMITSRESP']._serialized_end=221
   _globals['_RATELIMITREQ']._serialized_start=224
-  _globals['_RATELIMITREQ']._serialized_end=622
-  _globals['_RATELIMITREQ_METADATAENTRY']._serialized_start=563
-  _globals['_RATELIMITREQ_METADATAENTRY']._serialized_end=622
-  _globals['_RATELIMITRESP']._serialized_start=625
-  _globals['_RATELIMITRESP']._serialized_end=925
-  _globals['_RATELIMITRESP_METADATAENTRY']._serialized_start=563
-  _globals['_RATELIMITRESP_METADATAENTRY']._serialized_end=622
-  _globals['_HEALTHCHECKREQ']._serialized_start=927
-  _globals['_HEALTHCHECKREQ']._serialized_end=943
-  _globals['_HEALTHCHECKRESP']._serialized_start=945
-  _globals['_HEALTHCHECKRESP']._serialized_end=1043
-  _globals['_V1']._serialized_start=1282
-  _globals['_V1']._serialized_end=1503
+  _globals['_RATELIMITREQ']._serialized_end=673
+  _globals['_RATELIMITREQ_METADATAENTRY']._serialized_start=599
+  _globals['_RATELIMITREQ_METADATAENTRY']._serialized_end=658
+  _globals['_RATELIMITRESP']._serialized_start=676
+  _globals['_RATELIMITRESP']._serialized_end=976
+  _globals['_RATELIMITRESP_METADATAENTRY']._serialized_start=599
+  _globals['_RATELIMITRESP_METADATAENTRY']._serialized_end=658
+  _globals['_HEALTHCHECKREQ']._serialized_start=978
+  _globals['_HEALTHCHECKREQ']._serialized_end=994
+  _globals['_HEALTHCHECKRESP']._serialized_start=996
+  _globals['_HEALTHCHECKRESP']._serialized_end=1094
+  _globals['_V1']._serialized_start=1333
+  _globals['_V1']._serialized_end=1554
 # @@protoc_insertion_point(module_scope)
diff --git a/python/gubernator/peers_pb2.py b/python/gubernator/peers_pb2.py
index b1451c7a..97a519d4 100644
--- a/python/gubernator/peers_pb2.py
+++ b/python/gubernator/peers_pb2.py
@@ -15,7 +15,7 @@
 import gubernator_pb2 as gubernator__pb2
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bpeers.proto\x12\rpb.gubernator\x1a\x10gubernator.proto\"O\n\x14GetPeerRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"V\n\x15GetPeerRateLimitsResp\x12=\n\x0brate_limits\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\nrateLimits\"Q\n\x14UpdatePeerGlobalsReq\x12\x39\n\x07globals\x18\x01 \x03(\x0b\x32\x1f.pb.gubernator.UpdatePeerGlobalR\x07globals\"\x92\x01\n\x10UpdatePeerGlobal\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x34\n\x06status\x18\x02 \x01(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\x06status\x12\x36\n\talgorithm\x18\x03 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\"\x17\n\x15UpdatePeerGlobalsResp2\xcd\x01\n\x07PeersV1\x12`\n\x11GetPeerRateLimits\x12#.pb.gubernator.GetPeerRateLimitsReq\x1a$.pb.gubernator.GetPeerRateLimitsResp\"\x00\x12`\n\x11UpdatePeerGlobals\x12#.pb.gubernator.UpdatePeerGlobalsReq\x1a$.pb.gubernator.UpdatePeerGlobalsResp\"\x00\x42\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bpeers.proto\x12\rpb.gubernator\x1a\x10gubernator.proto\"O\n\x14GetPeerRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"V\n\x15GetPeerRateLimitsResp\x12=\n\x0brate_limits\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\nrateLimits\"Q\n\x14UpdatePeerGlobalsReq\x12\x39\n\x07globals\x18\x01 \x03(\x0b\x32\x1f.pb.gubernator.UpdatePeerGlobalR\x07globals\"\xcd\x01\n\x10UpdatePeerGlobal\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x34\n\x06status\x18\x02 \x01(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\x06status\x12\x36\n\talgorithm\x18\x03 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x1a\n\x08\x64uration\x18\x04 \x01(\x03R\x08\x64uration\x12\x1d\n\ncreated_at\x18\x05 \x01(\x03R\tcreatedAt\"\x17\n\x15UpdatePeerGlobalsResp2\xcd\x01\n\x07PeersV1\x12`\n\x11GetPeerRateLimits\x12#.pb.gubernator.GetPeerRateLimitsReq\x1a$.pb.gubernator.GetPeerRateLimitsResp\"\x00\x12`\n\x11UpdatePeerGlobals\x12#.pb.gubernator.UpdatePeerGlobalsReq\x1a$.pb.gubernator.UpdatePeerGlobalsResp\"\x00\x42\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3')
 
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -30,9 +30,9 @@
   _globals['_UPDATEPEERGLOBALSREQ']._serialized_start=217
   _globals['_UPDATEPEERGLOBALSREQ']._serialized_end=298
   _globals['_UPDATEPEERGLOBAL']._serialized_start=301
-  _globals['_UPDATEPEERGLOBAL']._serialized_end=447
-  _globals['_UPDATEPEERGLOBALSRESP']._serialized_start=449
-  _globals['_UPDATEPEERGLOBALSRESP']._serialized_end=472
-  _globals['_PEERSV1']._serialized_start=475
-  _globals['_PEERSV1']._serialized_end=680
+  _globals['_UPDATEPEERGLOBAL']._serialized_end=506
+  _globals['_UPDATEPEERGLOBALSRESP']._serialized_start=508
+  _globals['_UPDATEPEERGLOBALSRESP']._serialized_end=531
+  _globals['_PEERSV1']._serialized_start=534
+  _globals['_PEERSV1']._serialized_end=739
 # @@protoc_insertion_point(module_scope)
diff --git a/workers.go b/workers.go
index 07ba177f..34d99d1d 100644
--- a/workers.go
+++ b/workers.go
@@ -199,7 +199,7 @@ func (p *WorkerPool) dispatch(worker *Worker) {
 			}
 
 			resp := new(response)
-			resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, worker.cache)
+			resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, req.reqState, worker.cache)
 			select {
 			case req.resp <- resp:
 				// Success.
@@ -258,16 +258,17 @@
 }
 
 // GetRateLimit sends a GetRateLimit request to worker pool.
-func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) (retval *RateLimitResp, reterr error) {
+func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, reqState RateLimitReqState) (*RateLimitResp, error) {
 	// Delegate request to assigned channel based on request key.
 	worker := p.getWorker(rlRequest.HashKey())
 	queueGauge := metricWorkerQueue.WithLabelValues("GetRateLimit", worker.name)
 	queueGauge.Inc()
 	defer queueGauge.Dec()
 	handlerRequest := request{
-		ctx:     ctx,
-		resp:    make(chan *response, 1),
-		request: rlRequest,
+		ctx:      ctx,
+		resp:     make(chan *response, 1),
+		request:  rlRequest,
+		reqState: reqState,
 	}
 
 	// Send request.
@@ -289,14 +290,14 @@
 }
 
 // Handle request received by worker.
-func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, cache Cache) (*RateLimitResp, error) {
+func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, reqState RateLimitReqState, cache Cache) (*RateLimitResp, error) {
 	defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("Worker.handleGetRateLimit")).ObserveDuration()
 	var rlResponse *RateLimitResp
 	var err error
 
 	switch req.Algorithm {
 	case Algorithm_TOKEN_BUCKET:
-		rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req)
+		rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req, reqState)
 		if err != nil {
 			msg := "Error in tokenBucket"
 			countError(err, msg)
@@ -305,7 +306,7 @@
 		}
 
 	case Algorithm_LEAKY_BUCKET:
-		rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req)
+		rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req, reqState)
 		if err != nil {
 			msg := "Error in leakyBucket"
 			countError(err, msg)
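The workers.go changes above thread a RateLimitReqState value from WorkerPool.GetRateLimit through the queued request and into tokenBucket/leakyBucket. A minimal sketch of a caller follows, written as if it lived in the gubernator package alongside workers.go and assuming the IsOwner flag used elsewhere in this change set; the getRateLimitFromPool helper and its isOwner argument are hypothetical.

package gubernator

import "context"

// getRateLimitFromPool is a hypothetical in-package caller. The key idea is
// that whether this instance owns the rate-limit key is decided once, then
// carried on the request state so non-owner peers can skip owner-only side
// effects (store change callbacks, over-limit metrics) inside the algorithms.
func getRateLimitFromPool(ctx context.Context, pool *WorkerPool, req *RateLimitReq, isOwner bool) (*RateLimitResp, error) {
	reqState := RateLimitReqState{IsOwner: isOwner}
	// The pool hashes req's key to a worker; reqState rides along on the
	// queued request and is handed to tokenBucket/leakyBucket unchanged.
	return pool.GetRateLimit(ctx, req, reqState)
}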