diff --git a/features/features.go b/features/features.go index 892d29f9931..262ce0933cf 100644 --- a/features/features.go +++ b/features/features.go @@ -104,6 +104,19 @@ type Config struct { // returned to the Subscriber indicating that the order cannot be processed // until the paused identifiers are unpaused and the order is resubmitted. CheckIdentifiersPaused bool + + // UseKvLimitsForNewOrder when enabled, causes the key-value rate limiter to + // be the authoritative source of rate limiting information for new-order + // callers and disables the legacy rate limiting checks. + // + // Note: this flag does not disable writes to the certificatesPerName or + // fqdnSets tables at Finalize time. + UseKvLimitsForNewOrder bool + + // UseKvLimitsForNewAccount when enabled, causes the key-value rate limiter + // to be the authoritative source of rate limiting information for + // new-account callers and disables the legacy rate limiting checks. + UseKvLimitsForNewAccount bool } var fMu = new(sync.RWMutex) diff --git a/ra/ra.go b/ra/ra.go index c6d3888eddc..13b636447ab 100644 --- a/ra/ra.go +++ b/ra/ra.go @@ -491,9 +491,12 @@ func (ra *RegistrationAuthorityImpl) NewRegistration(ctx context.Context, reques if err != nil { return nil, berrors.InternalServerError("failed to unmarshal ip address: %s", err.Error()) } - err = ra.checkRegistrationLimits(ctx, ipAddr) - if err != nil { - return nil, err + + if !features.Get().UseKvLimitsForNewAccount { + err = ra.checkRegistrationLimits(ctx, ipAddr) + if err != nil { + return nil, err + } } // Check that contacts conform to our expectations. @@ -2007,12 +2010,6 @@ func (ra *RegistrationAuthorityImpl) PerformValidation( if prob != nil { challenge.Status = core.StatusInvalid challenge.Error = prob - - // TODO(#5545): Spending can be async until key-value rate limits - // are authoritative. This saves us from adding latency to each - // request. Goroutines spun out below will respect a context - // deadline set by the ratelimits package and cannot be prematurely - // canceled by the requester. go ra.countFailedValidation(vaCtx, authz.RegistrationID, authz.Identifier.Value) } else { challenge.Status = core.StatusValid @@ -2585,7 +2582,7 @@ func (ra *RegistrationAuthorityImpl) NewOrder(ctx context.Context, req *rapb.New } // Renewal orders, indicated by ARI, are exempt from NewOrder rate limits. - if !req.IsARIRenewal { + if !req.IsARIRenewal && !features.Get().UseKvLimitsForNewOrder { // Check if there is rate limit space for issuing a certificate. err = ra.checkNewOrderLimits(ctx, newOrder.DnsNames, newOrder.RegistrationID, req.IsRenewal) if err != nil { @@ -2664,7 +2661,7 @@ func (ra *RegistrationAuthorityImpl) NewOrder(ctx context.Context, req *rapb.New } // Renewal orders, indicated by ARI, are exempt from NewOrder rate limits. - if len(missingAuthzIdents) > 0 && !req.IsARIRenewal { + if len(missingAuthzIdents) > 0 && !req.IsARIRenewal && !features.Get().UseKvLimitsForNewOrder { pendingAuthzLimits := ra.rlPolicies.PendingAuthorizationsPerAccount() if pendingAuthzLimits.Enabled() { // The order isn't fully authorized we need to check that the client diff --git a/test.sh b/test.sh index 6f8bedd76e6..9e5e1c6ea56 100755 --- a/test.sh +++ b/test.sh @@ -19,6 +19,14 @@ UNIT_PACKAGES=() UNIT_FLAGS=() FILTER=() +# +# Cleanup Functions +# + +function flush_redis() { + go run ./test/boulder-tools/flushredis/main.go +} + # # Print Functions # @@ -225,6 +233,7 @@ fi STAGE="unit" if [[ "${RUN[@]}" =~ "$STAGE" ]] ; then print_heading "Running Unit Tests" + flush_redis run_unit_tests fi @@ -234,6 +243,7 @@ fi STAGE="integration" if [[ "${RUN[@]}" =~ "$STAGE" ]] ; then print_heading "Running Integration Tests" + flush_redis python3 test/integration-test.py --chisel --gotest "${FILTER[@]}" fi diff --git a/test/boulder-tools/flushredis/main.go b/test/boulder-tools/flushredis/main.go new file mode 100644 index 00000000000..de09aebcd8a --- /dev/null +++ b/test/boulder-tools/flushredis/main.go @@ -0,0 +1,56 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/letsencrypt/boulder/cmd" + blog "github.com/letsencrypt/boulder/log" + "github.com/letsencrypt/boulder/metrics" + bredis "github.com/letsencrypt/boulder/redis" + + "github.com/redis/go-redis/v9" +) + +func main() { + rc := bredis.Config{ + Username: "unittest-rw", + TLS: cmd.TLSConfig{ + CACertFile: "test/certs/ipki/minica.pem", + CertFile: "test/certs/ipki/localhost/cert.pem", + KeyFile: "test/certs/ipki/localhost/key.pem", + }, + Lookups: []cmd.ServiceDomain{ + { + Service: "redisratelimits", + Domain: "service.consul", + }, + }, + LookupDNSAuthority: "consul.service.consul", + } + rc.PasswordConfig = cmd.PasswordConfig{ + PasswordFile: "test/secrets/ratelimits_redis_password", + } + + stats := metrics.NoopRegisterer + log := blog.NewMock() + ring, err := bredis.NewRingFromConfig(rc, stats, log) + if err != nil { + fmt.Printf("while constructing ring client: %v\n", err) + os.Exit(1) + } + + err = ring.ForEachShard(context.Background(), func(ctx context.Context, shard *redis.Client) error { + cmd := shard.FlushAll(ctx) + _, err := cmd.Result() + if err != nil { + return err + } + return nil + }) + if err != nil { + fmt.Printf("while flushing redis shards: %v\n", err) + os.Exit(1) + } +} diff --git a/test/config-next/ra.json b/test/config-next/ra.json index c71fd27cdfa..31eed6ec39e 100644 --- a/test/config-next/ra.json +++ b/test/config-next/ra.json @@ -130,7 +130,9 @@ }, "features": { "AsyncFinalize": true, - "CheckRenewalExemptionAtWFE": true + "CheckRenewalExemptionAtWFE": true, + "UseKvLimitsForNewOrder": true, + "UseKvLimitsForNewAccount": true }, "ctLogs": { "stagger": "500ms", diff --git a/test/config-next/wfe2-ratelimit-overrides.yml b/test/config-next/wfe2-ratelimit-overrides.yml index 95303173dc8..716ccb3aa1f 100644 --- a/test/config-next/wfe2-ratelimit-overrides.yml +++ b/test/config-next/wfe2-ratelimit-overrides.yml @@ -5,6 +5,10 @@ ids: - id: 127.0.0.1 comment: localhost + - id: 10.77.77.77 + comment: test + - id: 10.88.88.88 + comment: test - CertificatesPerDomain: burst: 1 count: 1 diff --git a/test/config-next/wfe2.json b/test/config-next/wfe2.json index 55b28980ae2..3fbdef462ee 100644 --- a/test/config-next/wfe2.json +++ b/test/config-next/wfe2.json @@ -130,7 +130,9 @@ "ServeRenewalInfo": true, "TrackReplacementCertificatesARI": true, "CheckRenewalExemptionAtWFE": true, - "CheckIdentifiersPaused": true + "CheckIdentifiersPaused": true, + "UseKvLimitsForNewOrder": true, + "UseKvLimitsForNewAccount": true }, "certProfiles": { "legacy": "The normal profile you know and love", diff --git a/test/integration/otel_test.go b/test/integration/otel_test.go index b0d020c598a..6270357c58e 100644 --- a/test/integration/otel_test.go +++ b/test/integration/otel_test.go @@ -188,6 +188,14 @@ func httpSpan(endpoint string, children ...expectedSpans) expectedSpans { } } +func redisPipelineSpan(op, service string, children ...expectedSpans) expectedSpans { + return expectedSpans{ + Operation: "redis.pipeline " + op, + Service: service, + Children: children, + } +} + // TestTraces tests that all the expected spans are present and properly connected func TestTraces(t *testing.T) { t.Parallel() @@ -210,17 +218,19 @@ func TestTraces(t *testing.T) { {Operation: "/acme/new-nonce", Service: wfe, Children: []expectedSpans{ rpcSpan("nonce.NonceService/Nonce", wfe, "nonce-service")}}, httpSpan("/acme/new-acct", + redisPipelineSpan("get", wfe), + redisPipelineSpan("set", wfe), rpcSpan("sa.StorageAuthorityReadOnly/KeyBlocked", wfe, sa), rpcSpan("sa.StorageAuthorityReadOnly/GetRegistrationByKey", wfe, sa), rpcSpan("ra.RegistrationAuthority/NewRegistration", wfe, ra, rpcSpan("sa.StorageAuthority/KeyBlocked", ra, sa), - rpcSpan("sa.StorageAuthority/CountRegistrationsByIP", ra, sa), rpcSpan("sa.StorageAuthority/NewRegistration", ra, sa))), httpSpan("/acme/new-order", rpcSpan("sa.StorageAuthorityReadOnly/GetRegistration", wfe, sa), + redisPipelineSpan("get", wfe), + redisPipelineSpan("set", wfe), rpcSpan("ra.RegistrationAuthority/NewOrder", wfe, ra, rpcSpan("sa.StorageAuthority/GetOrderForNames", ra, sa), - // 8 ra -> sa rate limit spans omitted here rpcSpan("sa.StorageAuthority/NewOrderAndAuthzs", ra, sa))), httpSpan("/acme/authz-v3/", rpcSpan("sa.StorageAuthorityReadOnly/GetAuthorization2", wfe, sa)), @@ -236,8 +246,10 @@ func TestTraces(t *testing.T) { rpcSpan("sa.StorageAuthority/GetValidOrderAuthorizations2", ra, sa), rpcSpan("sa.StorageAuthority/SetOrderProcessing", ra, sa), rpcSpan("ca.CertificateAuthority/IssuePrecertificate", ra, ca), + redisPipelineSpan("get", ra), rpcSpan("Publisher/SubmitToSingleCTWithResult", ra, "boulder-publisher"), rpcSpan("ca.CertificateAuthority/IssueCertificateForPrecertificate", ra, ca), + redisPipelineSpan("set", ra), rpcSpan("sa.StorageAuthority/FinalizeOrder", ra, sa))), httpSpan("/acme/order/", rpcSpan("sa.StorageAuthorityReadOnly/GetOrder", wfe, sa)), httpSpan("/acme/cert/", rpcSpan("sa.StorageAuthorityReadOnly/GetCertificate", wfe, sa)), diff --git a/test/integration/ratelimit_test.go b/test/integration/ratelimit_test.go index 5527f52d2b0..91c4080fdc2 100644 --- a/test/integration/ratelimit_test.go +++ b/test/integration/ratelimit_test.go @@ -3,19 +3,13 @@ package integration import ( - "context" + "crypto/rand" + "encoding/hex" + "fmt" "os" "strings" "testing" - "github.com/jmhodges/clock" - - "github.com/letsencrypt/boulder/cmd" - berrors "github.com/letsencrypt/boulder/errors" - blog "github.com/letsencrypt/boulder/log" - "github.com/letsencrypt/boulder/metrics" - "github.com/letsencrypt/boulder/ratelimits" - bredis "github.com/letsencrypt/boulder/redis" "github.com/letsencrypt/boulder/test" ) @@ -23,6 +17,7 @@ func TestDuplicateFQDNRateLimit(t *testing.T) { t.Parallel() domain := random_domain() + // The global rate limit for a duplicate certificates is 2 per 3 hours. _, err := authAndIssue(nil, nil, []string{domain}, true) test.AssertNotError(t, err, "Failed to issue first certificate") @@ -33,45 +28,44 @@ func TestDuplicateFQDNRateLimit(t *testing.T) { test.AssertError(t, err, "Somehow managed to issue third certificate") if strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") { - // Setup rate limiting. - rc := bredis.Config{ - Username: "unittest-rw", - TLS: cmd.TLSConfig{ - CACertFile: "test/certs/ipki/minica.pem", - CertFile: "test/certs/ipki/localhost/cert.pem", - KeyFile: "test/certs/ipki/localhost/key.pem", - }, - Lookups: []cmd.ServiceDomain{ - { - Service: "redisratelimits", - Domain: "service.consul", - }, - }, - LookupDNSAuthority: "consul.service.consul", - } - rc.PasswordConfig = cmd.PasswordConfig{ - PasswordFile: "test/secrets/ratelimits_redis_password", - } + // Error should be served from key-value rate limits implementation. + test.AssertContains(t, err.Error(), "too many certificates (2) already issued for this exact set of domains in the last 3h0m0s") + } else { + // Error should be served from legacy rate limits implementation. + test.AssertContains(t, err.Error(), "too many certificates (2) already issued for this exact set of domains in the last 3 hours") + } +} - fc := clock.New() - stats := metrics.NoopRegisterer - log := blog.NewMock() - ring, err := bredis.NewRingFromConfig(rc, stats, log) - test.AssertNotError(t, err, "making redis ring client") - source := ratelimits.NewRedisSource(ring.Ring, fc, stats) - test.AssertNotNil(t, source, "source should not be nil") - limiter, err := ratelimits.NewLimiter(fc, source, stats) - test.AssertNotError(t, err, "making limiter") - txnBuilder, err := ratelimits.NewTransactionBuilder("test/config-next/wfe2-ratelimit-defaults.yml", "") - test.AssertNotError(t, err, "making transaction composer") +func TestCertificatesPerDomain(t *testing.T) { + t.Parallel() - // Check that the CertificatesPerFQDNSet limit is reached. - txns, err := txnBuilder.NewOrderLimitTransactions(1, []string{domain}, false) - test.AssertNotError(t, err, "making transaction") - decision, err := limiter.BatchSpend(context.Background(), txns) - test.AssertNotError(t, err, "checking transaction") - err = decision.Result(fc.Now()) - test.AssertErrorIs(t, err, berrors.RateLimit) - test.AssertContains(t, err.Error(), "too many certificates (2) already issued for this exact set of domains in the last 3h0m0s") + randomDomain := random_domain() + randomSubDomain := func() string { + var bytes [3]byte + rand.Read(bytes[:]) + return fmt.Sprintf("%s.%s", hex.EncodeToString(bytes[:]), randomDomain) + } + + firstSubDomain := randomSubDomain() + _, err := authAndIssue(nil, nil, []string{firstSubDomain}, true) + test.AssertNotError(t, err, "Failed to issue first certificate") + + _, err = authAndIssue(nil, nil, []string{randomSubDomain()}, true) + test.AssertNotError(t, err, "Failed to issue second certificate") + + _, err = authAndIssue(nil, nil, []string{randomSubDomain()}, true) + test.AssertError(t, err, "Somehow managed to issue third certificate") + + if strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") { + // Error should be served from key-value rate limits implementation. + test.AssertContains(t, err.Error(), fmt.Sprintf("too many certificates (2) already issued for %q in the last 2160h0m0s", randomDomain)) + } else { + // Error should be served from legacy rate limits implementation. + test.AssertContains(t, err.Error(), fmt.Sprintf("too many certificates already issued for %q", randomDomain)) } + + // Issue a certificate for the first subdomain, which should succeed because + // it's a renewal. + _, err = authAndIssue(nil, nil, []string{firstSubDomain}, true) + test.AssertNotError(t, err, "Failed to issue renewal certificate") } diff --git a/test/redis-ratelimits.config b/test/redis-ratelimits.config index 667ae9e34a0..a4d1eaf0259 100644 --- a/test/redis-ratelimits.config +++ b/test/redis-ratelimits.config @@ -9,7 +9,6 @@ rename-command BGREWRITEAOF "" rename-command BGSAVE "" rename-command CONFIG "" rename-command DEBUG "" -rename-command FLUSHALL "" rename-command FLUSHDB "" rename-command KEYS "" rename-command PEXPIRE "" diff --git a/test/v2_integration.py b/test/v2_integration.py index 170d50ac924..d3895a4938c 100644 --- a/test/v2_integration.py +++ b/test/v2_integration.py @@ -1559,12 +1559,14 @@ def test_renewal_exemption(): chisel2.expect_problem("urn:ietf:params:acme:error:rateLimited", lambda: chisel2.auth_and_issue(["mail." + base_domain])) -# TODO(#5545) -# - Phase 2: Once the new rate limits are authoritative in config-next, ensure -# that this test only runs in config. -# - Phase 3: Once the new rate limits are authoritative in config, remove this -# test entirely. +# TODO(#5545) Remove this test once key-value rate limits are authoritative in +# production. def test_certificates_per_name(): + if CONFIG_NEXT: + # This test is replaced by TestCertificatesPerDomain in the Go + # integration tests because key-value rate limits does not support + # override limits of 0. + return chisel2.expect_problem("urn:ietf:params:acme:error:rateLimited", lambda: chisel2.auth_and_issue([random_domain() + ".lim.it"])) diff --git a/wfe2/wfe.go b/wfe2/wfe.go index 005d5059f18..ed2d0fee715 100644 --- a/wfe2/wfe.go +++ b/wfe2/wfe.go @@ -642,36 +642,33 @@ func link(url, relation string) string { // function is returned that can be called to refund the quota if the account // creation fails, the func will be nil if any error was encountered during the // check. -// -// TODO(#5545): For now we're simply exercising the new rate limiter codepath. -// This should eventually return a berrors.RateLimit error containing the retry -// after duration among other information available in the ratelimits.Decision. -func (wfe *WebFrontEndImpl) checkNewAccountLimits(ctx context.Context, ip net.IP) func() { +func (wfe *WebFrontEndImpl) checkNewAccountLimits(ctx context.Context, ip net.IP) (func(), error) { if wfe.limiter == nil && wfe.txnBuilder == nil { // Key-value rate limiting is disabled. - return nil + return nil, nil } txns, err := wfe.txnBuilder.NewAccountLimitTransactions(ip) if err != nil { - // TODO(#5545): Once key-value rate limits are authoritative this - // log line should be removed in favor of returning the error. - wfe.log.Infof("building new account limit transactions: %v", err) - return nil + return nil, fmt.Errorf("building new account limit transactions: %w", err) } - _, err = wfe.limiter.BatchSpend(ctx, txns) + d, err := wfe.limiter.BatchSpend(ctx, txns) if err != nil { - wfe.log.Warningf("checking newAccount limits: %s", err) - return nil + return nil, fmt.Errorf("spending new account limits: %w", err) + } + + err = d.Result(wfe.clk.Now()) + if err != nil { + return nil, err } return func() { _, err := wfe.limiter.BatchRefund(ctx, txns) if err != nil { - wfe.log.Warningf("refunding newAccount limits: %s", err) + wfe.log.Warningf("refunding new account limits: %s", err) } - } + }, nil } // NewAccount is used by clients to submit a new account @@ -796,7 +793,16 @@ func (wfe *WebFrontEndImpl) NewAccount( InitialIP: ipBytes, } - refundLimits := wfe.checkNewAccountLimits(ctx, ip) + refundLimits, err := wfe.checkNewAccountLimits(ctx, ip) + if err != nil { + if errors.Is(err, berrors.RateLimit) { + if features.Get().UseKvLimitsForNewAccount { + wfe.sendError(response, logEvent, probs.RateLimited(err.Error()), err) + return + } + } + wfe.log.Warningf(err.Error()) + } var newRegistrationSuccessful bool var errIsRateLimit bool @@ -2026,37 +2032,33 @@ func (wfe *WebFrontEndImpl) orderToOrderJSON(request *http.Request, order *corep // encountered during the check, it is logged but not returned. A refund // function is returned that can be used to refund the quota if the order is not // created, the func will be nil if any error was encountered during the check. -// -// TODO(#5545): For now we're simply exercising the new rate limiter codepath. -// This should eventually return a berrors.RateLimit error containing the retry -// after duration among other information available in the ratelimits.Decision. -func (wfe *WebFrontEndImpl) checkNewOrderLimits(ctx context.Context, regId int64, names []string, isRenewal bool) func() { +func (wfe *WebFrontEndImpl) checkNewOrderLimits(ctx context.Context, regId int64, names []string, isRenewal bool) (func(), error) { if wfe.limiter == nil && wfe.txnBuilder == nil { // Key-value rate limiting is disabled. - return nil + return nil, nil } txns, err := wfe.txnBuilder.NewOrderLimitTransactions(regId, names, isRenewal) if err != nil { - wfe.log.Warningf("building new order limit transactions: %v", err) - return nil + return nil, fmt.Errorf("building new order limit transactions: %w", err) } - _, err = wfe.limiter.BatchSpend(ctx, txns) + d, err := wfe.limiter.BatchSpend(ctx, txns) if err != nil { - if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { - return nil - } - wfe.log.Warningf("checking newOrder limits: %s", err) - return nil + return nil, fmt.Errorf("spending new order limits: %w", err) + } + + err = d.Result(wfe.clk.Now()) + if err != nil { + return nil, err } return func() { _, err := wfe.limiter.BatchRefund(ctx, txns) if err != nil { - wfe.log.Warningf("refunding newOrder limits: %s", err) + wfe.log.Warningf("refunding new order limits: %s", err) } - } + }, nil } // orderMatchesReplacement checks if the order matches the provided certificate @@ -2367,7 +2369,16 @@ func (wfe *WebFrontEndImpl) NewOrder( return } - refundLimits := wfe.checkNewOrderLimits(ctx, acct.ID, names, isRenewal) + refundLimits, err := wfe.checkNewOrderLimits(ctx, acct.ID, names, isRenewal) + if err != nil { + if errors.Is(err, berrors.RateLimit) { + if features.Get().UseKvLimitsForNewOrder { + wfe.sendError(response, logEvent, probs.RateLimited(err.Error()), err) + return + } + } + wfe.log.Warningf(err.Error()) + } var newOrderSuccessful bool var errIsRateLimit bool