Skip to content

Commit

Permalink
Disable circuit breaking for proxies (#44416)
Browse files Browse the repository at this point in the history
Proxy circuit breakers can be tripped by errors that may occur
during joining, due to some configuration issues with the join
mechanism, or potentially due to a malicious user hitting /webapi/ping
too frequently. As a result the cluster can end up in an unusable
state that can only be rectified by fixing the join token issue
or turning off agents attempting to join.

To avoid this class of problems entirely, the proxy circuit breaker
is now a noop. While this does mean backoff during periods when
auth connectivity is spotty will be reduced for proxies, they are
usually far outnumbered by agents in the cluster. Their lack of
circuit breaking shouldn't exacerbate thundering herds.
  • Loading branch information
rosstimothy authored Jul 18, 2024
1 parent 37016a7 commit 66011fd
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions lib/service/connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,19 @@ func (process *TeleportProcess) newClient(identity *state.Identity) (*authclient
return nil, nil, trace.NotImplemented("could not find connection strategy for config version %s", process.Config.Version)
}

// breakerConfigForRole picks the circuit breaker configuration to use when
// building a client for the given role.
//
// Proxies get a no-op breaker: a proxy frequently relays requests to auth on
// behalf of joining agents or unauthenticated users (webapi/ping), and errors
// from those relayed requests could trip the breaker even though auth itself
// is healthy. Agents should far outnumber proxies in a cluster, so skipping
// the breaker here is not expected to have much impact.
func (process *TeleportProcess) breakerConfigForRole(role types.SystemRole) breaker.Config {
	// The explicit role is checked first; instanceRoleExpected is only
	// consulted when the role itself is not the proxy role.
	proxy := role == types.RoleProxy || process.instanceRoleExpected(types.RoleProxy)
	if !proxy {
		// Every other role goes through InstrumentBreakerForConnector with
		// the process-wide circuit breaker config.
		return servicebreaker.InstrumentBreakerForConnector(role, process.Config.CircuitBreakerConfig)
	}
	return breaker.NoopBreakerConfig()
}

func (process *TeleportProcess) newClientThroughTunnel(tlsConfig *tls.Config, sshConfig *ssh.ClientConfig, role types.SystemRole) (*authclient.Client, *proto.PingResponse, error) {
dialer, err := reversetunnelclient.NewTunnelAuthDialer(reversetunnelclient.TunnelAuthDialerConfig{
Resolver: process.resolver,
Expand All @@ -1258,7 +1271,7 @@ func (process *TeleportProcess) newClientThroughTunnel(tlsConfig *tls.Config, ss
Credentials: []apiclient.Credentials{
apiclient.LoadTLS(tlsConfig),
},
CircuitBreakerConfig: servicebreaker.InstrumentBreakerForConnector(role, process.Config.CircuitBreakerConfig),
CircuitBreakerConfig: process.breakerConfigForRole(role),
DialTimeout: process.Config.Testing.ClientTimeout,
})
if err != nil {
Expand Down Expand Up @@ -1305,7 +1318,7 @@ func (process *TeleportProcess) newClientDirect(authServers []utils.NetAddr, tls
apiclient.LoadTLS(tlsConfig),
},
DialTimeout: process.Config.Testing.ClientTimeout,
CircuitBreakerConfig: servicebreaker.InstrumentBreakerForConnector(role, process.Config.CircuitBreakerConfig),
CircuitBreakerConfig: process.breakerConfigForRole(role),
DialOpts: dialOpts,
}, cltParams...)
if err != nil {
Expand Down

0 comments on commit 66011fd

Please sign in to comment.