Skip to content

Commit

Permalink
Fix flaky TestAgentRolloutController (#49886)
Browse files Browse the repository at this point in the history
* Fix falky TestAgentRolloutController

* switch to real clock + increase Eventually timeout

* Make reconciliation period a parameter + add TELEPORT_UNSTABLE env var

* Update lib/service/service_test.go

Co-authored-by: Alan Parra <alan.parra@goteleport.com>

* Apply suggestions from code review

Co-authored-by: Alan Parra <alan.parra@goteleport.com>

* Remove env var

* lint

---------

Co-authored-by: Alan Parra <alan.parra@goteleport.com>
  • Loading branch information
hugoShaka and codingllama authored Dec 9, 2024
1 parent 373d34b commit 4aad389
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 13 deletions.
17 changes: 13 additions & 4 deletions lib/autoupdate/rollout/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
)

const (
reconcilerPeriod = time.Minute
defaultReconcilerPeriod = time.Minute
)

// Controller wakes up every minute to reconcile the autoupdate_agent_rollout resource.
Expand All @@ -43,10 +43,14 @@ type Controller struct {
reconciler reconciler
clock clockwork.Clock
log *slog.Logger
period time.Duration
}

// NewController creates a new Controller for the autoupdate_agent_rollout kind.
func NewController(client Client, log *slog.Logger, clock clockwork.Clock) (*Controller, error) {
// The period can be specified to control the sync frequency. This is mainly
// used to speed up tests or for demo purposes. When empty, the controller picks
// a sane default value.
func NewController(client Client, log *slog.Logger, clock clockwork.Clock, period time.Duration) (*Controller, error) {
if client == nil {
return nil, trace.BadParameter("missing client")
}
Expand All @@ -57,6 +61,10 @@ func NewController(client Client, log *slog.Logger, clock clockwork.Clock) (*Con
return nil, trace.BadParameter("missing clock")
}

if period <= 0 {
period = defaultReconcilerPeriod
}

return &Controller{
clock: clock,
log: log,
Expand All @@ -67,14 +75,15 @@ func NewController(client Client, log *slog.Logger, clock clockwork.Clock) (*Con
// TODO(hugoShaka): add the strategies here as we implement them
},
},
period: period,
}, nil
}

// Run the autoupdate_agent_rollout controller. This function returns only when its context is canceled.
func (c *Controller) Run(ctx context.Context) error {
config := interval.Config{
Duration: reconcilerPeriod,
FirstDuration: reconcilerPeriod,
Duration: c.period,
FirstDuration: c.period,
Jitter: retryutils.SeventhJitter,
Clock: c.clock,
}
Expand Down
2 changes: 1 addition & 1 deletion lib/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -2435,7 +2435,7 @@ func (process *TeleportProcess) initAuthService() error {
return trace.Wrap(spiffeFedSyncer.Run(process.GracefulExitContext()), "running SPIFFEFederation Syncer")
})

agentRolloutController, err := rollout.NewController(authServer, logger, process.Clock)
agentRolloutController, err := rollout.NewController(authServer, logger, process.Clock, cfg.Auth.AgentRolloutControllerSyncPeriod)
if err != nil {
return trace.Wrap(err, "creating the rollout controller")
}
Expand Down
15 changes: 7 additions & 8 deletions lib/service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1851,13 +1851,13 @@ func TestInitDatabaseService(t *testing.T) {
func TestAgentRolloutController(t *testing.T) {
t.Parallel()

// Test setup: create a Teleport Auth config
fakeClock := clockwork.NewFakeClock()

dataDir := makeTempDir(t)

cfg := servicecfg.MakeDefaultConfig()
cfg.Clock = fakeClock
// We use a real clock because too many sevrices are using the clock and it's not possible to accurately wait for
// each one of them to reach the point where they wait for the clock to advance. If we add a WaitUntil(X waiters)
// check, this will break the next time we add a new waiter.
cfg.Clock = clockwork.NewRealClock()
cfg.DataDir = dataDir
cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"})
cfg.Auth.Enabled = true
Expand All @@ -1866,6 +1866,8 @@ func TestAgentRolloutController(t *testing.T) {
cfg.DebugService.Enabled = false
cfg.Auth.StorageConfig.Params["path"] = dataDir
cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}
// Speed up the reconciliation period for testing purposes.
cfg.Auth.AgentRolloutControllerSyncPeriod = 200 * time.Millisecond
cfg.CircuitBreakerConfig = breaker.NoopBreakerConfig()

process, err := NewTeleport(cfg)
Expand Down Expand Up @@ -1904,17 +1906,14 @@ func TestAgentRolloutController(t *testing.T) {
version, err = authServer.CreateAutoUpdateVersion(ctx, version)
require.NoError(t, err)

// Test execution: advance clock to trigger a reconciliation
fakeClock.Advance(2 * time.Minute)

// Test validation: check that a new autoupdate_agent_rollout config was created
require.Eventually(t, func() bool {
rollout, err := authServer.GetAutoUpdateAgentRollout(ctx)
if err != nil {
return false
}
return rollout.Spec.GetTargetVersion() == version.Spec.GetAgents().GetTargetVersion()
}, time.Second, 10*time.Millisecond)
}, 5*time.Second, 10*time.Millisecond)
}

// makeTempDir makes a temp dir with a shorter name than t.TempDir() in order to
Expand Down
7 changes: 7 additions & 0 deletions lib/service/servicecfg/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package servicecfg

import (
"slices"
"time"

"github.com/dustin/go-humanize"
"github.com/gravitational/trace"
Expand Down Expand Up @@ -116,6 +117,12 @@ type AuthConfig struct {

// AccessMonitoring configures access monitoring.
AccessMonitoring *AccessMonitoringOptions

// AgentRolloutControllerSyncPeriod controls the period between two
// reconciliations of the agent rollout controller. This value is jittered.
// Empty value means the controller uses its default.
// Used in tests.
AgentRolloutControllerSyncPeriod time.Duration
}

// AccessMonitoringOptions configures access monitoring.
Expand Down

0 comments on commit 4aad389

Please sign in to comment.