Skip to content

Commit

Permalink
🌱 Retry CRD detection during startup (#2917)
Browse files Browse the repository at this point in the history
* Retry CRD detection during startup

* Address comments

* More ffedback

* Fix error handling
  • Loading branch information
fabriziopandini authored Apr 19, 2024
1 parent 74a6e80 commit 1b870cc
Showing 1 changed file with 30 additions and 20 deletions.
50 changes: 30 additions & 20 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package main

import (
"context"
"errors"
"flag"
"fmt"
"os"
Expand All @@ -32,6 +31,8 @@ import (
"gopkg.in/fsnotify.v1"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime/schema"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/wait"
cliflag "k8s.io/component-base/cli/flag"
"k8s.io/component-base/logs"
logsv1 "k8s.io/component-base/logs/api/v1"
Expand Down Expand Up @@ -268,37 +269,46 @@ func main() {
return perrors.Wrapf(err, "unable to create remote cluster cache tracker")
}

// Check for non-supervisor VSphereCluster and start controller if found
gvr := infrav1.GroupVersion.WithResource(reflect.TypeOf(&infrav1.VSphereCluster{}).Elem().Name())
isNonSupervisorCRDLoaded, err := isCRDDeployed(mgr, gvr)
if err != nil {
return err
govmomiGVR := infrav1.GroupVersion.WithResource(reflect.TypeOf(&infrav1.VSphereCluster{}).Elem().Name())
supervisorGVR := vmwarev1.GroupVersion.WithResource(reflect.TypeOf(&vmwarev1.VSphereCluster{}).Elem().Name())

var isSupervisorCRDLoaded, isGovmomiCRDLoaded bool
var errGovmomi, errSupervisor error
if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 30*time.Second, true, func(_ context.Context) (bool, error) {
// Check for non-supervisor VSphereCluster and start controller if found
isGovmomiCRDLoaded, errGovmomi = isCRDDeployed(mgr, govmomiGVR)

// Check for supervisor VSphereCluster and start controller if found
isSupervisorCRDLoaded, errSupervisor = isCRDDeployed(mgr, supervisorGVR)

// One of govmomi/supervisor mode should be detected, otherwise keep trying until timeout to handle
// race conditions during controllers startup right after install or upgrades, when also CRDs
// are installed in a short time frame.
if (isGovmomiCRDLoaded && errGovmomi == nil) || (isSupervisorCRDLoaded && errSupervisor == nil) {
return true, nil
}
return false, nil
}); err != nil {
// Continuing startup does not make sense without one of govmomi/supervisor mode detected.
// The Pod goes in CrashLoopBack and eventually recover, but failing to detect CRD after 30s is usually
// a signal of some problem.
return fmt.Errorf("neither supervisor nor govmomi CRDs detected: %w", kerrors.NewAggregate([]error{err, errGovmomi, errSupervisor}))
}
if isNonSupervisorCRDLoaded {

if isGovmomiCRDLoaded {
if err := setupVAPIControllers(ctx, controllerCtx, mgr, tracker); err != nil {
return fmt.Errorf("setupVAPIControllers: %w", err)
}
} else {
setupLog.Info(fmt.Sprintf("CRD for %s not loaded, skipping.", gvr.String()))
setupLog.Info(fmt.Sprintf("CRD for %s not loaded, skipping.", govmomiGVR.String()))
}

// Check for supervisor VSphereCluster and start controller if found
gvr = vmwarev1.GroupVersion.WithResource(reflect.TypeOf(&vmwarev1.VSphereCluster{}).Elem().Name())
isSupervisorCRDLoaded, err := isCRDDeployed(mgr, gvr)
if err != nil {
return err
}
if isSupervisorCRDLoaded {
if err := setupSupervisorControllers(ctx, controllerCtx, mgr, tracker); err != nil {
return fmt.Errorf("setupSupervisorControllers: %w", err)
}
} else {
setupLog.Info(fmt.Sprintf("CRD for %s not loaded, skipping.", gvr.String()))
}

// Continuing startup does not make sense without having managers added.
if !isSupervisorCRDLoaded && !isNonSupervisorCRDLoaded {
return errors.New("neither supervisor nor non-supervisor CRDs detected")
setupLog.Info(fmt.Sprintf("CRD for %s not loaded, skipping.", supervisorGVR.String()))
}

return nil
Expand Down

0 comments on commit 1b870cc

Please sign in to comment.