From 1b870cca19bbe6f6de90f9c3e89aec553dd2608f Mon Sep 17 00:00:00 2001 From: Fabrizio Pandini Date: Fri, 19 Apr 2024 12:30:51 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=B1=20Retry=20CRD=20detection=20during?= =?UTF-8?q?=20startup=20(#2917)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Retry CRD detection during startup * Address comments * More ffedback * Fix error handling --- main.go | 50 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/main.go b/main.go index dd902db56a..ad0b586e57 100644 --- a/main.go +++ b/main.go @@ -19,7 +19,6 @@ package main import ( "context" - "errors" "flag" "fmt" "os" @@ -32,6 +31,8 @@ import ( "gopkg.in/fsnotify.v1" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime/schema" + kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/wait" cliflag "k8s.io/component-base/cli/flag" "k8s.io/component-base/logs" logsv1 "k8s.io/component-base/logs/api/v1" @@ -268,37 +269,46 @@ func main() { return perrors.Wrapf(err, "unable to create remote cluster cache tracker") } - // Check for non-supervisor VSphereCluster and start controller if found - gvr := infrav1.GroupVersion.WithResource(reflect.TypeOf(&infrav1.VSphereCluster{}).Elem().Name()) - isNonSupervisorCRDLoaded, err := isCRDDeployed(mgr, gvr) - if err != nil { - return err + govmomiGVR := infrav1.GroupVersion.WithResource(reflect.TypeOf(&infrav1.VSphereCluster{}).Elem().Name()) + supervisorGVR := vmwarev1.GroupVersion.WithResource(reflect.TypeOf(&vmwarev1.VSphereCluster{}).Elem().Name()) + + var isSupervisorCRDLoaded, isGovmomiCRDLoaded bool + var errGovmomi, errSupervisor error + if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 30*time.Second, true, func(_ context.Context) (bool, error) { + // Check for non-supervisor VSphereCluster and start controller if found + isGovmomiCRDLoaded, errGovmomi = isCRDDeployed(mgr, govmomiGVR) + + // Check for supervisor VSphereCluster and start controller if found + isSupervisorCRDLoaded, errSupervisor = isCRDDeployed(mgr, supervisorGVR) + + // One of govmomi/supervisor mode should be detected, otherwise keep trying until timeout to handle + // race conditions during controllers startup right after install or upgrades, when also CRDs + // are installed in a short time frame. + if (isGovmomiCRDLoaded && errGovmomi == nil) || (isSupervisorCRDLoaded && errSupervisor == nil) { + return true, nil + } + return false, nil + }); err != nil { + // Continuing startup does not make sense without one of govmomi/supervisor mode detected. + // The Pod goes in CrashLoopBack and eventually recover, but failing to detect CRD after 30s is usually + // a signal of some problem. + return fmt.Errorf("neither supervisor nor govmomi CRDs detected: %w", kerrors.NewAggregate([]error{err, errGovmomi, errSupervisor})) } - if isNonSupervisorCRDLoaded { + + if isGovmomiCRDLoaded { if err := setupVAPIControllers(ctx, controllerCtx, mgr, tracker); err != nil { return fmt.Errorf("setupVAPIControllers: %w", err) } } else { - setupLog.Info(fmt.Sprintf("CRD for %s not loaded, skipping.", gvr.String())) + setupLog.Info(fmt.Sprintf("CRD for %s not loaded, skipping.", govmomiGVR.String())) } - // Check for supervisor VSphereCluster and start controller if found - gvr = vmwarev1.GroupVersion.WithResource(reflect.TypeOf(&vmwarev1.VSphereCluster{}).Elem().Name()) - isSupervisorCRDLoaded, err := isCRDDeployed(mgr, gvr) - if err != nil { - return err - } if isSupervisorCRDLoaded { if err := setupSupervisorControllers(ctx, controllerCtx, mgr, tracker); err != nil { return fmt.Errorf("setupSupervisorControllers: %w", err) } } else { - setupLog.Info(fmt.Sprintf("CRD for %s not loaded, skipping.", gvr.String())) - } - - // Continuing startup does not make sense without having managers added. - if !isSupervisorCRDLoaded && !isNonSupervisorCRDLoaded { - return errors.New("neither supervisor nor non-supervisor CRDs detected") + setupLog.Info(fmt.Sprintf("CRD for %s not loaded, skipping.", supervisorGVR.String())) } return nil