diff --git a/api/v1alpha1/drplacementcontrol_types.go b/api/v1alpha1/drplacementcontrol_types.go index 911ebdd6e5..3692908580 100644 --- a/api/v1alpha1/drplacementcontrol_types.go +++ b/api/v1alpha1/drplacementcontrol_types.go @@ -69,6 +69,7 @@ const ( ReasonCleaning = "Cleaning" ReasonSuccess = "Success" ReasonNotStarted = "NotStarted" + ReasonPaused = "Paused" ) type ProgressionStatus string @@ -93,6 +94,7 @@ const ( ProgressionEnsuringVolSyncSetup = ProgressionStatus("EnsuringVolSyncSetup") ProgressionSettingupVolsyncDest = ProgressionStatus("SettingUpVolSyncDest") ProgressionDeleting = ProgressionStatus("Deleting") + ProgressionActionPaused = ProgressionStatus("Paused") ) // DRPlacementControlSpec defines the desired state of DRPlacementControl diff --git a/controllers/drcluster_mmode.go b/controllers/drcluster_mmode.go index b151edfd39..83bea1fb88 100644 --- a/controllers/drcluster_mmode.go +++ b/controllers/drcluster_mmode.go @@ -115,7 +115,7 @@ func (u *drclusterInstance) getVRGs(drpcCollection DRPCAndPolicy) (map[string]*r return nil, err } - vrgs, failedToQueryCluster, err := getVRGsFromManagedClusters( + vrgs, _, failedToQueryCluster, err := getVRGsFromManagedClusters( u.reconciler.MCVGetter, drpcCollection.drpc, drClusters, diff --git a/controllers/drplacementcontrol.go b/controllers/drplacementcontrol.go index 7f0efea911..5253c29a02 100644 --- a/controllers/drplacementcontrol.go +++ b/controllers/drplacementcontrol.go @@ -18,7 +18,6 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/yaml" rmn "github.com/ramendr/ramen/api/v1alpha1" @@ -323,13 +322,6 @@ func (d *DRPCInstance) startDeploying(homeCluster, homeClusterNamespace string) func (d *DRPCInstance) RunFailover() (bool, error) { d.log.Info("Entering RunFailover", "state", d.getLastDRState()) - if d.isPlacementNeedsFixing() { - err := d.fixupPlacementForFailover() - if err != nil { - d.log.Info("Couldn't fix up placement for Failover") - } - } - const done = true // We are done if empty @@ -662,7 +654,7 @@ func GetLastKnownVRGPrimaryFromS3( vrg := &rmn.VolumeReplicationGroup{} if err := vrgObjectDownload(objectStorer, sourcePathNamePrefix, vrg); err != nil { - log.Error(err, "Kube objects capture-to-recover-from identifier get error") + log.Info(fmt.Sprintf("Failed to get VRG from s3 store - s3ProfileName %s. 
Err %v", s3ProfileName, err)) continue } @@ -773,13 +765,6 @@ func checkActivationForStorageIdentifier( func (d *DRPCInstance) RunRelocate() (bool, error) { d.log.Info("Entering RunRelocate", "state", d.getLastDRState(), "progression", d.getProgression()) - if d.isPlacementNeedsFixing() { - err := d.fixupPlacementForRelocate() - if err != nil { - d.log.Info("Couldn't fix up placement for Relocate") - } - } - const done = true preferredCluster := d.instance.Spec.PreferredCluster @@ -816,7 +801,7 @@ func (d *DRPCInstance) RunRelocate() (bool, error) { } if d.getLastDRState() != rmn.Relocating && !d.validatePeerReady() { - return !done, fmt.Errorf("clean up on secondaries pending (%+v)", d.instance) + return !done, fmt.Errorf("clean up secondaries is pending (%+v)", d.instance.Status.Conditions) } if curHomeCluster != "" && curHomeCluster != preferredCluster { @@ -841,6 +826,11 @@ func (d *DRPCInstance) ensureActionCompleted(srcCluster string) (bool, error) { return !done, err } + err = d.ensurePlacement(srcCluster) + if err != nil { + return !done, err + } + d.setProgression(rmn.ProgressionCleaningUp) // Cleanup and setup VolSync if enabled @@ -1009,12 +999,12 @@ func (d *DRPCInstance) areMultipleVRGsPrimary() bool { func (d *DRPCInstance) validatePeerReady() bool { condition := findCondition(d.instance.Status.Conditions, rmn.ConditionPeerReady) - d.log.Info(fmt.Sprintf("validatePeerReady -- Condition %v", condition)) - if condition == nil || condition.Status == metav1.ConditionTrue { return true } + d.log.Info("validatePeerReady", "Condition", condition) + return false } @@ -1281,15 +1271,13 @@ func (d *DRPCInstance) vrgExistsAndPrimary(targetCluster string) bool { return false } - clusterDecision := d.reconciler.getClusterDecision(d.userPlacement) - if clusterDecision.ClusterName != "" && - targetCluster == clusterDecision.ClusterName { - d.log.Info(fmt.Sprintf("Already %q to cluster %s", d.getLastDRState(), targetCluster)) - - return true + if !vrg.GetDeletionTimestamp().IsZero() { + return false } - return false + d.log.Info(fmt.Sprintf("Already %q to cluster %s", d.getLastDRState(), targetCluster)) + + return true } func (d *DRPCInstance) mwExistsAndPlacementUpdated(targetCluster string) (bool, error) { @@ -1435,7 +1423,7 @@ func (d *DRPCInstance) createVRGManifestWork(homeCluster string, repState rmn.Re d.log.Info("Creating VRG ManifestWork", "Last State:", d.getLastDRState(), "cluster", homeCluster) - vrg := d.generateVRG(repState) + vrg := d.generateVRG(homeCluster, repState) vrg.Spec.VolSync.Disabled = d.volSyncDisabled annotations := make(map[string]string) @@ -1468,6 +1456,18 @@ func (d *DRPCInstance) ensureVRGManifestWork(homeCluster string) error { return d.createVRGManifestWork(homeCluster, cachedVrg.Spec.ReplicationState) } +func (d *DRPCInstance) ensurePlacement(homeCluster string) error { + clusterDecision := d.reconciler.getClusterDecision(d.userPlacement) + if clusterDecision.ClusterName == "" || + homeCluster != clusterDecision.ClusterName { + d.updatePreferredDecision() + + return d.updateUserPlacementRule(homeCluster, homeCluster) + } + + return nil +} + func vrgAction(drpcAction rmn.DRAction) rmn.VRGAction { switch drpcAction { case rmn.ActionFailover: @@ -1488,14 +1488,20 @@ func (d *DRPCInstance) setVRGAction(vrg *rmn.VolumeReplicationGroup) { vrg.Spec.Action = action } -func (d *DRPCInstance) generateVRG(repState rmn.ReplicationState) rmn.VolumeReplicationGroup { +func (d *DRPCInstance) generateVRG(dstCluster string, repState rmn.ReplicationState) 
rmn.VolumeReplicationGroup { vrg := rmn.VolumeReplicationGroup{ - TypeMeta: metav1.TypeMeta{Kind: "VolumeReplicationGroup", APIVersion: "ramendr.openshift.io/v1alpha1"}, - ObjectMeta: metav1.ObjectMeta{Name: d.instance.Name, Namespace: d.vrgNamespace}, + TypeMeta: metav1.TypeMeta{Kind: "VolumeReplicationGroup", APIVersion: "ramendr.openshift.io/v1alpha1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: d.instance.Name, + Namespace: d.vrgNamespace, + Annotations: map[string]string{ + DestinationClusterAnnotationKey: dstCluster, + }, + }, Spec: rmn.VolumeReplicationGroupSpec{ PVCSelector: d.instance.Spec.PVCSelector, ReplicationState: repState, - S3Profiles: d.availableS3Profiles(), + S3Profiles: AvailableS3Profiles(d.drClusters), KubeObjectProtection: d.instance.Spec.KubeObjectProtection, }, } @@ -1507,21 +1513,6 @@ func (d *DRPCInstance) generateVRG(repState rmn.ReplicationState) rmn.VolumeRepl return vrg } -func (d *DRPCInstance) availableS3Profiles() []string { - profiles := sets.New[string]() - - for i := range d.drClusters { - drCluster := &d.drClusters[i] - if drClusterIsDeleted(drCluster) { - continue - } - - profiles.Insert(drCluster.Spec.S3ProfileName) - } - - return sets.List(profiles) -} - func (d *DRPCInstance) generateVRGSpecAsync() *rmn.VRGAsyncSpec { if dRPolicySupportsRegional(d.drPolicy, d.drClusters) { return &rmn.VRGAsyncSpec{ @@ -1651,10 +1642,9 @@ func (d *DRPCInstance) ensureClusterDataRestored(homeCluster string) (*rmn.Volum annotations[DRPCNameAnnotation] = d.instance.Name annotations[DRPCNamespaceAnnotation] = d.instance.Namespace - vrg, err := d.reconciler.MCVGetter.GetVRGFromManagedCluster(d.instance.Name, - d.vrgNamespace, homeCluster, annotations) - if err != nil { - return nil, false, fmt.Errorf("failed to get VRG %s from cluster %s (err: %w)", d.instance.Name, homeCluster, err) + vrg := d.vrgs[homeCluster] + if vrg == nil { + return nil, false, fmt.Errorf("failed to get VRG %s from cluster %s", d.instance.Name, homeCluster) } // ClusterDataReady condition tells us whether the PVs have been applied on the @@ -2315,126 +2305,6 @@ func (d *DRPCInstance) setConditionOnInitialDeploymentCompletion() { metav1.ConditionTrue, rmn.ReasonSuccess, "Ready") } -func (d *DRPCInstance) isPlacementNeedsFixing() bool { - // Needs fixing if and only if the DRPC Status is empty, the Placement decision is empty, and - // the we have VRG(s) in the managed clusters - clusterDecision := d.reconciler.getClusterDecision(d.userPlacement) - d.log.Info(fmt.Sprintf("Check placement if needs fixing: PrD %v, PlD %v, VRGs %d", - d.instance.Status.PreferredDecision, clusterDecision, len(d.vrgs))) - - if reflect.DeepEqual(d.instance.Status.PreferredDecision, plrv1.PlacementDecision{}) && - (clusterDecision == nil || clusterDecision.ClusterName == "") && len(d.vrgs) > 0 { - return true - } - - return false -} - -func (d *DRPCInstance) selectCurrentPrimaries() []string { - var primaries []string - - for clusterName, vrg := range d.vrgs { - if isVRGPrimary(vrg) { - primaries = append(primaries, clusterName) - } - } - - return primaries -} - -func (d *DRPCInstance) selectPrimaryForFailover(primaries []string) string { - for _, clusterName := range primaries { - if clusterName == d.instance.Spec.FailoverCluster { - return clusterName - } - } - - return "" -} - -func (d *DRPCInstance) fixupPlacementForFailover() error { - d.log.Info("Fixing PlacementRule for failover...") - - var primary string - - var primaries []string - - var secondaries []string - - if d.areMultipleVRGsPrimary() { - primaries := 
d.selectCurrentPrimaries() - primary = d.selectPrimaryForFailover(primaries) - } else { - primary, secondaries = d.selectCurrentPrimaryAndSecondaries() - } - - // IFF we have a primary cluster, and it points to the failoverCluster, then rebuild the - // drpc status with it. - if primary != "" && primary == d.instance.Spec.FailoverCluster { - err := d.updateUserPlacementRule(primary, "") - if err != nil { - return err - } - - // Update our 'well known' preferred placement - d.updatePreferredDecision() - - peerReadyConditionStatus := metav1.ConditionTrue - peerReadyMsg := "Ready" - // IFF more than one primary then the failover hasn't entered the cleanup phase. - // IFF we have a secondary, then the failover hasn't completed the cleanup phase. - // We need to start where it was left off (best guess). - if len(primaries) > 1 || len(secondaries) > 0 { - d.instance.Status.Phase = rmn.FailingOver - peerReadyConditionStatus = metav1.ConditionFalse - peerReadyMsg = "NotReady" - } - - addOrUpdateCondition(&d.instance.Status.Conditions, rmn.ConditionPeerReady, d.instance.Generation, - peerReadyConditionStatus, rmn.ReasonSuccess, peerReadyMsg) - - addOrUpdateCondition(&d.instance.Status.Conditions, rmn.ConditionAvailable, d.instance.Generation, - metav1.ConditionTrue, string(d.instance.Status.Phase), "Available") - - return nil - } - - return fmt.Errorf("detected a failover, but it can't rebuild the state") -} - -func (d *DRPCInstance) fixupPlacementForRelocate() error { - if d.areMultipleVRGsPrimary() { - return fmt.Errorf("unconstructable state. Can't have multiple primaries on 'Relocate'") - } - - primary, secondaries := d.selectCurrentPrimaryAndSecondaries() - d.log.Info(fmt.Sprintf("Fixing PlacementRule for relocation. Primary (%s), Secondaries (%v)", - primary, secondaries)) - - // IFF we have a primary cluster, then update the PlacementRule and reset PeerReady to false - // Setting the PeerReady condition status to false allows peer cleanup if necessary - if primary != "" { - err := d.updateUserPlacementRule(primary, "") - if err != nil { - return err - } - - // Assume that it is not clean - addOrUpdateCondition(&d.instance.Status.Conditions, rmn.ConditionPeerReady, d.instance.Generation, - metav1.ConditionFalse, rmn.ReasonNotStarted, "NotReady") - } else if len(secondaries) > 1 { - // Use case 3: After Hub Recovery, the DRPC Action found to be 'Relocate', and ALL VRGs are secondary - addOrUpdateCondition(&d.instance.Status.Conditions, rmn.ConditionPeerReady, d.instance.Generation, - metav1.ConditionTrue, rmn.ReasonSuccess, - fmt.Sprintf("Fixed for relocation to %q", d.instance.Spec.PreferredCluster)) - } - - // Update our 'well known' preferred placement - d.updatePreferredDecision() - - return nil -} - func (d *DRPCInstance) setStatusInitiating() { if !(d.instance.Status.Phase == "" || d.instance.Status.Phase == rmn.Deployed || diff --git a/controllers/drplacementcontrol_controller.go b/controllers/drplacementcontrol_controller.go index 4cd05ca4eb..bd6644f0f7 100644 --- a/controllers/drplacementcontrol_controller.go +++ b/controllers/drplacementcontrol_controller.go @@ -20,6 +20,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -57,6 +58,8 @@ const ( // Maximum retries to create PlacementDecisionName with an increasing index in case of conflicts // 
with existing PlacementDecision resources MaxPlacementDecisionConflictCount = 5 + + DestinationClusterAnnotationKey = "drplacementcontrol.ramendr.openshift.io/destination-cluster" ) var InitialWaitTimeForDRPCPlacementRule = errorswrapper.New("Waiting for DRPC Placement to produces placement decision") @@ -587,7 +590,7 @@ func (r *DRPlacementControlReconciler) SetupWithManager(mgr ctrl.Manager) error // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.0/pkg/reconcile // -//nolint:cyclop,funlen,gocognit +//nolint:funlen,gocognit,gocyclo,cyclop func (r *DRPlacementControlReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := r.Log.WithValues("DRPC", req.NamespacedName, "rid", uuid.New()) @@ -651,6 +654,16 @@ func (r *DRPlacementControlReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{Requeue: true}, nil } + // Rebuild DRPC state if needed + requeue, err := r.ensureDRPCStatusConsistency(ctx, drpc, drPolicy, placementObj, logger) + if err != nil { + return ctrl.Result{}, err + } + + if requeue { + return ctrl.Result{Requeue: true}, r.updateDRPCStatus(ctx, drpc, placementObj, logger) + } + d, err := r.createDRPCInstance(ctx, drPolicy, drpc, placementObj, logger) if err != nil && !errorswrapper.Is(err, InitialWaitTimeForDRPCPlacementRule) { err2 := r.updateDRPCStatus(ctx, drpc, placementObj, logger) @@ -775,7 +788,7 @@ func (r *DRPlacementControlReconciler) createDRPCInstance( return nil, err } - vrgs, err := updateVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log) + vrgs, _, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log) if err != nil { return nil, err } @@ -1070,7 +1083,7 @@ func (r *DRPlacementControlReconciler) finalizeDRPC(ctx context.Context, drpc *r } // Verify VRGs have been deleted - vrgs, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log) + vrgs, _, _, err := getVRGsFromManagedClusters(r.MCVGetter, drpc, drClusters, vrgNamespace, log) if err != nil { return fmt.Errorf("failed to retrieve VRGs. We'll retry later. 
Error (%w)", err) } @@ -1425,30 +1438,13 @@ func (r *DRPlacementControlReconciler) clonePlacementRule(ctx context.Context, return clonedPlRule, nil } -func updateVRGsFromManagedClusters(mcvGetter rmnutil.ManagedClusterViewGetter, drpc *rmn.DRPlacementControl, - drClusters []rmn.DRCluster, vrgNamespace string, log logr.Logger, -) (map[string]*rmn.VolumeReplicationGroup, error) { - vrgs, failedClusterToQuery, err := getVRGsFromManagedClusters(mcvGetter, drpc, drClusters, vrgNamespace, log) - if err != nil { - return nil, err - } - - if len(vrgs) == 0 && failedClusterToQuery != drpc.Spec.FailoverCluster { - condition := findCondition(drpc.Status.Conditions, rmn.ConditionPeerReady) - if condition == nil { - addOrUpdateCondition(&drpc.Status.Conditions, rmn.ConditionAvailable, - drpc.Generation, metav1.ConditionTrue, rmn.ReasonSuccess, "Forcing Available") - addOrUpdateCondition(&drpc.Status.Conditions, rmn.ConditionPeerReady, drpc.Generation, - metav1.ConditionTrue, rmn.ReasonSuccess, "Forcing Ready") - } - } - - return vrgs, nil -} - -func getVRGsFromManagedClusters(mcvGetter rmnutil.ManagedClusterViewGetter, drpc *rmn.DRPlacementControl, - drClusters []rmn.DRCluster, vrgNamespace string, log logr.Logger, -) (map[string]*rmn.VolumeReplicationGroup, string, error) { +func getVRGsFromManagedClusters( + mcvGetter rmnutil.ManagedClusterViewGetter, + drpc *rmn.DRPlacementControl, + drClusters []rmn.DRCluster, + vrgNamespace string, + log logr.Logger, +) (map[string]*rmn.VolumeReplicationGroup, int, string, error) { vrgs := map[string]*rmn.VolumeReplicationGroup{} annotations := make(map[string]string) @@ -1456,10 +1452,10 @@ func getVRGsFromManagedClusters(mcvGetter rmnutil.ManagedClusterViewGetter, drpc annotations[DRPCNameAnnotation] = drpc.Name annotations[DRPCNamespaceAnnotation] = drpc.Namespace - var failedClusterToQuery string - var clustersQueriedSuccessfully int + var failedCluster string + for i := range drClusters { drCluster := &drClusters[i] @@ -1473,9 +1469,9 @@ func getVRGsFromManagedClusters(mcvGetter rmnutil.ManagedClusterViewGetter, drpc continue } - failedClusterToQuery = drCluster.Name + failedCluster = drCluster.Name - log.Info(fmt.Sprintf("failed to retrieve VRG from %s. err (%v)", drCluster.Name, err)) + log.Info(fmt.Sprintf("failed to retrieve VRG from %s. err (%v).", drCluster.Name, err)) continue } @@ -1495,14 +1491,14 @@ func getVRGsFromManagedClusters(mcvGetter rmnutil.ManagedClusterViewGetter, drpc // We are done if we successfully queried all drClusters if clustersQueriedSuccessfully == len(drClusters) { - return vrgs, "", nil + return vrgs, clustersQueriedSuccessfully, "", nil } if clustersQueriedSuccessfully == 0 { - return vrgs, "", fmt.Errorf("failed to retrieve VRGs from clusters") + return vrgs, 0, "", fmt.Errorf("failed to retrieve VRGs from clusters") } - return vrgs, failedClusterToQuery, nil + return vrgs, clustersQueriedSuccessfully, failedCluster, nil } func (r *DRPlacementControlReconciler) deleteClonedPlacementRule(ctx context.Context, @@ -1545,6 +1541,10 @@ func (r *DRPlacementControlReconciler) addClusterPeersToPlacementRule( // DRPC status is updated at least once every StatusCheckDelay in order to refresh // the VRG status. 
func (d *DRPCInstance) statusUpdateTimeElapsed() bool {
+    if d.instance.Status.LastUpdateTime == nil {
+        return false
+    }
+
    return d.instance.Status.LastUpdateTime.Add(StatusCheckDelay).Before(time.Now())
}
@@ -2051,3 +2051,259 @@ func ensureDRPCConditionsInited(conditions *[]metav1.Condition, observedGenerati
        Message: message,
    })
}
+
+func AvailableS3Profiles(drClusters []rmn.DRCluster) []string {
+    profiles := sets.New[string]()
+
+    for i := range drClusters {
+        drCluster := &drClusters[i]
+        if drClusterIsDeleted(drCluster) {
+            continue
+        }
+
+        profiles.Insert(drCluster.Spec.S3ProfileName)
+    }
+
+    return sets.List(profiles)
+}
+
+type Progress int
+
+const (
+    Continue      = 1
+    AllowFailover = 2
+    Stop          = 3
+)
+
+func (r *DRPlacementControlReconciler) ensureDRPCStatusConsistency(
+    ctx context.Context,
+    drpc *rmn.DRPlacementControl,
+    drPolicy *rmn.DRPolicy,
+    placementObj client.Object,
+    log logr.Logger,
+) (bool, error) {
+    requeue := true
+
+    // This will always be false when the DRPC resource is first created OR after hub recovery
+    if drpc.Status.Phase != "" {
+        return !requeue, nil
+    }
+
+    dstCluster := drpc.Spec.PreferredCluster
+    if drpc.Spec.Action == rmn.ActionFailover {
+        dstCluster = drpc.Spec.FailoverCluster
+    }
+
+    progress, err := r.determineDRPCState(ctx, drpc, drPolicy, placementObj, dstCluster, log)
+    if err != nil {
+        return requeue, err
+    }
+
+    switch progress {
+    case Continue:
+        return !requeue, nil
+    case AllowFailover:
+        updateDRPCProgression(drpc, rmn.ProgressionActionPaused, log)
+        addOrUpdateCondition(&drpc.Status.Conditions, rmn.ConditionAvailable,
+            drpc.Generation, metav1.ConditionTrue, rmn.ReasonSuccess, "Failover allowed")
+        addOrUpdateCondition(&drpc.Status.Conditions, rmn.ConditionPeerReady, drpc.Generation,
+            metav1.ConditionTrue, rmn.ReasonSuccess, "Failover allowed")
+
+        return requeue, nil
+    default:
+        msg := "Operation Paused - User Intervention Required."
+
+        log.Info(fmt.Sprintf("err:%v - msg:%s", err, msg))
+        updateDRPCProgression(drpc, rmn.ProgressionActionPaused, log)
+        addOrUpdateCondition(&drpc.Status.Conditions, rmn.ConditionAvailable,
+            drpc.Generation, metav1.ConditionFalse, rmn.ReasonPaused, msg)
+        addOrUpdateCondition(&drpc.Status.Conditions, rmn.ConditionPeerReady, drpc.Generation,
+            metav1.ConditionFalse, rmn.ReasonPaused, msg)
+
+        return requeue, nil
+    }
+}
+
+// determineDRPCState runs the following algorithm:
+// 1. Stop Condition for Both Failed Queries:
+//    If attempts to query 2 clusters result in failure for both, the process is halted.
+
+// 2. Initial Deployment without VRGs:
+//    If 2 clusters are successfully queried, and no VRGs are found, proceed with the
+//    initial deployment.
+
+// 3. Handling Failures with S3 Store Check:
+//    - If 2 clusters are queried, 1 fails, and 0 VRGs are found, perform the following checks:
+//      - If the VRG is found in the S3 store, ensure that the DRPC action matches the VRG action.
+//        If not, stop until the action is corrected, allowing failover if necessary (set PeerReady).
+//      - If the VRG is not found in the S3 store and the failed cluster is not the destination
+//        cluster, continue with the initial deployment.
+
+// 4. Verification and Failover for VRGs on Failover Cluster:
+//    If 2 clusters are queried, 1 fails, and 1 VRG is found on the failover cluster, check
+//    the action:
+//    - If the actions don't match, stop until corrected by the user.
+//    - If they match, also stop but allow failover if the VRG in-hand is a secondary.
+//      Otherwise, continue.

+// 5. Handling VRGs on Destination Cluster:
+//    If 2 clusters are queried successfully and 1 or more VRGs are found, and one of the
+//    VRGs is on the destination cluster, perform the following checks:
+//    - Continue with the action only if the DRPC and the found VRG action match.
+//    - Stop until someone investigates if there is a mismatch, but allow failover to
+//      take place (set PeerReady).
+
+// 6. Otherwise, default to allowing Failover:
+//    If none of the above conditions apply, allow failover (set PeerReady) but stop until
+//    someone makes the necessary change.
+//
+//nolint:funlen,nestif,gocognit,gocyclo,cyclop
+func (r *DRPlacementControlReconciler) determineDRPCState(
+    ctx context.Context,
+    drpc *rmn.DRPlacementControl,
+    drPolicy *rmn.DRPolicy,
+    placementObj client.Object,
+    dstCluster string,
+    log logr.Logger,
+) (Progress, error) {
+    vrgNamespace, err := selectVRGNamespace(r.Client, log, drpc, placementObj)
+    if err != nil {
+        log.Info("Failed to select VRG namespace")
+
+        return Stop, err
+    }
+
+    drClusters, err := getDRClusters(ctx, r.Client, drPolicy)
+    if err != nil {
+        return Stop, err
+    }
+
+    vrgs, successfullyQueriedClusterCount, failedCluster, err := getVRGsFromManagedClusters(
+        r.MCVGetter, drpc, drClusters, vrgNamespace, log)
+    if err != nil {
+        log.Info("Failed to get a list of VRGs")
+
+        return Stop, err
+    }
+
+    // IF 2 clusters were queried and both queries failed, then STOP
+    if successfullyQueriedClusterCount == 0 {
+        log.Info("Number of clusters queried is 0. Stop...")
+
+        return Stop, nil
+    }
+
+    // IF 2 clusters were queried successfully and no VRGs were found, then continue with the initial deployment
+    if successfullyQueriedClusterCount == 2 && len(vrgs) == 0 {
+        log.Info("Queried 2 clusters successfully")
+
+        return Continue, nil
+    }
+
+    // IF 2 clusters were queried, 1 failed and no VRGs were found, then check the s3 store.
+    // IF the VRG is found in the s3 store, ensure that the DRPC action and the VRG action match. IF not, stop until
+    // the action is corrected, but allow failover to take place if needed (set PeerReady).
+    // IF the VRG is not found in the s3 store and the failedCluster is not the destination cluster, then continue
+    // with the initial deployment.
+    if successfullyQueriedClusterCount == 1 && len(vrgs) == 0 {
+        vrg := GetLastKnownVRGPrimaryFromS3(ctx, r.APIReader,
+            AvailableS3Profiles(drClusters), drpc.GetName(), vrgNamespace, r.ObjStoreGetter, log)
+        if vrg == nil {
+            // IF the failed cluster is not the dest cluster, then this could be an initial deploy
+            if failedCluster != dstCluster {
+                return Continue, nil
+            }
+
+            log.Info("Unable to query all clusters and failed to get VRG from s3 store")
+
+            return Stop, nil
+        }
+
+        log.Info("VRG From s3", "VRG Spec", vrg.Spec, "VRG Annotations", vrg.GetAnnotations())
+
+        if drpc.Spec.Action != rmn.DRAction(vrg.Spec.Action) {
+            log.Info(fmt.Sprintf("Two different actions - drpc action is '%s'/vrg action from s3 is '%s'",
+                drpc.Spec.Action, vrg.Spec.Action))
+
+            return AllowFailover, nil
+        }
+
+        if dstCluster == vrg.GetAnnotations()[DestinationClusterAnnotationKey] &&
+            dstCluster != failedCluster {
+            log.Info(fmt.Sprintf("VRG from s3. Same dstCluster %s/%s. Proceeding...",
+                dstCluster, vrg.GetAnnotations()[DestinationClusterAnnotationKey]))
+
+            return Continue, nil
+        }
+
+        log.Info(fmt.Sprintf("VRG from s3. DRPCAction/vrgAction/DRPCDstClstr/vrgDstClstr %s/%s/%s/%s. Allow Failover...",
+            drpc.Spec.Action, vrg.Spec.Action, dstCluster, vrg.GetAnnotations()[DestinationClusterAnnotationKey]))
+
+        return AllowFailover, nil
+    }
+
+    // IF 2 clusters were queried, 1 failed and 1 VRG was found on the failover cluster, then check the action; if
+    // they don't match, stop until corrected by the user. If they do match, then also stop but allow failover if the
+    // VRG in-hand is a secondary. Otherwise, continue...
+    if successfullyQueriedClusterCount == 1 && len(vrgs) == 1 {
+        var clusterName string
+
+        var vrg *rmn.VolumeReplicationGroup
+        for k, v := range vrgs {
+            clusterName, vrg = k, v
+
+            break
+        }
+
+        if drpc.Spec.Action != rmn.DRAction(vrg.Spec.Action) {
+            log.Info(fmt.Sprintf("Stop! Two different actions - drpc action is '%s'/vrg action is '%s'",
+                drpc.Spec.Action, vrg.Spec.Action))
+
+            return Stop, nil
+        }
+
+        if dstCluster != clusterName && vrg.Spec.ReplicationState == rmn.Secondary {
+            log.Info(fmt.Sprintf("Same Action and dstCluster and ReplicationState %s/%s/%s",
+                drpc.Spec.Action, dstCluster, vrg.Spec.ReplicationState))
+
+            log.Info("Failover is allowed - Primary is assumed in the failed cluster")
+
+            return AllowFailover, nil
+        }
+
+        log.Info("Allow to continue")
+
+        return Continue, nil
+    }
+
+    // Finally, IF 2 clusters were queried successfully and 1 or more VRGs were found, and one of the VRGs is on the
+    // dstCluster, then continue with the action if and only if the DRPC and the found VRG action match. Otherwise,
+    // stop until someone investigates, but allow failover to take place (set PeerReady)
+    if successfullyQueriedClusterCount == 2 && len(vrgs) >= 1 {
+        var clusterName string
+
+        var vrg *rmn.VolumeReplicationGroup
+
+        for k, v := range vrgs {
+            clusterName, vrg = k, v
+
+            break
+        }
+
+        if drpc.Spec.Action == rmn.DRAction(vrg.Spec.Action) {
+            log.Info(fmt.Sprintf("Same Action %s", drpc.Spec.Action))
+
+            return Continue, nil
+        }
+
+        log.Info("Failover is allowed", "vrgs count", len(vrgs), "drpc action",
+            drpc.Spec.Action, "vrg action", vrg.Spec.Action, "dstCluster/clusterName", dstCluster+"/"+clusterName)
+
+        return AllowFailover, nil
+    }
+
+    // IF none of the above, then allow failover (set PeerReady), but stop until someone makes the change
+    log.Info("Failover is allowed, but user intervention is required")
+
+    return AllowFailover, nil
+}
diff --git a/controllers/drplacementcontrol_controller_test.go b/controllers/drplacementcontrol_controller_test.go
index 7d16cc4332..023d570256 100644
--- a/controllers/drplacementcontrol_controller_test.go
+++ b/controllers/drplacementcontrol_controller_test.go
@@ -60,6 +60,8 @@ const (
)

var (
+    NumberOfVrgsToReturnWhenRebuildingState = 0
+
    UseApplicationSet = false

    west1Cluster = &spokeClusterV1.ManagedCluster{
@@ -432,58 +434,63 @@ func (f FakeMCVGetter) GetVRGFromManagedCluster(resourceName, resourceNamespace,
        return nil, errors.NewNotFound(schema.GroupResource{}, "requested resource not found in ManagedCluster")

    case "getVRGsFromManagedClusters":
-        vrgFromMW, err := getVRGFromManifestWork(managedCluster)
-        if err != nil {
-            if errors.IsNotFound(err) {
-                if getFunctionNameAtIndex(3) == "getVRGs" { // Called only from DRCluster reconciler, at present
-                    return fakeVRGWithMModesProtectedPVC()
-                }
-            }
+        return doGetFakeVRGsFromManagedClusters(managedCluster, vrgStatus)
+    }
+
+    return nil, fmt.Errorf("unknown caller %s", getFunctionNameAtIndex(2))
+}

-            return nil, err
+func doGetFakeVRGsFromManagedClusters(managedCluster string, vrgStatus rmn.VolumeReplicationGroupStatus,
+) (*rmn.VolumeReplicationGroup, error) {
+
vrgFromMW, err := getVRGFromManifestWork(managedCluster) + if err != nil { + if errors.IsNotFound(err) { + if getFunctionNameAtIndex(3) == "getVRGs" { // Called only from DRCluster reconciler, at present + return fakeVRGWithMModesProtectedPVC() + } } - if vrgFromMW != nil { - vrgFromMW.Generation = 1 - vrgFromMW.Status = vrgStatus - vrgFromMW.Status.Conditions = append(vrgFromMW.Status.Conditions, metav1.Condition{ - Type: controllers.VRGConditionTypeClusterDataReady, - Reason: controllers.VRGConditionReasonClusterDataRestored, - Status: metav1.ConditionTrue, - Message: "Cluster Data Ready", - LastTransitionTime: metav1.Now(), - ObservedGeneration: vrgFromMW.Generation, - }) - vrgFromMW.Status.Conditions = append(vrgFromMW.Status.Conditions, metav1.Condition{ - Type: controllers.VRGConditionTypeClusterDataProtected, - Reason: controllers.VRGConditionReasonClusterDataRestored, - Status: metav1.ConditionTrue, - Message: "Cluster Data Protected", - LastTransitionTime: metav1.Now(), - ObservedGeneration: vrgFromMW.Generation, - }) - vrgFromMW.Status.Conditions = append(vrgFromMW.Status.Conditions, metav1.Condition{ - Type: controllers.VRGConditionTypeDataProtected, - Reason: controllers.VRGConditionReasonDataProtected, - Status: metav1.ConditionTrue, - Message: "Data Protected", - LastTransitionTime: metav1.Now(), - ObservedGeneration: vrgFromMW.Generation, - }) + return nil, err + } - protectedPVC := &rmn.ProtectedPVC{} - protectedPVC.Name = "random name" - protectedPVC.StorageIdentifiers.ReplicationID.ID = MModeReplicationID - protectedPVC.StorageIdentifiers.StorageProvisioner = MModeCSIProvisioner - protectedPVC.StorageIdentifiers.ReplicationID.Modes = []rmn.MMode{rmn.MModeFailover} + if vrgFromMW != nil { + vrgFromMW.Generation = 1 + vrgFromMW.Status = vrgStatus + vrgFromMW.Status.Conditions = append(vrgFromMW.Status.Conditions, metav1.Condition{ + Type: controllers.VRGConditionTypeClusterDataReady, + Reason: controllers.VRGConditionReasonClusterDataRestored, + Status: metav1.ConditionTrue, + Message: "Cluster Data Ready", + LastTransitionTime: metav1.Now(), + ObservedGeneration: vrgFromMW.Generation, + }) + vrgFromMW.Status.Conditions = append(vrgFromMW.Status.Conditions, metav1.Condition{ + Type: controllers.VRGConditionTypeClusterDataProtected, + Reason: controllers.VRGConditionReasonClusterDataRestored, + Status: metav1.ConditionTrue, + Message: "Cluster Data Protected", + LastTransitionTime: metav1.Now(), + ObservedGeneration: vrgFromMW.Generation, + }) + vrgFromMW.Status.Conditions = append(vrgFromMW.Status.Conditions, metav1.Condition{ + Type: controllers.VRGConditionTypeDataProtected, + Reason: controllers.VRGConditionReasonDataProtected, + Status: metav1.ConditionTrue, + Message: "Data Protected", + LastTransitionTime: metav1.Now(), + ObservedGeneration: vrgFromMW.Generation, + }) - vrgFromMW.Status.ProtectedPVCs = append(vrgFromMW.Status.ProtectedPVCs, *protectedPVC) - } + protectedPVC := &rmn.ProtectedPVC{} + protectedPVC.Name = "random name" + protectedPVC.StorageIdentifiers.ReplicationID.ID = MModeReplicationID + protectedPVC.StorageIdentifiers.StorageProvisioner = MModeCSIProvisioner + protectedPVC.StorageIdentifiers.ReplicationID.Modes = []rmn.MMode{rmn.MModeFailover} - return vrgFromMW, nil + vrgFromMW.Status.ProtectedPVCs = append(vrgFromMW.Status.ProtectedPVCs, *protectedPVC) } - return nil, fmt.Errorf("unknown caller %s", getFunctionNameAtIndex(2)) + return vrgFromMW, nil } func (f FakeMCVGetter) DeleteVRGManagedClusterView( @@ -1304,7 +1311,7 @@ func 
verifyUserPlacementRuleDecisionUnchanged(name, namespace, homeCluster strin var placementObj client.Object - Consistently(func() bool { + Eventually(func() bool { err := k8sClient.Get(context.TODO(), usrPlcementLookupKey, usrPlRule) if errors.IsNotFound(err) { usrPlmnt := &clrapiv1beta1.Placement{} @@ -1425,6 +1432,7 @@ func waitForCompletion(expectedState string) { fmt.Sprintf("failed waiting for state to match. expecting: %s, found %s", expectedState, drstate)) } +//nolint:unused func waitForUpdateDRPCStatus() { Eventually(func() bool { drpc := getLatestDRPC() @@ -1491,6 +1499,7 @@ func runFailoverAction(placementObj client.Object, fromCluster, toCluster string Expect(decision.ClusterName).To(Equal(toCluster)) } +//nolint:all func clearDRActionAfterFailover(userPlacementRule *plrv1.PlacementRule, preferredCluster, failoverCluster string) { drstate = "none" @@ -1831,9 +1840,32 @@ func verifyDRPCOwnedByPlacement(placementObj client.Object, drpc *rmn.DRPlacemen Fail(fmt.Sprintf("DRPC %s not owned by Placement %s", drpc.GetName(), placementObj.GetName())) } +//nolint:all +func checkConditionAllowFailover() { + var drpc *rmn.DRPlacementControl + + var availableCondition metav1.Condition + + Eventually(func() bool { + drpc = getLatestDRPC() + for _, availableCondition = range drpc.Status.Conditions { + if availableCondition.Type != rmn.ConditionPeerReady { + if availableCondition.Status == metav1.ConditionTrue { + return true + } + } + } + + return false + }, timeout, interval).Should(BeTrue(), fmt.Sprintf("Condition '%+v'", availableCondition)) + + Expect(drpc.Status.Phase).To(Equal(rmn.DRState(""))) + Expect(availableCondition.Message).Should(Equal("Failover allowed")) +} + // +kubebuilder:docs-gen:collapse=Imports // -//nolint:errcheck +//nolint:errcheck,scopelint var _ = Describe("DRPlacementControl Reconciler", func() { Specify("DRClusters", func() { populateDRClusters() @@ -1858,7 +1890,7 @@ var _ = Describe("DRPlacementControl Reconciler", func() { By("\n\n*** Failover - 1\n\n") setRestorePVsUncomplete() setDRPCSpecExpectationTo(rmn.ActionFailover, East1ManagedCluster, West1ManagedCluster) - verifyUserPlacementRuleDecisionUnchanged(userPlacementRule.Name, userPlacementRule.Namespace, East1ManagedCluster) + verifyUserPlacementRuleDecisionUnchanged(userPlacementRule.Name, userPlacementRule.Namespace, West1ManagedCluster) // MWs for VRG, NS, DRCluster, and MMode Eventually(getManifestWorkCount, timeout, interval).WithArguments(West1ManagedCluster).Should(Equal(4)) setRestorePVsComplete() @@ -1884,21 +1916,6 @@ var _ = Describe("DRPlacementControl Reconciler", func() { runRelocateAction(userPlacementRule, West1ManagedCluster, false, false) }) }) - When("DRAction is Relocate during hub recovery", func() { - It("Should reconstructs the DRPC state and points to Primary (East1ManagedCluster)", func() { - // ----------------------------- RELOCATION TO PRIMARY -------------------------------------- - By("\n\n*** Relocate - 2\n\n") - clearFakeUserPlacementRuleStatus() - clearDRPCStatus() - runRelocateAction(userPlacementRule, West1ManagedCluster, false, false) - }) - }) - When("DRAction is cleared after relocation", func() { - It("Should not do anything", func() { - // ----------------------------- Clear DRAction -------------------------------------- - clearDRActionAfterRelocate(userPlacementRule, East1ManagedCluster, West1ManagedCluster) - }) - }) When("DRAction is changed to Failover after relocation", func() { It("Should failover again to Secondary (West1ManagedCluster)", func() { // 
----------------------------- FAILOVER TO SECONDARY -------------------------------------- @@ -1906,30 +1923,6 @@ var _ = Describe("DRPlacementControl Reconciler", func() { runFailoverAction(userPlacementRule, East1ManagedCluster, West1ManagedCluster, false, false) }) }) - When("DRAction is cleared after failover", func() { - It("Should not do anything", func() { - // ----------------------------- Clear DRAction -------------------------------------- - By("\n\n>>> clearing DRAction") - clearDRActionAfterFailover(userPlacementRule, East1ManagedCluster, West1ManagedCluster) - }) - }) - When("DRAction is cleared but DRPC status is empty", func() { - It("Should do nothing for placement as it is deployed somewhere else", func() { - // ----------------------------- Clear DRAction -------------------------------------- - By("\n\n>>> Update DRPC status") - clearDRPCStatus() - waitForCompletion(string("")) - drpc = getLatestDRPC() - // At this point expect the DRPC status condition to have 2 types - // {Available and PeerReady} - // Final state didn't change and it is 'FailedOver' even though we tried to run - // initial deployment. But the status remains cleared. - Expect(drpc.Status.Phase).To(Equal(rmn.DRState(""))) - decision := getLatestUserPlacementDecision(userPlacementRule.Name, userPlacementRule.Namespace) - Expect(decision.ClusterName).To(Equal(West1ManagedCluster)) - Expect(drpc.GetAnnotations()[controllers.LastAppDeploymentCluster]).To(Equal(West1ManagedCluster)) - }) - }) When("DRAction is set to Relocate", func() { It("Should relocate to Primary (East1ManagedCluster)", func() { // ----------------------------- RELOCATION TO PRIMARY -------------------------------------- @@ -2009,7 +2002,7 @@ var _ = Describe("DRPlacementControl Reconciler", func() { It("Should not failover to Secondary (West1ManagedCluster) till PV manifest is applied", func() { setRestorePVsUncomplete() setDRPCSpecExpectationTo(rmn.ActionFailover, East1ManagedCluster, West1ManagedCluster) - verifyUserPlacementRuleDecisionUnchanged(placement.Name, placement.Namespace, East1ManagedCluster) + verifyUserPlacementRuleDecisionUnchanged(placement.Name, placement.Namespace, West1ManagedCluster) // MWs for VRG, NS, VRG DRCluster, and MMode Expect(getManifestWorkCount(West1ManagedCluster)).Should(Equal(4)) Expect(len(getPlacementDecision(placement.GetName(), placement.GetNamespace()). 
@@ -2150,14 +2143,14 @@ var _ = Describe("DRPlacementControl Reconciler", func() { }) Context("DRPlacementControl Reconciler Sync DR", func() { userPlacementRule := &plrv1.PlacementRule{} - drpc := &rmn.DRPlacementControl{} + // drpc := &rmn.DRPlacementControl{} Specify("DRClusters", func() { populateDRClusters() }) When("An Application is deployed for the first time", func() { It("Should deploy to East1ManagedCluster", func() { By("Initial Deployment") - userPlacementRule, drpc = InitialDeploymentSync(DRPCNamespaceName, UserPlacementRuleName, East1ManagedCluster) + userPlacementRule, _ = InitialDeploymentSync(DRPCNamespaceName, UserPlacementRuleName, East1ManagedCluster) verifyInitialDRPCDeployment(userPlacementRule, East1ManagedCluster) verifyDRPCOwnedByPlacement(userPlacementRule, getLatestDRPC()) }) @@ -2168,7 +2161,7 @@ var _ = Describe("DRPlacementControl Reconciler", func() { setRestorePVsUncomplete() fenceCluster(East1ManagedCluster, false) setDRPCSpecExpectationTo(rmn.ActionFailover, East1ManagedCluster, East2ManagedCluster) - verifyUserPlacementRuleDecisionUnchanged(userPlacementRule.Name, userPlacementRule.Namespace, East1ManagedCluster) + verifyUserPlacementRuleDecisionUnchanged(userPlacementRule.Name, userPlacementRule.Namespace, East2ManagedCluster) // MWs for VRG, VRG DRCluster and the MW for NetworkFence CR to fence off // East1ManagedCluster Expect(getManifestWorkCount(East2ManagedCluster)).Should(Equal(4)) @@ -2200,29 +2193,6 @@ var _ = Describe("DRPlacementControl Reconciler", func() { runFailoverAction(userPlacementRule, East1ManagedCluster, East2ManagedCluster, true, false) }) }) - When("DRAction is cleared after failover", func() { - It("Should not do anything", func() { - // ----------------------------- Clear DRAction -------------------------------------- - By("\n\n>>> clearing DRAction") - clearDRActionAfterFailover(userPlacementRule, East1ManagedCluster, East2ManagedCluster) - }) - }) - When("DRAction is cleared but DRPC status is empty", func() { - It("Should do nothing for placement as it is deployed somewhere else", func() { - // ----------------------------- Clear DRAction -------------------------------------- - By("\n\n>>> Update DRPC status") - clearDRPCStatus() - waitForCompletion(string("")) - drpc = getLatestDRPC() - // At this point expect the DRPC status condition to have 2 types - // {Available and PeerReady} - // Final state didn't change and it is 'FailedOver' even though we tried to run - // initial deployment. But the status remains cleared. 
- Expect(drpc.Status.Phase).To(Equal(rmn.DRState(""))) - decision := getLatestUserPlacementDecision(userPlacementRule.Name, userPlacementRule.Namespace) - Expect(decision.ClusterName).To(Equal(East2ManagedCluster)) - }) - }) When("DRAction is set to Relocate", func() { It("Should relocate to Primary (East1ManagedCluster)", func() { // ----------------------------- RELOCATION TO PRIMARY FOR SYNC DR------------------------ @@ -2249,14 +2219,14 @@ var _ = Describe("DRPlacementControl Reconciler", func() { // manual fencing and manual unfencing userPlacementRule = &plrv1.PlacementRule{} - drpc = &rmn.DRPlacementControl{} + // drpc = &rmn.DRPlacementControl{} Specify("DRClusters", func() { populateDRClusters() }) When("An Application is deployed for the first time", func() { It("Should deploy to East1ManagedCluster", func() { By("Initial Deployment") - userPlacementRule, drpc = InitialDeploymentSync(DRPCNamespaceName, UserPlacementRuleName, East1ManagedCluster) + userPlacementRule, _ = InitialDeploymentSync(DRPCNamespaceName, UserPlacementRuleName, East1ManagedCluster) verifyInitialDRPCDeployment(userPlacementRule, East1ManagedCluster) verifyDRPCOwnedByPlacement(userPlacementRule, getLatestDRPC()) }) @@ -2267,7 +2237,7 @@ var _ = Describe("DRPlacementControl Reconciler", func() { setRestorePVsUncomplete() fenceCluster(East1ManagedCluster, true) setDRPCSpecExpectationTo(rmn.ActionFailover, East1ManagedCluster, East2ManagedCluster) - verifyUserPlacementRuleDecisionUnchanged(userPlacementRule.Name, userPlacementRule.Namespace, East1ManagedCluster) + verifyUserPlacementRuleDecisionUnchanged(userPlacementRule.Name, userPlacementRule.Namespace, East2ManagedCluster) // MWs for VRG, VRG DRCluster and the MW for NetworkFence CR to fence off // East1ManagedCluster Expect(getManifestWorkCount(East2ManagedCluster)).Should(Equal(4)) @@ -2286,12 +2256,6 @@ var _ = Describe("DRPlacementControl Reconciler", func() { runRelocateAction(userPlacementRule, East2ManagedCluster, true, true) }) }) - When("DRAction is cleared after relocation", func() { - It("Should not do anything", func() { - // ----------------------------- Clear DRAction -------------------------------------- - clearDRActionAfterRelocate(userPlacementRule, East1ManagedCluster, East2ManagedCluster) - }) - }) When("DRAction is changed to Failover after relocation", func() { It("Should failover again to Secondary (East2ManagedCluster)", func() { // ----------------------------- FAILOVER TO SECONDARY FOR SYNC DR-------------------- @@ -2299,29 +2263,6 @@ var _ = Describe("DRPlacementControl Reconciler", func() { runFailoverAction(userPlacementRule, East1ManagedCluster, East2ManagedCluster, true, true) }) }) - When("DRAction is cleared after failover", func() { - It("Should not do anything", func() { - // ----------------------------- Clear DRAction -------------------------------------- - By("\n\n>>> clearing DRAction") - clearDRActionAfterFailover(userPlacementRule, East1ManagedCluster, East2ManagedCluster) - }) - }) - When("DRAction is cleared but DRPC status is empty", func() { - It("Should do nothing for placement as it is deployed somewhere else", func() { - // ----------------------------- Clear DRAction -------------------------------------- - By("\n\n>>> Update DRPC status") - clearDRPCStatus() - waitForCompletion(string("")) - drpc = getLatestDRPC() - // At this point expect the DRPC status condition to have 2 types - // {Available and PeerReady} - // Final state didn't change and it is 'FailedOver' even though we tried to run - // initial 
deployment. But the status remains cleared. - Expect(drpc.Status.Phase).To(Equal(rmn.DRState(""))) - decision := getLatestUserPlacementDecision(userPlacementRule.Name, userPlacementRule.Namespace) - Expect(decision.ClusterName).To(Equal(East2ManagedCluster)) - }) - }) When("DRAction is set to Relocate", func() { It("Should relocate to Primary (East1ManagedCluster)", func() { // ----------------------------- RELOCATION TO PRIMARY FOR SYNC DR------------------------ diff --git a/controllers/drplacementcontrolvolsync.go b/controllers/drplacementcontrolvolsync.go index 04b284cdb8..691382ddbb 100644 --- a/controllers/drplacementcontrolvolsync.go +++ b/controllers/drplacementcontrolvolsync.go @@ -308,7 +308,7 @@ func (d *DRPCInstance) createVolSyncDestManifestWork(clusterToSkip string) error annotations[DRPCNameAnnotation] = d.instance.Name annotations[DRPCNamespaceAnnotation] = d.instance.Namespace - vrg := d.generateVRG(rmn.Secondary) + vrg := d.generateVRG(dstCluster, rmn.Secondary) if err := d.mwu.CreateOrUpdateVRGManifestWork( d.instance.Name, d.vrgNamespace, dstCluster, vrg, annotations); err != nil {
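Note on test coverage: the diff adds checkConditionAllowFailover for the AllowFailover outcome of determineDRPCState, but no helper exercises the Stop outcome, where ensureDRPCStatusConsistency parks the DRPC with Progression "Paused" and conditions carrying ReasonPaused. A companion helper could look roughly like the sketch below. It is illustrative only and not part of this change; checkConditionPaused is a hypothetical name, while getLatestDRPC, timeout, and interval are the helpers already used in drplacementcontrol_controller_test.go.

// Hypothetical helper (not part of this change): waits until the DRPC reports the
// user-intervention state produced by the Stop outcome, i.e. Available is False with
// reason Paused, and verifies the phase was left empty.
func checkConditionPaused() {
    var drpc *rmn.DRPlacementControl

    Eventually(func() bool {
        drpc = getLatestDRPC()
        for _, cond := range drpc.Status.Conditions {
            if cond.Type == rmn.ConditionAvailable &&
                cond.Status == metav1.ConditionFalse &&
                cond.Reason == rmn.ReasonPaused {
                return true
            }
        }

        return false
    }, timeout, interval).Should(BeTrue(), "expected DRPC to be paused for user intervention")

    Expect(drpc.Status.Phase).To(Equal(rmn.DRState("")))
}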