From 39d7efa0d50a4e90f34b8c25e541bd68805fc4ba Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Mon, 23 Sep 2024 16:56:09 +0300 Subject: [PATCH] Fix disable dr if VR failed validation When deleting a primary VRG, we wait until the VR Completed condition is met. However, if a VR precondition failed, for example using a drpolicy without flattening enabled when the PVC needs flattening, the VR will never complete and the vrg and drpc deletion will never complete. Since csi-addons 0.10.0 we have a new Validated VR condition, set to true if preconditions are met, and false if not. The VR can be deleted safely in this state, since mirroring was not enabled. This change modifies deleted VRG processing to check the new VR Validated status. If the condition exists and the condition status is false, validateVRStatus() returns true, signaling that the VR is in the desired state, and ramen completes the delete flow. If the VR does not report the Validated condition (e.g. old csi-addons version) or the condition status is true (mirroring in progress), we continue in the normal flow. The VR will be deleted only when the Completed condition status is true. Tested locally with discovered app using a pvc created from a volume snapshot. Signed-off-by: Nir Soffer --- internal/controller/vrg_volrep.go | 32 ++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/internal/controller/vrg_volrep.go b/internal/controller/vrg_volrep.go index d955dce14..ca6cdfc4b 100644 --- a/internal/controller/vrg_volrep.go +++ b/internal/controller/vrg_volrep.go @@ -1415,12 +1415,23 @@ func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *v // validateVRStatus validates if the VolumeReplication resource has the desired status for the // current generation and returns true if so, false otherwise -// - When replication state is Primary, only Completed condition is checked. 
-// - When replication state is Secondary, all 3 conditions for Completed/Degraded/Resyncing is -// checked and ensured healthy. +// - When replication state is Primary, Validated and Completed conditions are checked. +// - When replication state is Secondary, Completed, Degraded and Resyncing conditions are checked and +// ensured healthy. func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication, state ramendrv1alpha1.ReplicationState, ) bool { + // Check validated for primary during VRG deletion. + if state == ramendrv1alpha1.Primary && rmnutil.ResourceIsDeleted(v.instance) { + validated, ok := v.validateVRValidatedStatus(volRep) + if !validated && ok { + v.log.Info(fmt.Sprintf("VolumeReplication %s/%s failed validation and can be deleted", + volRep.GetName(), volRep.GetNamespace())) + + return true + } + } + // Check completed for both primary and secondary. if !v.validateVRCompletedStatus(pvc, volRep, state) { return false @@ -1443,6 +1454,21 @@ func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep return true } +// validateVRValidatedStatus validates that the VolumeReplication resource was validated. +// Returns 2 booleans: +// - validated: true if the condition is true, otherwise false +// - ok: true if the check was successful, false if the condition is missing, stale, or unknown. +func (v *VRGInstance) validateVRValidatedStatus( + volRep *volrep.VolumeReplication, +) (bool, bool) { + conditionMet, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue) + if errorMsg != "" { + v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", errorMsg, volRep.GetName(), volRep.GetNamespace())) + } + + return conditionMet, errorMsg == "" +} + // validateVRCompletedStatus validates if the VolumeReplication resource Completed condition is met and update // the PVC DataReady and Protected conditions. 
// Returns true if the condtion is true, false if the condition is missing, stale, ubnknown, of false.