Skip to content

Commit

Permalink
Propagate VR conditions .Message
Browse files Browse the repository at this point in the history
When a VR condition is not met, we set the protected PVC condition
message using the error message returned from isVRConditionMet(). When
using csi-addons > 0.10.0, we now use the message from the condition
instead of the default message.

Since the Validated condition is not reported by older versions of
csi-addons, and we must wait until the Validated condition status is
known when VRG is deleted, isVRConditionMet() now also returns the state
of the condition, which can be:

- missing: condition not found
- stale: observed generation does not match object generation
- unknown: the special "Unknown" value
- known: status is True or False

When we check the Validated condition we have these cases:

- Condition is missing: continue to next condition.

- Condition is met: continue to the next condition.

- Condition not met and its status is False. This VR will never
  complete and it is safe to delete since replication will never start.
  If VRG is deleted, we return true since the VR reached the desired
  state. Otherwise we return false. In this case we update the
  protected pvc condition with the message from the VR condition.

- Condition is not met and is stale or unknown: we need to check again
  later. There is no point in checking the Completed condition since a VR
  cannot complete without validation. In this case we update the
  protected pvc condition with the message generated by
  isVRConditionMet() for stale or unknown conditions.

Example protected pvc DataReady condition with propagated message when
VR validation failed:

    conditions:
      - lastTransitionTime: "2024-11-06T15:33:06Z"
        message: 'failed to meet prerequisite: rpc error: code = FailedPrecondition
          desc = system is not in a state required for the operation''s execution:
          failed to enable mirroring on image "replicapool/csi-vol-fe2ca7f8-713c-4c51-bf52-0d4b2c11d329":
          parent image "replicapool/csi-snap-e2114105-b451-469b-ad97-eb3cbe2af54e"
          is not enabled for mirroring'
        observedGeneration: 1
        reason: Error
        status: "False"
        type: DataReady

Signed-off-by: Nir Soffer <nsoffer@redhat.com>
  • Loading branch information
nirs committed Nov 6, 2024
1 parent 649b1de commit ec846ff
Showing 1 changed file with 62 additions and 29 deletions.
91 changes: 62 additions & 29 deletions internal/controller/vrg_volrep.go
Original file line number Diff line number Diff line change
Expand Up @@ -1430,20 +1430,27 @@ func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *v
//
// We handle 3 cases:
// - Primary deleted VRG: If Validated condition exists and false, the VR will never complete and can be
// deleted safely. Otherwise Completed condition is checked.
// - Primary VRG: Completed condition is checked.
// deleted safely.
// - Primary VRG: Validated condition is checked, and if successful the Completed condition is also checked.
// - Secondary VRG: Completed, Degraded and Resyncing conditions are checked and ensured healthy.
func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication,
state ramendrv1alpha1.ReplicationState,
) bool {
// Check validated for primary during VRG deletion.
if state == ramendrv1alpha1.Primary && rmnutil.ResourceIsDeleted(v.instance) {
validated, ok := v.validateVRValidatedStatus(volRep)
if !validated && ok {
v.log.Info(fmt.Sprintf("VolumeReplication %s/%s failed validation and can be deleted",
volRep.GetName(), volRep.GetNamespace()))
// If primary, check the validated condition.
if state == ramendrv1alpha1.Primary {
validated, condState := v.validateVRValidatedStatus(pvc, volRep)
if !validated && condState != conditionMissing {
// If the condition is known, this VR will never complete since it failed initial validation.
if condState == conditionKnown {
v.log.Info(fmt.Sprintf("VolumeReplication %s/%s failed validation and can be deleted",
volRep.GetName(), volRep.GetNamespace()))

// If the VRG is deleted the VR has reached the desired state.
return rmnutil.ResourceIsDeleted(v.instance)
}

return true
// The condition is stale or unknown so we need to check again later.
return false
}
}

Expand All @@ -1467,19 +1474,25 @@ func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep
return true
}

// validateVRValidatedStatus validates that VolumeReplicaion resource was validated.
// Return 2 booleans
// validateVRValidatedStatus validates that VolumeReplication resource was validated.
// Returns 2 values:
// - validated: true if the condition is true, otherwise false
// - ok: true if the check was succeesfull, false if the condition is missing, stale, or unknown.
// - state: condition state
func (v *VRGInstance) validateVRValidatedStatus(
pvc *corev1.PersistentVolumeClaim,
volRep *volrep.VolumeReplication,
) (bool, bool) {
conditionMet, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue)
if errorMsg != "" {
v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", errorMsg, volRep.GetName(), volRep.GetNamespace()))
) (bool, conditionState) {
conditionMet, condState, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue)
if !conditionMet && condState != conditionMissing {
defaultMsg := "VolumeReplication resource not validated"
v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, errorMsg,
defaultMsg)
v.updatePVCDataProtectedConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, errorMsg,
defaultMsg)
v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", defaultMsg, volRep.GetName(), volRep.GetNamespace()))
}

return conditionMet, errorMsg == ""
return conditionMet, condState
}

// validateVRCompletedStatus validates if the VolumeReplication resource Completed condition is met and update
Expand All @@ -1502,7 +1515,7 @@ func (v *VRGInstance) validateVRCompletedStatus(pvc *corev1.PersistentVolumeClai
action = "demoted"
}

conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionCompleted, metav1.ConditionTrue)
conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionCompleted, metav1.ConditionTrue)
if !conditionMet {
defaultMsg := fmt.Sprintf("VolumeReplication resource for pvc not %s to %s", action, stateString)
v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg,
Expand Down Expand Up @@ -1539,12 +1552,12 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.Persist
) bool {
v.updatePVCLastSyncCounters(pvc.Namespace, pvc.Name, nil)

conditionMet, _ := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionTrue)
conditionMet, _, _ := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionTrue)
if !conditionMet {
return v.checkResyncCompletionAsSecondary(pvc, volRep)
}

conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionTrue)
conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionTrue)
if !conditionMet {
defaultMsg := "VolumeReplication resource for pvc is not in Degraded condition while resyncing"
v.updatePVCDataProtectedConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg,
Expand Down Expand Up @@ -1573,7 +1586,7 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.Persist
func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVolumeClaim,
volRep *volrep.VolumeReplication,
) bool {
conditionMet, msg := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionFalse)
conditionMet, _, msg := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionFalse)
if !conditionMet {
defaultMsg := "VolumeReplication resource for pvc not syncing as Secondary"
v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg,
Expand All @@ -1587,7 +1600,7 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVol
return false
}

conditionMet, msg = isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionFalse)
conditionMet, _, msg = isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionFalse)
if !conditionMet {
defaultMsg := "VolumeReplication resource for pvc is not syncing and is degraded as Secondary"
v.updatePVCDataReadyConditionHelper(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg,
Expand All @@ -1611,35 +1624,55 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVol
return true
}

// isVRConditionMet returns true if the condition is met, and an error mesage if we could not get the
// condition value.
// conditionState classifies what could be determined about a single
// VolumeReplication status condition when it was inspected.
type conditionState string

// States reported by isVRConditionMet alongside the met/not-met result.
const (
	// conditionMissing means the condition was not found in the resource status.
	conditionMissing conditionState = "missing"
	// conditionStale means the condition's observed generation does not match
	// the resource generation.
	conditionStale conditionState = "stale"
	// conditionUnknown means the condition status is the special "Unknown" value.
	conditionUnknown conditionState = "unknown"
	// conditionKnown means the condition is present, current, and its status
	// is not unknown.
	conditionKnown conditionState = "known"
)

// isVRConditionMet checks if the condition is met.
// Returns 3 values:
// - met: true if the condition status matches the desired status, otherwise false
// - state: one of (conditionMissing, conditionStale, conditionUnknown, conditionKnown);
// conditionKnown means the condition exists, its observed generation matches the object
// generation, and its value is not unknown.
// - errorMsg: error message describing why the condition is not met
func isVRConditionMet(volRep *volrep.VolumeReplication,
conditionType string,
desiredStatus metav1.ConditionStatus,
) (bool, string) {
) (bool, conditionState, string) {
met := true

volRepCondition := findCondition(volRep.Status.Conditions, conditionType)
if volRepCondition == nil {
errorMsg := fmt.Sprintf("Failed to get the %s condition from status of VolumeReplication resource.",
conditionType)

return false, errorMsg
return !met, conditionMissing, errorMsg
}

if volRep.GetGeneration() != volRepCondition.ObservedGeneration {
errorMsg := fmt.Sprintf("Stale generation for condition %s from status of VolumeReplication resource.",
conditionType)

return false, errorMsg
return !met, conditionStale, errorMsg
}

if volRepCondition.Status == metav1.ConditionUnknown {
errorMsg := fmt.Sprintf("Unknown status for condition %s from status of VolumeReplication resource.",
conditionType)

return false, errorMsg
return !met, conditionUnknown, errorMsg
}

if volRepCondition.Status != desiredStatus {
// csi-addons > 0.10.0 returns detailed error message
return !met, conditionKnown, volRepCondition.Message
}

return volRepCondition.Status == desiredStatus, ""
return met, conditionKnown, ""
}

// Disabling unparam linter as currently every invocation of this
Expand Down

0 comments on commit ec846ff

Please sign in to comment.