Skip to content

Commit

Permalink
Merge pull request #5319 from zhzhuang-zju/poliyMarks
Browse files Browse the repository at this point in the history
skip cleanup when the poliy claim metadata changed
  • Loading branch information
karmada-bot authored Oct 10, 2024
2 parents 11d5c97 + 08026dc commit 5692e28
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 13 deletions.
14 changes: 14 additions & 0 deletions pkg/detector/claim.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,17 @@ func CleanupCPPClaimMetadata(obj metav1.Object) {
util.RemoveLabels(obj, clusterPropagationPolicyClaimLabels...)
util.RemoveAnnotations(obj, clusterPropagationPolicyClaimAnnotations...)
}

// NeedCleanupClaimMetadata determines whether the object's claim metadata needs to be cleaned up.
// We need to ensure that the claim metadata being deleted belong to the current PropagationPolicy/ClusterPropagationPolicy,
// otherwise, there is a risk of mistakenly deleting the ones belonging to another PropagationPolicy/ClusterPropagationPolicy.
// This situation could occur during the rapid deletion and creation of PropagationPolicy(s)/ClusterPropagationPolicy(s).
// More info can refer to https://github.com/karmada-io/karmada/issues/5307.
func NeedCleanupClaimMetadata(obj metav1.Object, targetClaimMetadata map[string]string) bool {
for k, v := range targetClaimMetadata {
if obj.GetLabels()[k] != v {
return false
}
}
return true
}
41 changes: 41 additions & 0 deletions pkg/detector/claim_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,44 @@ func TestCleanupCPPClaimMetadata(t *testing.T) {
})
}
}

func TestNeedCleanupClaimMetadata(t *testing.T) {
tests := []struct {
name string
obj metav1.Object
targetClaimMetadata map[string]string
needCleanup bool
}{
{
name: "need cleanup",
obj: &unstructured.Unstructured{
Object: map[string]interface{}{
"metadata": map[string]interface{}{
"labels": map[string]interface{}{policyv1alpha1.ClusterPropagationPolicyPermanentIDLabel: "f2507cgb-f3f3-4a4b-b289-5691a4fef979"},
"annotations": map[string]interface{}{policyv1alpha1.ClusterPropagationPolicyAnnotation: "cpp-example"},
},
},
},
targetClaimMetadata: map[string]string{policyv1alpha1.ClusterPropagationPolicyPermanentIDLabel: "f2507cgb-f3f3-4a4b-b289-5691a4fef979"},
needCleanup: true,
},
{
name: "no need cleanup",
obj: &unstructured.Unstructured{
Object: map[string]interface{}{
"metadata": map[string]interface{}{
"labels": map[string]interface{}{policyv1alpha1.PropagationPolicyPermanentIDLabel: "b0907cgb-f3f3-4a4b-b289-5691a4fef979"},
"annotations": map[string]interface{}{policyv1alpha1.PropagationPolicyNamespaceAnnotation: "default", policyv1alpha1.PropagationPolicyNameAnnotation: "pp-example"},
},
},
},
targetClaimMetadata: map[string]string{policyv1alpha1.PropagationPolicyPermanentIDLabel: "f2507cgb-f3f3-4a4b-b289-5691a4fef979"},
needCleanup: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.needCleanup, NeedCleanupClaimMetadata(tt.obj, tt.targetClaimMetadata))
})
}
}
40 changes: 27 additions & 13 deletions pkg/detector/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,8 @@ func (d *ResourceDetector) ReconcileClusterPropagationPolicy(key util.QueueKey)
//
// Note: The relevant ResourceBinding will continue to exist until the resource template is gone.
func (d *ResourceDetector) HandlePropagationPolicyDeletion(policyID string) error {
rbs, err := helper.GetResourceBindings(d.Client, labels.Set{policyv1alpha1.PropagationPolicyPermanentIDLabel: policyID})
claimMetadata := labels.Set{policyv1alpha1.PropagationPolicyPermanentIDLabel: policyID}
rbs, err := helper.GetResourceBindings(d.Client, claimMetadata)
if err != nil {
klog.Errorf("Failed to list propagation bindings with policy permanentID(%s): %v", policyID, err)
return err
Expand All @@ -1062,7 +1063,7 @@ func (d *ResourceDetector) HandlePropagationPolicyDeletion(policyID string) erro
// Must remove the claim metadata, such as labels and annotations, from the resource template ahead of ResourceBinding,
// otherwise might lose the chance to do that in a retry loop (in particular, the claim metadata was successfully removed
// from ResourceBinding, but resource template not), since the ResourceBinding will not be listed again.
if err := d.CleanupResourceTemplateClaimMetadata(binding.Spec.Resource, CleanupPPClaimMetadata); err != nil {
if err := d.CleanupResourceTemplateClaimMetadata(binding.Spec.Resource, claimMetadata, CleanupPPClaimMetadata); err != nil {
klog.Errorf("Failed to clean up claim metadata from resource(%s-%s/%s) when propagationPolicy removed, error: %v",
binding.Spec.Resource.Kind, binding.Spec.Resource.Namespace, binding.Spec.Resource.Name, err)
errs = append(errs, err)
Expand All @@ -1071,7 +1072,7 @@ func (d *ResourceDetector) HandlePropagationPolicyDeletion(policyID string) erro
}

// Clean up the claim metadata from the reference binding so that the karmada scheduler won't reschedule the binding.
if err := d.CleanupResourceBindingClaimMetadata(&rbs.Items[index], CleanupPPClaimMetadata); err != nil {
if err := d.CleanupResourceBindingClaimMetadata(&rbs.Items[index], claimMetadata, CleanupPPClaimMetadata); err != nil {
klog.Errorf("Failed to clean up claim metadata from resource binding(%s/%s) when propagationPolicy removed, error: %v",
binding.Namespace, binding.Name, err)
errs = append(errs, err)
Expand Down Expand Up @@ -1102,15 +1103,15 @@ func (d *ResourceDetector) HandleClusterPropagationPolicyDeletion(policyID strin
// ClusterResourceBinding, otherwise might lose the chance to do that in a retry loop (in particular, the
// claim metadata was successfully removed from ClusterResourceBinding, but resource template not), since the
// ClusterResourceBinding will not be listed again.
if err := d.CleanupResourceTemplateClaimMetadata(binding.Spec.Resource, CleanupCPPClaimMetadata); err != nil {
if err := d.CleanupResourceTemplateClaimMetadata(binding.Spec.Resource, labelSet, CleanupCPPClaimMetadata); err != nil {
klog.Errorf("Failed to clean up claim metadata from resource(%s-%s) when clusterPropagationPolicy removed, error: %v",
binding.Spec.Resource.Kind, binding.Spec.Resource.Name, err)
// Skip cleaning up policy labels and annotations from ClusterResourceBinding, give a chance to do that in a retry loop.
continue
}

// Clean up the claim metadata from the reference binding so that the Karmada scheduler won't reschedule the binding.
if err := d.CleanupClusterResourceBindingClaimMetadata(&crbs.Items[index], CleanupCPPClaimMetadata); err != nil {
if err := d.CleanupClusterResourceBindingClaimMetadata(&crbs.Items[index], labelSet); err != nil {
klog.Errorf("Failed to clean up claim metadata from clusterResourceBinding(%s) when clusterPropagationPolicy removed, error: %v",
binding.Name, err)
errs = append(errs, err)
Expand All @@ -1128,7 +1129,7 @@ func (d *ResourceDetector) HandleClusterPropagationPolicyDeletion(policyID strin
// Must remove the claim metadata, such as labels and annotations, from the resource template ahead of ResourceBinding,
// otherwise might lose the chance to do that in a retry loop (in particular, the label was successfully
// removed from ResourceBinding, but resource template not), since the ResourceBinding will not be listed again.
if err := d.CleanupResourceTemplateClaimMetadata(binding.Spec.Resource, CleanupCPPClaimMetadata); err != nil {
if err := d.CleanupResourceTemplateClaimMetadata(binding.Spec.Resource, labelSet, CleanupCPPClaimMetadata); err != nil {
klog.Errorf("Failed to clean up claim metadata from resource(%s-%s/%s) when clusterPropagationPolicy removed, error: %v",
binding.Spec.Resource.Kind, binding.Spec.Resource.Namespace, binding.Spec.Resource.Name, err)
errs = append(errs, err)
Expand All @@ -1137,7 +1138,7 @@ func (d *ResourceDetector) HandleClusterPropagationPolicyDeletion(policyID strin
}

// Clean up the claim metadata from the reference binding so that the Karmada scheduler won't reschedule the binding.
if err := d.CleanupResourceBindingClaimMetadata(&rbs.Items[index], CleanupCPPClaimMetadata); err != nil {
if err := d.CleanupResourceBindingClaimMetadata(&rbs.Items[index], labelSet, CleanupCPPClaimMetadata); err != nil {
klog.Errorf("Failed to clean up claim metadata from resourceBinding(%s/%s) when clusterPropagationPolicy removed, error: %v",
binding.Namespace, binding.Name, err)
errs = append(errs, err)
Expand Down Expand Up @@ -1279,7 +1280,7 @@ func (d *ResourceDetector) HandleClusterPropagationPolicyCreationOrUpdate(policy
}

// CleanupResourceTemplateClaimMetadata removes claim metadata, such as labels and annotations, from object referencing by objRef.
func (d *ResourceDetector) CleanupResourceTemplateClaimMetadata(objRef workv1alpha2.ObjectReference, cleanupFunc func(obj metav1.Object)) error {
func (d *ResourceDetector) CleanupResourceTemplateClaimMetadata(objRef workv1alpha2.ObjectReference, targetClaimMetadata map[string]string, cleanupFunc func(obj metav1.Object)) error {
gvr, err := restmapper.GetGroupVersionResource(d.RESTMapper, schema.FromAPIVersionAndKind(objRef.APIVersion, objRef.Kind))
if err != nil {
klog.Errorf("Failed to convert GVR from GVK(%s/%s), err: %v", objRef.APIVersion, objRef.Kind, err)
Expand All @@ -1297,6 +1298,11 @@ func (d *ResourceDetector) CleanupResourceTemplateClaimMetadata(objRef workv1alp
return err
}

if !NeedCleanupClaimMetadata(workload, targetClaimMetadata) {
klog.Infof("No need to clean up the claim metadata on resource(kind=%s, %s/%s) since they have changed", workload.GetKind(), workload.GetNamespace(), workload.GetName())
return nil
}

cleanupFunc(workload)

_, err = d.DynamicClient.Resource(gvr).Namespace(workload.GetNamespace()).Update(context.TODO(), workload, metav1.UpdateOptions{})
Expand All @@ -1310,8 +1316,12 @@ func (d *ResourceDetector) CleanupResourceTemplateClaimMetadata(objRef workv1alp
}

// CleanupResourceBindingClaimMetadata removes claim metadata, such as labels and annotations, from resource binding.
func (d *ResourceDetector) CleanupResourceBindingClaimMetadata(rb *workv1alpha2.ResourceBinding, cleanupFunc func(obj metav1.Object)) error {
func (d *ResourceDetector) CleanupResourceBindingClaimMetadata(rb *workv1alpha2.ResourceBinding, targetClaimMetadata map[string]string, cleanupFunc func(obj metav1.Object)) error {
return retry.RetryOnConflict(retry.DefaultRetry, func() (err error) {
if !NeedCleanupClaimMetadata(rb, targetClaimMetadata) {
klog.Infof("No need to clean up the claim metadata on ResourceBinding(%s/%s) since they have changed", rb.GetNamespace(), rb.GetName())
return nil
}
cleanupFunc(rb)
updateErr := d.Client.Update(context.TODO(), rb)
if updateErr == nil {
Expand All @@ -1322,16 +1332,20 @@ func (d *ResourceDetector) CleanupResourceBindingClaimMetadata(rb *workv1alpha2.
if err = d.Client.Get(context.TODO(), client.ObjectKey{Namespace: rb.GetNamespace(), Name: rb.GetName()}, updated); err == nil {
rb = updated.DeepCopy()
} else {
klog.Errorf("Failed to get updated resource binding %s/%s: %v", rb.GetNamespace(), rb.GetName(), err)
klog.Errorf("Failed to get updated ResourceBinding(%s/%s): %v", rb.GetNamespace(), rb.GetName(), err)
}
return updateErr
})
}

// CleanupClusterResourceBindingClaimMetadata removes claim metadata, such as labels and annotations, from cluster resource binding.
func (d *ResourceDetector) CleanupClusterResourceBindingClaimMetadata(crb *workv1alpha2.ClusterResourceBinding, cleanupFunc func(obj metav1.Object)) error {
func (d *ResourceDetector) CleanupClusterResourceBindingClaimMetadata(crb *workv1alpha2.ClusterResourceBinding, targetClaimMetadata map[string]string) error {
return retry.RetryOnConflict(retry.DefaultRetry, func() (err error) {
cleanupFunc(crb)
if !NeedCleanupClaimMetadata(crb, targetClaimMetadata) {
klog.Infof("No need to clean up the claim metadata on ClusterResourceBinding(%s) since they have changed", crb.GetName())
return nil
}
CleanupCPPClaimMetadata(crb)
updateErr := d.Client.Update(context.TODO(), crb)
if updateErr == nil {
return nil
Expand All @@ -1341,7 +1355,7 @@ func (d *ResourceDetector) CleanupClusterResourceBindingClaimMetadata(crb *workv
if err = d.Client.Get(context.TODO(), client.ObjectKey{Name: crb.GetName()}, updated); err == nil {
crb = updated.DeepCopy()
} else {
klog.Errorf("Failed to get updated cluster resource binding %s: %v", crb.GetName(), err)
klog.Errorf("Failed to get updated ClusterResourceBinding(%s):: %v", crb.GetName(), err)
}
return updateErr
})
Expand Down

0 comments on commit 5692e28

Please sign in to comment.