From ac189255de8cc0cf338d25af18c361ce43836ab2 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Fri, 6 Sep 2024 18:24:25 +0200 Subject: [PATCH] machine: prevent error spamming for NodeOutdatedTaint if objects are not found --- .../machine/machine_controller_noderef.go | 49 +++++++---- .../machine_controller_noderef_test.go | 86 +++++++++++-------- 2 files changed, 81 insertions(+), 54 deletions(-) diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go index 091a1121044d..551d16f00457 100644 --- a/internal/controllers/machine/machine_controller_noderef.go +++ b/internal/controllers/machine/machine_controller_noderef.go @@ -23,6 +23,7 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" @@ -296,10 +297,17 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client, hasTaintChanges := taints.RemoveNodeTaint(newNode, clusterv1.NodeUninitializedTaint) // Set Taint to a node in an old MachineSet and unset Taint from a node in a new MachineSet - isOutdated, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m) + isOutdated, notFound, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m) if err != nil { return errors.Wrapf(err, "failed to check if Node %s is outdated", klog.KRef("", node.Name)) } + + // It is not possible to identify if we have to set the NodeOutdatedRevisionTaint if there is + // no OwnerReference or the owning MachineSet or MachineDeployment was not found. + // Note: This could happen e.g. during background deletion of objects. + if notFound { + return nil + } if isOutdated { hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges } else { @@ -313,20 +321,26 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client, return remoteClient.Patch(ctx, newNode, client.StrategicMergeFrom(node)) } -func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (bool, error) { +// shouldNodeHaveOutdatedTaint tries to compare the revision of the owning MachineSet to the MachineDeployment. +// It returns notFound = true if the OwnerReference is not set or the APIServer returns NotFound for the MachineSet or MachineDeployment. +// Note: This three cases could happen during background deletion of objects. +func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (outdated bool, notFound bool, err error) { if _, hasLabel := m.Labels[clusterv1.MachineDeploymentNameLabel]; !hasLabel { - return false, nil + return false, false, nil } // Resolve the MachineSet name via owner references because the label value // could also be a hash. - objKey, err := getOwnerMachineSetObjectKey(m.ObjectMeta) - if err != nil { - return false, err + objKey, notFound, err := getOwnerMachineSetObjectKey(m.ObjectMeta) + if err != nil || notFound { + return false, notFound, err } ms := &clusterv1.MachineSet{} if err := c.Get(ctx, *objKey, ms); err != nil { - return false, err + if apierrors.IsNotFound(err) { + return false, true, nil + } + return false, false, err } md := &clusterv1.MachineDeployment{} objKey = &client.ObjectKey{ @@ -334,31 +348,34 @@ func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *cluste Name: m.Labels[clusterv1.MachineDeploymentNameLabel], } if err := c.Get(ctx, *objKey, md); err != nil { - return false, err + if apierrors.IsNotFound(err) { + return false, true, nil + } + return false, false, err } msRev, err := mdutil.Revision(ms) if err != nil { - return false, err + return false, false, err } mdRev, err := mdutil.Revision(md) if err != nil { - return false, err + return false, false, err } if msRev < mdRev { - return true, nil + return true, false, nil } - return false, nil + return false, false, nil } -func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, error) { +func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, bool, error) { for _, ref := range obj.GetOwnerReferences() { gv, err := schema.ParseGroupVersion(ref.APIVersion) if err != nil { - return nil, err + return nil, false, err } if ref.Kind == "MachineSet" && gv.Group == clusterv1.GroupVersion.Group { - return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, nil + return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, false, nil } } - return nil, errors.Errorf("failed to find MachineSet owner reference for Machine %s", klog.KRef(obj.GetNamespace(), obj.GetName())) + return nil, true, nil } diff --git a/internal/controllers/machine/machine_controller_noderef_test.go b/internal/controllers/machine/machine_controller_noderef_test.go index 9b97585b555e..7007dd5c036b 100644 --- a/internal/controllers/machine/machine_controller_noderef_test.go +++ b/internal/controllers/machine/machine_controller_noderef_test.go @@ -1019,53 +1019,60 @@ func Test_shouldNodeHaveOutdatedTaint(t *testing.T) { testMachine.SetOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineSet, testMachineSet.GroupVersionKind())}) tests := []struct { - name string - machine *clusterv1.Machine - objects []client.Object - want bool - wantErr bool + name string + machine *clusterv1.Machine + objects []client.Object + wantOutdated bool + wantNotFound bool + wantErr bool }{ { - name: "Machine without MachineDeployment label", - machine: builder.Machine(namespaceName, "no-deploy").Build(), - objects: nil, - want: false, - wantErr: false, + name: "Machineset not outdated", + machine: testMachine, + objects: []client.Object{testMachineSet, testMachineDeployment}, + wantOutdated: false, + wantNotFound: false, + wantErr: false, }, { - name: "Machine without OwnerReference", - machine: builder.Machine(namespaceName, "no-ownerref").WithLabels(labels).Build(), - objects: nil, - want: false, - wantErr: true, + name: "Machineset outdated", + machine: testMachine, + objects: []client.Object{testMachineSet, testMachineDeploymentNew}, + wantOutdated: true, + wantNotFound: false, + wantErr: false, }, { - name: "Machine without existing MachineSet", - machine: testMachine, - objects: []client.Object{testMachineSet}, - want: false, - wantErr: true, + name: "Machine without MachineDeployment label", + machine: builder.Machine(namespaceName, "no-deploy").Build(), + objects: nil, + wantOutdated: false, + wantNotFound: false, + wantErr: false, }, { - name: "Machine without existing MachineDeployment", - machine: testMachine, - objects: []client.Object{testMachineSet}, - want: false, - wantErr: true, + name: "Machine without OwnerReference", + machine: builder.Machine(namespaceName, "no-ownerref").WithLabels(labels).Build(), + objects: nil, + wantOutdated: false, + wantNotFound: true, + wantErr: false, }, { - name: "Machineset not outdated", - machine: testMachine, - objects: []client.Object{testMachineSet, testMachineDeployment}, - want: false, - wantErr: false, + name: "Machine without existing MachineSet", + machine: testMachine, + objects: nil, + wantOutdated: false, + wantNotFound: true, + wantErr: false, }, { - name: "Machineset outdated", - machine: testMachine, - objects: []client.Object{testMachineSet, testMachineDeploymentNew}, - want: true, - wantErr: false, + name: "Machine without existing MachineDeployment", + machine: testMachine, + objects: []client.Object{testMachineSet}, + wantOutdated: false, + wantNotFound: true, + wantErr: false, }, } for _, tt := range tests { @@ -1076,13 +1083,16 @@ func Test_shouldNodeHaveOutdatedTaint(t *testing.T) { c := fake.NewClientBuilder(). WithObjects(objects...).Build() - got, err := shouldNodeHaveOutdatedTaint(ctx, c, tt.machine) + gotOutdated, gotNotFound, err := shouldNodeHaveOutdatedTaint(ctx, c, tt.machine) if (err != nil) != tt.wantErr { t.Errorf("shouldNodeHaveOutdatedTaint() error = %v, wantErr %v", err, tt.wantErr) return } - if got != tt.want { - t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", got, tt.want) + if gotOutdated != tt.wantOutdated { + t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotOutdated, tt.wantOutdated) + } + if gotNotFound != tt.wantNotFound { + t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotNotFound, tt.wantNotFound) } }) }