diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go index 091a1121044d..67a4d028e599 100644 --- a/internal/controllers/machine/machine_controller_noderef.go +++ b/internal/controllers/machine/machine_controller_noderef.go @@ -23,6 +23,7 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" @@ -296,14 +297,20 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client, hasTaintChanges := taints.RemoveNodeTaint(newNode, clusterv1.NodeUninitializedTaint) // Set Taint to a node in an old MachineSet and unset Taint from a node in a new MachineSet - isOutdated, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m) + isOutdated, notFound, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m) if err != nil { return errors.Wrapf(err, "failed to check if Node %s is outdated", klog.KRef("", node.Name)) } - if isOutdated { - hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges - } else { - hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges + + // It is only possible to identify if we have to set or remove the NodeOutdatedRevisionTaint if shouldNodeHaveOutdatedTaint + // found all relevant objects. + // Example: when the MachineDeployment or Machineset can't be found due to a background deletion of objects. + if !notFound { + if isOutdated { + hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges + } else { + hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges + } } if !hasAnnotationChanges && !hasLabelChanges && !hasTaintChanges { @@ -313,20 +320,26 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client, return remoteClient.Patch(ctx, newNode, client.StrategicMergeFrom(node)) } -func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (bool, error) { +// shouldNodeHaveOutdatedTaint tries to compare the revision of the owning MachineSet to the MachineDeployment. +// It returns notFound = true if the OwnerReference is not set or the APIServer returns NotFound for the MachineSet or MachineDeployment. +// Note: This three cases could happen during background deletion of objects. +func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (outdated bool, notFound bool, err error) { if _, hasLabel := m.Labels[clusterv1.MachineDeploymentNameLabel]; !hasLabel { - return false, nil + return false, false, nil } // Resolve the MachineSet name via owner references because the label value // could also be a hash. - objKey, err := getOwnerMachineSetObjectKey(m.ObjectMeta) - if err != nil { - return false, err + objKey, notFound, err := getOwnerMachineSetObjectKey(m.ObjectMeta) + if err != nil || notFound { + return false, notFound, err } ms := &clusterv1.MachineSet{} if err := c.Get(ctx, *objKey, ms); err != nil { - return false, err + if apierrors.IsNotFound(err) { + return false, true, nil + } + return false, false, err } md := &clusterv1.MachineDeployment{} objKey = &client.ObjectKey{ @@ -334,31 +347,34 @@ func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *cluste Name: m.Labels[clusterv1.MachineDeploymentNameLabel], } if err := c.Get(ctx, *objKey, md); err != nil { - return false, err + if apierrors.IsNotFound(err) { + return false, true, nil + } + return false, false, err } msRev, err := mdutil.Revision(ms) if err != nil { - return false, err + return false, false, err } mdRev, err := mdutil.Revision(md) if err != nil { - return false, err + return false, false, err } if msRev < mdRev { - return true, nil + return true, false, nil } - return false, nil + return false, false, nil } -func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, error) { +func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, bool, error) { for _, ref := range obj.GetOwnerReferences() { gv, err := schema.ParseGroupVersion(ref.APIVersion) if err != nil { - return nil, err + return nil, false, err } if ref.Kind == "MachineSet" && gv.Group == clusterv1.GroupVersion.Group { - return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, nil + return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, false, nil } } - return nil, errors.Errorf("failed to find MachineSet owner reference for Machine %s", klog.KRef(obj.GetNamespace(), obj.GetName())) + return nil, true, nil } diff --git a/internal/controllers/machine/machine_controller_noderef_test.go b/internal/controllers/machine/machine_controller_noderef_test.go index 52dfc42331f4..40d64b9f0a6f 100644 --- a/internal/controllers/machine/machine_controller_noderef_test.go +++ b/internal/controllers/machine/machine_controller_noderef_test.go @@ -29,11 +29,14 @@ import ( "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/remote" + "sigs.k8s.io/cluster-api/internal/test/builder" + "sigs.k8s.io/cluster-api/internal/topology/ownerrefs" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/kubeconfig" ) @@ -770,6 +773,49 @@ func TestPatchNode(t *testing.T) { ms: newFakeMachineSet(metav1.NamespaceDefault, clusterName), md: newFakeMachineDeployment(metav1.NamespaceDefault, clusterName), }, + { + name: "Ensure Labels and Annotations still get patched if MachineSet and Machinedeployment cannot be found", + oldNode: &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("node-%s", util.RandomString(6)), + }, + }, + newLabels: map[string]string{ + "label-from-machine": "foo", + }, + newAnnotations: map[string]string{ + "annotation-from-machine": "foo", + }, + expectedLabels: map[string]string{ + "label-from-machine": "foo", + }, + expectedAnnotations: map[string]string{ + "annotation-from-machine": "foo", + clusterv1.LabelsFromMachineAnnotation: "label-from-machine", + }, + expectedTaints: []corev1.Taint{ + {Key: "node.kubernetes.io/not-ready", Effect: "NoSchedule"}, // Added by the API server + }, + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("ma-%s", util.RandomString(6)), + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.MachineSetNameLabel: "test-ms-missing", + clusterv1.MachineDeploymentNameLabel: "test-md", + }, + OwnerReferences: []metav1.OwnerReference{{ + Kind: "MachineSet", + Name: "test-ms-missing", + APIVersion: clusterv1.GroupVersion.String(), + UID: "uid", + }}, + }, + Spec: newFakeMachineSpec(metav1.NamespaceDefault, clusterName), + }, + ms: nil, + md: nil, + }, { name: "Ensure NodeOutdatedRevisionTaint to be set if a node is associated to an outdated machineset", oldNode: &corev1.Node{ @@ -913,8 +959,12 @@ func TestPatchNode(t *testing.T) { g.Expect(env.CreateAndWait(ctx, oldNode)).To(Succeed()) g.Expect(env.CreateAndWait(ctx, machine)).To(Succeed()) - g.Expect(env.CreateAndWait(ctx, ms)).To(Succeed()) - g.Expect(env.CreateAndWait(ctx, md)).To(Succeed()) + if ms != nil { + g.Expect(env.CreateAndWait(ctx, ms)).To(Succeed()) + } + if md != nil { + g.Expect(env.CreateAndWait(ctx, md)).To(Succeed()) + } t.Cleanup(func() { _ = env.CleanupAndWait(ctx, oldNode, machine, ms, md) }) @@ -994,3 +1044,104 @@ func newFakeMachineDeployment(namespace, clusterName string) *clusterv1.MachineD }, } } + +func Test_shouldNodeHaveOutdatedTaint(t *testing.T) { + namespaceName := "test" + namespace := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespaceName}} + + testMachineDeployment := builder.MachineDeployment(namespaceName, "my-md"). + WithAnnotations(map[string]string{clusterv1.RevisionAnnotation: "1"}). + Build() + testMachineDeploymentNew := testMachineDeployment.DeepCopy() + testMachineDeploymentNew.Annotations = map[string]string{clusterv1.RevisionAnnotation: "2"} + + testMachineSet := builder.MachineSet(namespaceName, "my-ms"). + WithOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineDeployment, testMachineDeployment.GroupVersionKind())}). + Build() + testMachineSet.Annotations = map[string]string{clusterv1.RevisionAnnotation: "1"} + + labels := map[string]string{ + clusterv1.MachineDeploymentNameLabel: "my-md", + } + testMachine := builder.Machine(namespaceName, "my-machine").WithLabels(labels).Build() + testMachine.SetOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineSet, testMachineSet.GroupVersionKind())}) + + tests := []struct { + name string + machine *clusterv1.Machine + objects []client.Object + wantOutdated bool + wantNotFound bool + wantErr bool + }{ + { + name: "Machineset not outdated", + machine: testMachine, + objects: []client.Object{testMachineSet, testMachineDeployment}, + wantOutdated: false, + wantNotFound: false, + wantErr: false, + }, + { + name: "Machineset outdated", + machine: testMachine, + objects: []client.Object{testMachineSet, testMachineDeploymentNew}, + wantOutdated: true, + wantNotFound: false, + wantErr: false, + }, + { + name: "Machine without MachineDeployment label", + machine: builder.Machine(namespaceName, "no-deploy").Build(), + objects: nil, + wantOutdated: false, + wantNotFound: false, + wantErr: false, + }, + { + name: "Machine without OwnerReference", + machine: builder.Machine(namespaceName, "no-ownerref").WithLabels(labels).Build(), + objects: nil, + wantOutdated: false, + wantNotFound: true, + wantErr: false, + }, + { + name: "Machine without existing MachineSet", + machine: testMachine, + objects: nil, + wantOutdated: false, + wantNotFound: true, + wantErr: false, + }, + { + name: "Machine without existing MachineDeployment", + machine: testMachine, + objects: []client.Object{testMachineSet}, + wantOutdated: false, + wantNotFound: true, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := []client.Object{namespace} + objects = append(objects, tt.machine) + objects = append(objects, tt.objects...) + c := fake.NewClientBuilder(). + WithObjects(objects...).Build() + + gotOutdated, gotNotFound, err := shouldNodeHaveOutdatedTaint(ctx, c, tt.machine) + if (err != nil) != tt.wantErr { + t.Errorf("shouldNodeHaveOutdatedTaint() error = %v, wantErr %v", err, tt.wantErr) + return + } + if gotOutdated != tt.wantOutdated { + t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotOutdated, tt.wantOutdated) + } + if gotNotFound != tt.wantNotFound { + t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotNotFound, tt.wantNotFound) + } + }) + } +}