Skip to content

Commit

Permalink
🌱 machine: prevent error spamming for NodeOutdatedTaint if objects ar…
Browse files Browse the repository at this point in the history
…e not found (#11148)

* machine: add test coverage for shouldNodeHaveOutdatedTaint

* machine: prevent error spamming for NodeOutdatedTaint if objects are not found

* lint fix

* review fixes

* review fixes

* review fix
  • Loading branch information
chrischdi authored Sep 19, 2024
1 parent d7a2335 commit f9372a1
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 22 deletions.
56 changes: 36 additions & 20 deletions internal/controllers/machine/machine_controller_noderef.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
Expand Down Expand Up @@ -296,14 +297,20 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
hasTaintChanges := taints.RemoveNodeTaint(newNode, clusterv1.NodeUninitializedTaint)

// Set Taint to a node in an old MachineSet and unset Taint from a node in a new MachineSet
isOutdated, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m)
isOutdated, notFound, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m)
if err != nil {
return errors.Wrapf(err, "failed to check if Node %s is outdated", klog.KRef("", node.Name))
}
if isOutdated {
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
} else {
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges

// It is only possible to identify if we have to set or remove the NodeOutdatedRevisionTaint if shouldNodeHaveOutdatedTaint
// found all relevant objects.
// Example: when the MachineDeployment or Machineset can't be found due to a background deletion of objects.
if !notFound {
if isOutdated {
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
} else {
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
}
}

if !hasAnnotationChanges && !hasLabelChanges && !hasTaintChanges {
Expand All @@ -313,52 +320,61 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
return remoteClient.Patch(ctx, newNode, client.StrategicMergeFrom(node))
}

func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (bool, error) {
// shouldNodeHaveOutdatedTaint tries to compare the revision of the owning MachineSet to the MachineDeployment.
// It returns notFound = true if the OwnerReference is not set or the APIServer returns NotFound for the MachineSet or MachineDeployment.
// Note: This three cases could happen during background deletion of objects.
func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (outdated bool, notFound bool, err error) {
if _, hasLabel := m.Labels[clusterv1.MachineDeploymentNameLabel]; !hasLabel {
return false, nil
return false, false, nil
}

// Resolve the MachineSet name via owner references because the label value
// could also be a hash.
objKey, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
if err != nil {
return false, err
objKey, notFound, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
if err != nil || notFound {
return false, notFound, err
}
ms := &clusterv1.MachineSet{}
if err := c.Get(ctx, *objKey, ms); err != nil {
return false, err
if apierrors.IsNotFound(err) {
return false, true, nil
}
return false, false, err
}
md := &clusterv1.MachineDeployment{}
objKey = &client.ObjectKey{
Namespace: m.ObjectMeta.Namespace,
Name: m.Labels[clusterv1.MachineDeploymentNameLabel],
}
if err := c.Get(ctx, *objKey, md); err != nil {
return false, err
if apierrors.IsNotFound(err) {
return false, true, nil
}
return false, false, err
}
msRev, err := mdutil.Revision(ms)
if err != nil {
return false, err
return false, false, err
}
mdRev, err := mdutil.Revision(md)
if err != nil {
return false, err
return false, false, err
}
if msRev < mdRev {
return true, nil
return true, false, nil
}
return false, nil
return false, false, nil
}

func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, error) {
func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, bool, error) {
for _, ref := range obj.GetOwnerReferences() {
gv, err := schema.ParseGroupVersion(ref.APIVersion)
if err != nil {
return nil, err
return nil, false, err
}
if ref.Kind == "MachineSet" && gv.Group == clusterv1.GroupVersion.Group {
return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, nil
return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, false, nil
}
}
return nil, errors.Errorf("failed to find MachineSet owner reference for Machine %s", klog.KRef(obj.GetNamespace(), obj.GetName()))
return nil, true, nil
}
155 changes: 153 additions & 2 deletions internal/controllers/machine/machine_controller_noderef_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ import (
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/controllers/remote"
"sigs.k8s.io/cluster-api/internal/test/builder"
"sigs.k8s.io/cluster-api/internal/topology/ownerrefs"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/kubeconfig"
)
Expand Down Expand Up @@ -770,6 +773,49 @@ func TestPatchNode(t *testing.T) {
ms: newFakeMachineSet(metav1.NamespaceDefault, clusterName),
md: newFakeMachineDeployment(metav1.NamespaceDefault, clusterName),
},
{
name: "Ensure Labels and Annotations still get patched if MachineSet and Machinedeployment cannot be found",
oldNode: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("node-%s", util.RandomString(6)),
},
},
newLabels: map[string]string{
"label-from-machine": "foo",
},
newAnnotations: map[string]string{
"annotation-from-machine": "foo",
},
expectedLabels: map[string]string{
"label-from-machine": "foo",
},
expectedAnnotations: map[string]string{
"annotation-from-machine": "foo",
clusterv1.LabelsFromMachineAnnotation: "label-from-machine",
},
expectedTaints: []corev1.Taint{
{Key: "node.kubernetes.io/not-ready", Effect: "NoSchedule"}, // Added by the API server
},
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("ma-%s", util.RandomString(6)),
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.MachineSetNameLabel: "test-ms-missing",
clusterv1.MachineDeploymentNameLabel: "test-md",
},
OwnerReferences: []metav1.OwnerReference{{
Kind: "MachineSet",
Name: "test-ms-missing",
APIVersion: clusterv1.GroupVersion.String(),
UID: "uid",
}},
},
Spec: newFakeMachineSpec(metav1.NamespaceDefault, clusterName),
},
ms: nil,
md: nil,
},
{
name: "Ensure NodeOutdatedRevisionTaint to be set if a node is associated to an outdated machineset",
oldNode: &corev1.Node{
Expand Down Expand Up @@ -913,8 +959,12 @@ func TestPatchNode(t *testing.T) {

g.Expect(env.CreateAndWait(ctx, oldNode)).To(Succeed())
g.Expect(env.CreateAndWait(ctx, machine)).To(Succeed())
g.Expect(env.CreateAndWait(ctx, ms)).To(Succeed())
g.Expect(env.CreateAndWait(ctx, md)).To(Succeed())
if ms != nil {
g.Expect(env.CreateAndWait(ctx, ms)).To(Succeed())
}
if md != nil {
g.Expect(env.CreateAndWait(ctx, md)).To(Succeed())
}
t.Cleanup(func() {
_ = env.CleanupAndWait(ctx, oldNode, machine, ms, md)
})
Expand Down Expand Up @@ -994,3 +1044,104 @@ func newFakeMachineDeployment(namespace, clusterName string) *clusterv1.MachineD
},
}
}

func Test_shouldNodeHaveOutdatedTaint(t *testing.T) {
namespaceName := "test"
namespace := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespaceName}}

testMachineDeployment := builder.MachineDeployment(namespaceName, "my-md").
WithAnnotations(map[string]string{clusterv1.RevisionAnnotation: "1"}).
Build()
testMachineDeploymentNew := testMachineDeployment.DeepCopy()
testMachineDeploymentNew.Annotations = map[string]string{clusterv1.RevisionAnnotation: "2"}

testMachineSet := builder.MachineSet(namespaceName, "my-ms").
WithOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineDeployment, testMachineDeployment.GroupVersionKind())}).
Build()
testMachineSet.Annotations = map[string]string{clusterv1.RevisionAnnotation: "1"}

labels := map[string]string{
clusterv1.MachineDeploymentNameLabel: "my-md",
}
testMachine := builder.Machine(namespaceName, "my-machine").WithLabels(labels).Build()
testMachine.SetOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineSet, testMachineSet.GroupVersionKind())})

tests := []struct {
name string
machine *clusterv1.Machine
objects []client.Object
wantOutdated bool
wantNotFound bool
wantErr bool
}{
{
name: "Machineset not outdated",
machine: testMachine,
objects: []client.Object{testMachineSet, testMachineDeployment},
wantOutdated: false,
wantNotFound: false,
wantErr: false,
},
{
name: "Machineset outdated",
machine: testMachine,
objects: []client.Object{testMachineSet, testMachineDeploymentNew},
wantOutdated: true,
wantNotFound: false,
wantErr: false,
},
{
name: "Machine without MachineDeployment label",
machine: builder.Machine(namespaceName, "no-deploy").Build(),
objects: nil,
wantOutdated: false,
wantNotFound: false,
wantErr: false,
},
{
name: "Machine without OwnerReference",
machine: builder.Machine(namespaceName, "no-ownerref").WithLabels(labels).Build(),
objects: nil,
wantOutdated: false,
wantNotFound: true,
wantErr: false,
},
{
name: "Machine without existing MachineSet",
machine: testMachine,
objects: nil,
wantOutdated: false,
wantNotFound: true,
wantErr: false,
},
{
name: "Machine without existing MachineDeployment",
machine: testMachine,
objects: []client.Object{testMachineSet},
wantOutdated: false,
wantNotFound: true,
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
objects := []client.Object{namespace}
objects = append(objects, tt.machine)
objects = append(objects, tt.objects...)
c := fake.NewClientBuilder().
WithObjects(objects...).Build()

gotOutdated, gotNotFound, err := shouldNodeHaveOutdatedTaint(ctx, c, tt.machine)
if (err != nil) != tt.wantErr {
t.Errorf("shouldNodeHaveOutdatedTaint() error = %v, wantErr %v", err, tt.wantErr)
return
}
if gotOutdated != tt.wantOutdated {
t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotOutdated, tt.wantOutdated)
}
if gotNotFound != tt.wantNotFound {
t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotNotFound, tt.wantNotFound)
}
})
}
}

0 comments on commit f9372a1

Please sign in to comment.