Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 machine: prevent error spamming for NodeOutdatedTaint if objects are not found #11148

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 36 additions & 20 deletions internal/controllers/machine/machine_controller_noderef.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
Expand Down Expand Up @@ -296,14 +297,20 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
hasTaintChanges := taints.RemoveNodeTaint(newNode, clusterv1.NodeUninitializedTaint)

// Set Taint to a node in an old MachineSet and unset Taint from a node in a new MachineSet
isOutdated, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m)
isOutdated, notFound, err := shouldNodeHaveOutdatedTaint(ctx, r.Client, m)
if err != nil {
return errors.Wrapf(err, "failed to check if Node %s is outdated", klog.KRef("", node.Name))
}
if isOutdated {
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
} else {
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges

// It is only possible to identify if we have to set or remove the NodeOutdatedRevisionTaint if shouldNodeHaveOutdatedTaint
// found all relevant objects.
// Example: when the MachineDeployment or Machineset can't be found due to a background deletion of objects.
if !notFound {
if isOutdated {
hasTaintChanges = taints.EnsureNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
} else {
hasTaintChanges = taints.RemoveNodeTaint(newNode, clusterv1.NodeOutdatedRevisionTaint) || hasTaintChanges
}
}

if !hasAnnotationChanges && !hasLabelChanges && !hasTaintChanges {
Expand All @@ -313,52 +320,61 @@ func (r *Reconciler) patchNode(ctx context.Context, remoteClient client.Client,
return remoteClient.Patch(ctx, newNode, client.StrategicMergeFrom(node))
}

func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (bool, error) {
// shouldNodeHaveOutdatedTaint tries to compare the revision of the owning MachineSet to the MachineDeployment.
// It returns notFound = true if the OwnerReference is not set or the APIServer returns NotFound for the MachineSet or MachineDeployment.
// Note: This three cases could happen during background deletion of objects.
func shouldNodeHaveOutdatedTaint(ctx context.Context, c client.Client, m *clusterv1.Machine) (outdated bool, notFound bool, err error) {
if _, hasLabel := m.Labels[clusterv1.MachineDeploymentNameLabel]; !hasLabel {
return false, nil
return false, false, nil
}

// Resolve the MachineSet name via owner references because the label value
// could also be a hash.
objKey, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
if err != nil {
return false, err
objKey, notFound, err := getOwnerMachineSetObjectKey(m.ObjectMeta)
if err != nil || notFound {
return false, notFound, err
}
ms := &clusterv1.MachineSet{}
if err := c.Get(ctx, *objKey, ms); err != nil {
return false, err
if apierrors.IsNotFound(err) {
return false, true, nil
}
return false, false, err
}
md := &clusterv1.MachineDeployment{}
objKey = &client.ObjectKey{
Namespace: m.ObjectMeta.Namespace,
Name: m.Labels[clusterv1.MachineDeploymentNameLabel],
}
if err := c.Get(ctx, *objKey, md); err != nil {
return false, err
if apierrors.IsNotFound(err) {
return false, true, nil
}
return false, false, err
}
msRev, err := mdutil.Revision(ms)
if err != nil {
return false, err
return false, false, err
}
mdRev, err := mdutil.Revision(md)
if err != nil {
return false, err
return false, false, err
}
if msRev < mdRev {
return true, nil
return true, false, nil
}
return false, nil
return false, false, nil
}

func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, error) {
func getOwnerMachineSetObjectKey(obj metav1.ObjectMeta) (*client.ObjectKey, bool, error) {
for _, ref := range obj.GetOwnerReferences() {
gv, err := schema.ParseGroupVersion(ref.APIVersion)
if err != nil {
return nil, err
return nil, false, err
}
if ref.Kind == "MachineSet" && gv.Group == clusterv1.GroupVersion.Group {
return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, nil
return &client.ObjectKey{Namespace: obj.Namespace, Name: ref.Name}, false, nil
}
}
return nil, errors.Errorf("failed to find MachineSet owner reference for Machine %s", klog.KRef(obj.GetNamespace(), obj.GetName()))
return nil, true, nil
}
155 changes: 153 additions & 2 deletions internal/controllers/machine/machine_controller_noderef_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ import (
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/controllers/remote"
"sigs.k8s.io/cluster-api/internal/test/builder"
"sigs.k8s.io/cluster-api/internal/topology/ownerrefs"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/kubeconfig"
)
Expand Down Expand Up @@ -770,6 +773,49 @@ func TestPatchNode(t *testing.T) {
ms: newFakeMachineSet(metav1.NamespaceDefault, clusterName),
md: newFakeMachineDeployment(metav1.NamespaceDefault, clusterName),
},
{
name: "Ensure Labels and Annotations still get patched if MachineSet and Machinedeployment cannot be found",
oldNode: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("node-%s", util.RandomString(6)),
},
},
newLabels: map[string]string{
"label-from-machine": "foo",
},
newAnnotations: map[string]string{
"annotation-from-machine": "foo",
},
expectedLabels: map[string]string{
"label-from-machine": "foo",
},
expectedAnnotations: map[string]string{
"annotation-from-machine": "foo",
clusterv1.LabelsFromMachineAnnotation: "label-from-machine",
},
expectedTaints: []corev1.Taint{
{Key: "node.kubernetes.io/not-ready", Effect: "NoSchedule"}, // Added by the API server
},
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("ma-%s", util.RandomString(6)),
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.MachineSetNameLabel: "test-ms-missing",
clusterv1.MachineDeploymentNameLabel: "test-md",
},
OwnerReferences: []metav1.OwnerReference{{
Kind: "MachineSet",
Name: "test-ms-missing",
APIVersion: clusterv1.GroupVersion.String(),
UID: "uid",
}},
},
Spec: newFakeMachineSpec(metav1.NamespaceDefault, clusterName),
},
ms: nil,
md: nil,
},
{
name: "Ensure NodeOutdatedRevisionTaint to be set if a node is associated to an outdated machineset",
oldNode: &corev1.Node{
Expand Down Expand Up @@ -913,8 +959,12 @@ func TestPatchNode(t *testing.T) {

g.Expect(env.CreateAndWait(ctx, oldNode)).To(Succeed())
g.Expect(env.CreateAndWait(ctx, machine)).To(Succeed())
g.Expect(env.CreateAndWait(ctx, ms)).To(Succeed())
g.Expect(env.CreateAndWait(ctx, md)).To(Succeed())
if ms != nil {
g.Expect(env.CreateAndWait(ctx, ms)).To(Succeed())
}
if md != nil {
g.Expect(env.CreateAndWait(ctx, md)).To(Succeed())
}
t.Cleanup(func() {
_ = env.CleanupAndWait(ctx, oldNode, machine, ms, md)
})
Expand Down Expand Up @@ -994,3 +1044,104 @@ func newFakeMachineDeployment(namespace, clusterName string) *clusterv1.MachineD
},
}
}

func Test_shouldNodeHaveOutdatedTaint(t *testing.T) {
chrischdi marked this conversation as resolved.
Show resolved Hide resolved
namespaceName := "test"
namespace := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespaceName}}

testMachineDeployment := builder.MachineDeployment(namespaceName, "my-md").
WithAnnotations(map[string]string{clusterv1.RevisionAnnotation: "1"}).
Build()
testMachineDeploymentNew := testMachineDeployment.DeepCopy()
testMachineDeploymentNew.Annotations = map[string]string{clusterv1.RevisionAnnotation: "2"}

testMachineSet := builder.MachineSet(namespaceName, "my-ms").
WithOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineDeployment, testMachineDeployment.GroupVersionKind())}).
Build()
testMachineSet.Annotations = map[string]string{clusterv1.RevisionAnnotation: "1"}

labels := map[string]string{
clusterv1.MachineDeploymentNameLabel: "my-md",
}
testMachine := builder.Machine(namespaceName, "my-machine").WithLabels(labels).Build()
testMachine.SetOwnerReferences([]metav1.OwnerReference{*ownerrefs.OwnerReferenceTo(testMachineSet, testMachineSet.GroupVersionKind())})

tests := []struct {
name string
machine *clusterv1.Machine
objects []client.Object
wantOutdated bool
wantNotFound bool
wantErr bool
}{
{
name: "Machineset not outdated",
machine: testMachine,
objects: []client.Object{testMachineSet, testMachineDeployment},
wantOutdated: false,
wantNotFound: false,
wantErr: false,
},
{
name: "Machineset outdated",
machine: testMachine,
objects: []client.Object{testMachineSet, testMachineDeploymentNew},
wantOutdated: true,
wantNotFound: false,
wantErr: false,
},
{
name: "Machine without MachineDeployment label",
machine: builder.Machine(namespaceName, "no-deploy").Build(),
objects: nil,
wantOutdated: false,
wantNotFound: false,
wantErr: false,
},
{
name: "Machine without OwnerReference",
machine: builder.Machine(namespaceName, "no-ownerref").WithLabels(labels).Build(),
objects: nil,
wantOutdated: false,
wantNotFound: true,
wantErr: false,
},
{
name: "Machine without existing MachineSet",
machine: testMachine,
objects: nil,
wantOutdated: false,
wantNotFound: true,
wantErr: false,
},
{
name: "Machine without existing MachineDeployment",
machine: testMachine,
objects: []client.Object{testMachineSet},
wantOutdated: false,
wantNotFound: true,
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
objects := []client.Object{namespace}
objects = append(objects, tt.machine)
objects = append(objects, tt.objects...)
c := fake.NewClientBuilder().
WithObjects(objects...).Build()

gotOutdated, gotNotFound, err := shouldNodeHaveOutdatedTaint(ctx, c, tt.machine)
if (err != nil) != tt.wantErr {
t.Errorf("shouldNodeHaveOutdatedTaint() error = %v, wantErr %v", err, tt.wantErr)
return
}
if gotOutdated != tt.wantOutdated {
t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotOutdated, tt.wantOutdated)
}
if gotNotFound != tt.wantNotFound {
t.Errorf("shouldNodeHaveOutdatedTaint() = %v, want %v", gotNotFound, tt.wantNotFound)
}
})
}
}
Loading