diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 2dbbd916713b..31aa7726a868 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -562,8 +562,33 @@ func (r *Reconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1 return errClusterIsBeingDeleted } - // Cannot delete something that doesn't exist. + if machine.Status.NodeRef == nil && machine.Spec.ProviderID != nil { + // If we don't have a node reference, but a provider id has been set, + // try to retrieve the node one more time. + // + // NOTE: The following is a best-effort attempt to retrieve the node, + // errors are logged but not returned to ensure machines are deleted + // even if the node cannot be retrieved. + remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster)) + if err != nil { + log.Error(err, "Failed to get cluster client while deleting Machine and checking for nodes") + } else { + node, err := r.getNode(ctx, remoteClient, *machine.Spec.ProviderID) + if err != nil && err != ErrNodeNotFound { + log.Error(err, "Failed to get node while deleting Machine") + } else if err == nil { + machine.Status.NodeRef = &corev1.ObjectReference{ + APIVersion: corev1.SchemeGroupVersion.String(), + Kind: "Node", + Name: node.Name, + UID: node.UID, + } + } + } + } + if machine.Status.NodeRef == nil { + // Cannot delete something that doesn't exist. return errNilNodeRef } diff --git a/internal/controllers/machine/machine_controller_test.go b/internal/controllers/machine/machine_controller_test.go index 9d17f5404b16..8aa9b1a9868f 100644 --- a/internal/controllers/machine/machine_controller_test.go +++ b/internal/controllers/machine/machine_controller_test.go @@ -2522,6 +2522,130 @@ func TestNodeDeletion(t *testing.T) { } } +func TestNodeDeletionWithoutNodeRefFallback(t *testing.T) { + g := NewWithT(t) + + deletionTime := metav1.Now().Add(-1 * time.Second) + + testCluster := clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: metav1.NamespaceDefault, + }, + } + + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: corev1.NodeSpec{ProviderID: "test://id-1"}, + } + + testMachine := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.MachineControlPlaneLabel: "", + }, + Annotations: map[string]string{ + "machine.cluster.x-k8s.io/exclude-node-draining": "", + }, + Finalizers: []string{clusterv1.MachineFinalizer}, + DeletionTimestamp: &metav1.Time{Time: deletionTime}, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: "test-cluster", + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "GenericInfrastructureMachine", + Name: "infra-config1", + }, + Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")}, + ProviderID: ptr.To("test://id-1"), + }, + } + + cpmachine1 := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cp1", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: "test-cluster", + clusterv1.MachineControlPlaneLabel: "", + }, + Finalizers: []string{clusterv1.MachineFinalizer}, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: "test-cluster", + InfrastructureRef: corev1.ObjectReference{}, + Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")}, + }, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{ + Name: "cp1", + }, + }, + } + + testCases := []struct { + name string + deletionTimeout *metav1.Duration + resultErr bool + clusterDeleted bool + expectNodeDeletion bool + createFakeClient func(...client.Object) client.Client + }{ + { + name: "should return no error when the node exists and matches the provider id", + deletionTimeout: &metav1.Duration{Duration: time.Second}, + resultErr: false, + expectNodeDeletion: true, + createFakeClient: func(initObjs ...client.Object) client.Client { + return fake.NewClientBuilder(). + WithObjects(initObjs...). + WithIndex(&corev1.Node{}, index.NodeProviderIDField, index.NodeByProviderID). + WithStatusSubresource(&clusterv1.Machine{}). + Build() + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(*testing.T) { + m := testMachine.DeepCopy() + m.Spec.NodeDeletionTimeout = tc.deletionTimeout + + fakeClient := tc.createFakeClient(node, m, cpmachine1) + tracker := remote.NewTestClusterCacheTracker(ctrl.Log, fakeClient, fakeClient, fakeScheme, client.ObjectKeyFromObject(&testCluster)) + + r := &Reconciler{ + Client: fakeClient, + Tracker: tracker, + recorder: record.NewFakeRecorder(10), + nodeDeletionRetryTimeout: 10 * time.Millisecond, + } + + cluster := testCluster.DeepCopy() + if tc.clusterDeleted { + cluster.DeletionTimestamp = &metav1.Time{Time: deletionTime.Add(time.Hour)} + } + + _, err := r.reconcileDelete(context.Background(), cluster, m) + + if tc.resultErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + if tc.expectNodeDeletion { + n := &corev1.Node{} + g.Expect(fakeClient.Get(context.Background(), client.ObjectKeyFromObject(node), n)).NotTo(Succeed()) + } + } + }) + } +} + // adds a condition list to an external object. func addConditionsToExternal(u *unstructured.Unstructured, newConditions clusterv1.Conditions) { existingConditions := clusterv1.Conditions{}