From 9a22e84a0e3aa5a62a1185024279a0c62708bb0b Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Fri, 24 Nov 2023 10:35:25 +0100 Subject: [PATCH 1/8] Use shorter variable name for MDR remediation Signed-off-by: Carlo Lobrano --- .../machinedeletionremediation_controller.go | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/controllers/machinedeletionremediation_controller.go b/controllers/machinedeletionremediation_controller.go index 79e95e65..c62b2263 100644 --- a/controllers/machinedeletionremediation_controller.go +++ b/controllers/machinedeletionremediation_controller.go @@ -112,15 +112,15 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re log.Info("reconciling...") var err error - var remediation *v1alpha1.MachineDeletionRemediation - if remediation, err = r.getRemediation(ctx, req); remediation == nil || err != nil { + var mdr *v1alpha1.MachineDeletionRemediation + if mdr, err = r.getRemediation(ctx, req); mdr == nil || err != nil { return ctrl.Result{}, err } - log.Info("Machine Deletion Remediation CR found", "name", remediation.GetName()) + log.Info("Machine Deletion Remediation CR found", "name", mdr.GetName()) defer func() { - if updateErr := r.updateStatus(ctx, remediation); updateErr != nil { + if updateErr := r.updateStatus(ctx, mdr); updateErr != nil { if !apiErrors.IsConflict(updateErr) { finalErr = utilerrors.NewAggregate([]error{updateErr, finalErr}) } @@ -129,33 +129,33 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re }() // Remediation's name was created from Node's name - nodeName := remediation.GetName() + nodeName := mdr.GetName() - if r.isTimedOutByNHC(remediation) { + if r.isTimedOutByNHC(mdr) { log.Info("NHC time out annotation found, stopping remediation") - _, err = r.updateConditions(remediationTimedOutByNhc, remediation) + _, err = r.updateConditions(remediationTimedOutByNhc, mdr) return ctrl.Result{}, err } - if updateRequired, err := r.updateConditions(remediationStarted, remediation); err != nil { + if updateRequired, err := r.updateConditions(remediationStarted, mdr); err != nil { log.Error(err, "could not update Status conditions") return ctrl.Result{}, err } else if updateRequired { return ctrl.Result{RequeueAfter: time.Second}, nil } - machine, err := r.getMachine(ctx, remediation) + machine, err := r.getMachine(ctx, mdr) if err != nil { // Handling specific error scenarios. We avoid re-queue by returning nil after updating the // conditions, as these are unrecoverable errors and re-queue would not change the // situation. An error is returned only if it does not match the following custom errors, or // updateConditions fails. if err == nodeNotFoundError { - _, err = r.updateConditions(remediationSkippedNodeNotFound, remediation) + _, err = r.updateConditions(remediationSkippedNodeNotFound, mdr) } else if err == machineNotFoundError { - _, err = r.updateConditions(remediationSkippedMachineNotFound, remediation) + _, err = r.updateConditions(remediationSkippedMachineNotFound, mdr) } else if err == unrecoverableError { - _, err = r.updateConditions(remediationFailed, remediation) + _, err = r.updateConditions(remediationFailed, mdr) } return ctrl.Result{}, err @@ -168,22 +168,22 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re // If the latter, wait for the expected number of nodes to be restored before setting the Succeeded condition. // NOTE: the Machine will always be nil after deletion if it changes name after re-provisioning, this is why we // verify nodes count restoration even if machine == nil. - if machine == nil || machine.GetCreationTimestamp().After(remediation.GetCreationTimestamp().Time) { - if isRestored, err := r.isExpectedNodesNumberRestored(ctx, remediation); err != nil { + if machine == nil || machine.GetCreationTimestamp().After(mdr.GetCreationTimestamp().Time) { + if isRestored, err := r.isExpectedNodesNumberRestored(ctx, mdr); err != nil { log.Error(err, "could not verify if node was restored") if err == unrecoverableError { return ctrl.Result{}, nil } return ctrl.Result{}, err } else if isRestored { - _, err = r.updateConditions(remediationFinishedMachineDeleted, remediation) + _, err = r.updateConditions(remediationFinishedMachineDeleted, mdr) return ctrl.Result{}, err } log.Info("waiting for the nodes count to be re-provisioned") return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } - log.Info("node-associated machine found", "node", remediation.Name, "machine", machine.GetName()) + log.Info("node-associated machine found", "node", mdr.Name, "machine", machine.GetName()) // Detect if Node name is expected to change after Machine deletion given // the Platform type. In case of BareMetal platform, the name is NOT @@ -205,7 +205,7 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re status = metav1.ConditionTrue } - if updateRequired := r.setPermanentNodeDeletionExpectedCondition(status, remediation); updateRequired { + if updateRequired := r.setPermanentNodeDeletionExpectedCondition(status, mdr); updateRequired { log.Info(permanentNodeDeletionExpectedMsg) return ctrl.Result{RequeueAfter: time.Second}, nil } @@ -217,13 +217,13 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re } if !hasControllerOwner(machine) { - log.Info("ignoring remediation of node-associated machine: the machine has no controller owner", "machine", machine.GetName(), "node name", remediation.Name) - _, err = r.updateConditions(remediationSkippedNoControllerOwner, remediation) + log.Info("ignoring remediation of node-associated machine: the machine has no controller owner", "machine", machine.GetName(), "node name", mdr.Name) + _, err = r.updateConditions(remediationSkippedNoControllerOwner, mdr) return ctrl.Result{}, err } // save Machine's name and namespace to follow its deletion phase - if err = r.saveMachineData(ctx, remediation, machine); err != nil { + if err = r.saveMachineData(ctx, mdr, machine); err != nil { log.Error(err, "could not save Machine's Name and Namespace", "machine name", machine.GetName(), "machine namespace", machine.GetNamespace()) return ctrl.Result{}, errors.Wrapf(err, "failed to save Machine's name and namespace") } From 3e3bd8f961c8b5b526006dcc564d5b624fdafc6c Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Tue, 12 Dec 2023 10:46:37 +0100 Subject: [PATCH 2/8] Replace deprecated k8s.io/utils/pointer with k8s.io/utils/ptr Signed-off-by: Carlo Lobrano --- controllers/machinedeletionremediation_controller_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/controllers/machinedeletionremediation_controller_test.go b/controllers/machinedeletionremediation_controller_test.go index 98f2aae6..5988cf0d 100644 --- a/controllers/machinedeletionremediation_controller_test.go +++ b/controllers/machinedeletionremediation_controller_test.go @@ -14,7 +14,7 @@ import ( "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/pointer" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" machinev1 "github.com/openshift/api/machine/v1" @@ -506,7 +506,7 @@ func createControlPlaneMachineSet(name string) *machinev1.ControlPlaneMachineSet Namespace: defaultNamespace, }, Spec: machinev1.ControlPlaneMachineSetSpec{ - Replicas: pointer.Int32(3), + Replicas: ptr.To[int32](3), Selector: metav1.LabelSelector{ MatchLabels: map[string]string{ "machine.openshift.io/cluster-api-machine-role": "master", From cc237e408ecbe0ed1d8021b7fd6de15432a5cb2c Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Tue, 12 Dec 2023 10:55:40 +0100 Subject: [PATCH 3/8] Add functions to create CR with specific owners NHC and MHC Signed-off-by: Carlo Lobrano --- ...hinedeletionremediation_controller_test.go | 63 +++++++++++++------ 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/controllers/machinedeletionremediation_controller_test.go b/controllers/machinedeletionremediation_controller_test.go index 5988cf0d..15efbe00 100644 --- a/controllers/machinedeletionremediation_controller_test.go +++ b/controllers/machinedeletionremediation_controller_test.go @@ -128,7 +128,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { Context("Sunny Flows", func() { When("node does not exist", func() { BeforeEach(func() { - underTest = createRemediation(phantomNode) + underTest = createRemediationOwnedByNHC(phantomNode) }) It("No machine is deleted", func() { @@ -143,7 +143,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("remediation associated machine has no owner ref", func() { BeforeEach(func() { - underTest = createRemediation(masterNode) + underTest = createRemediationOwnedByNHC(masterNode) }) It("No machine is deleted", func() { @@ -161,7 +161,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { BeforeEach(func() { workerNodeMachine.OwnerReferences[0].Controller = nil Expect(k8sClient.Update(context.Background(), workerNodeMachine)).ToNot(HaveOccurred()) - underTest = createRemediation(workerNode) + underTest = createRemediationOwnedByNHC(workerNode) }) It("No machine is deleted", func() { @@ -180,7 +180,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { controllerValue := false workerNodeMachine.OwnerReferences[0].Controller = &controllerValue Expect(k8sClient.Update(context.Background(), workerNodeMachine)).ToNot(HaveOccurred()) - underTest = createRemediation(workerNode) + underTest = createRemediationOwnedByNHC(workerNode) }) It("No machine is deleted", func() { @@ -196,7 +196,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("remediation associated machine has valid owner ref of CPMS Kind", func() { BeforeEach(func() { - underTest = createRemediation(&cpNodeWithOwnerList[0]) + underTest = createRemediationOwnedByNHC(&cpNodeWithOwnerList[0]) }) It("CP machine is deleted", func() { @@ -236,7 +236,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("worker node remediation exists", func() { BeforeEach(func() { - underTest = createRemediation(workerNode) + underTest = createRemediationOwnedByNHC(workerNode) }) It("worker machine is deleted", func() { verifyMachineIsDeleted(workerNodeMachineName) @@ -272,7 +272,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("creating a resource in baremetal provider", func() { BeforeEach(func() { setMachineProviderID(workerNodeMachine, "baremetal:///dummy-provider-ID") - underTest = createRemediation(workerNode) + underTest = createRemediationOwnedByNHC(workerNode) }) It("sets PermanentNodeDeletionExpected condition to false", func() { @@ -283,7 +283,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("creating a resource in cloud provider", func() { BeforeEach(func() { setMachineProviderID(workerNodeMachine, "cloud:///dummy-provider-ID") - underTest = createRemediation(workerNode) + underTest = createRemediationOwnedByNHC(workerNode) }) It("sets PermanentNodeDeletionExpected condition to true", func() { @@ -295,7 +295,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("creating a resource in an unknown provider", func() { BeforeEach(func() { // do not set the providerID in Machine - underTest = createRemediation(workerNode) + underTest = createRemediationOwnedByNHC(workerNode) }) It("sets PermanentNodeDeletionExpected condition to false", func() { @@ -307,7 +307,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { Context("Rainy (Error) Flows", func() { When("remediation is not connected to a node", func() { BeforeEach(func() { - underTest = createRemediation(phantomNode) + underTest = createRemediationOwnedByNHC(phantomNode) }) It("node not found error", func() { @@ -324,7 +324,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("node does not have annotations", func() { BeforeEach(func() { - underTest = createRemediation(masterNode) + underTest = createRemediationOwnedByNHC(masterNode) masterNode.Annotations = nil Expect(k8sClient.Update(context.Background(), masterNode)).ToNot(HaveOccurred()) }) @@ -343,7 +343,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("node does not have machine annotation", func() { BeforeEach(func() { - underTest = createRemediation(masterNode) + underTest = createRemediationOwnedByNHC(masterNode) masterNode.Annotations[machineAnnotationOpenshift] = "" Expect(k8sClient.Update(context.Background(), masterNode)).ToNot(HaveOccurred()) }) @@ -362,7 +362,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("node's machine annotation has invalid value", func() { BeforeEach(func() { - underTest = createRemediation(masterNode) + underTest = createRemediationOwnedByNHC(masterNode) masterNode.Annotations[machineAnnotationOpenshift] = "Gibberish" Expect(k8sClient.Update(context.Background(), masterNode)).ToNot(HaveOccurred()) }) @@ -381,7 +381,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("machine pointed to by node's annotation does not exist", func() { BeforeEach(func() { - underTest = createRemediation(masterNode) + underTest = createRemediationOwnedByNHC(masterNode) masterNode.Annotations[machineAnnotationOpenshift] = "phantom-machine-namespace/phantom-machine-name" Expect(k8sClient.Update(context.Background(), masterNode)).ToNot(HaveOccurred()) }) @@ -400,7 +400,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("Remediation has incorrect annotation", func() { BeforeEach(func() { - underTest = createRemediationWithAnnotation(masterNode, MachineNameNsAnnotation, "Gibberish") + underTest = createRemediationOwnedByNHCWithAnnotation(masterNode, MachineNameNsAnnotation, "Gibberish") }) It("fails to follow machine deletion", func() { @@ -424,7 +424,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { DeferCleanup(func() { cclient.onDeleteError = nil }) - underTest = createRemediation(workerNode) + underTest = createRemediationOwnedByNHC(workerNode) }) It("returns the same delete failure error", func() { @@ -442,7 +442,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("NHC stops the remediation", func() { BeforeEach(func() { - underTest = createRemediationWithAnnotation(workerNode, commonannotations.NhcTimedOut, "some timestamp") + underTest = createRemediationOwnedByNHCWithAnnotation(workerNode, commonannotations.NhcTimedOut, "some timestamp") }) It("returns without completing remediation", func() { @@ -459,15 +459,38 @@ var _ = Describe("Machine Deletion Remediation CR", func() { }) }) -func createRemediation(node *v1.Node) *v1alpha1.MachineDeletionRemediation { +func createRemediationOwnedByNHC(node *v1.Node) *v1alpha1.MachineDeletionRemediation { mdr := &v1alpha1.MachineDeletionRemediation{} mdr.Name = node.Name mdr.Namespace = defaultNamespace + mdr.SetOwnerReferences([]metav1.OwnerReference{ + { + Name: node.Name, + Kind: "NodeHealthCheck", + UID: "1234", + APIVersion: "remediation.medik8s.io/v1alpha1", + }, + }) + return mdr +} + +func createRemediationOwnedByMHC(node *v1.Node, owner *machinev1beta1.Machine) *v1alpha1.MachineDeletionRemediation { + mdr := &v1alpha1.MachineDeletionRemediation{} + mdr.Name = node.Name + mdr.Namespace = defaultNamespace + mdr.SetOwnerReferences([]metav1.OwnerReference{ + { + Name: owner.Name, + Kind: "Machine", + UID: "1234", + APIVersion: "machine.openshift.io/v1beta1", + }, + }) return mdr } -func createRemediationWithAnnotation(node *v1.Node, key, annotation string) *v1alpha1.MachineDeletionRemediation { - mdr := createRemediation(node) +func createRemediationOwnedByNHCWithAnnotation(node *v1.Node, key, annotation string) *v1alpha1.MachineDeletionRemediation { + mdr := createRemediationOwnedByNHC(node) annotations := make(map[string]string, 1) annotations[key] = fmt.Sprintf("%s", annotation) mdr.SetAnnotations(annotations) From ecf2c952eb390f0b3f448556f707223b5fbaf711 Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Tue, 12 Dec 2023 15:59:25 +0100 Subject: [PATCH 4/8] Add support for MHC created remediations In case the MDR CR is created by MHC - get the remediation target Machine from CR's ownerReference of Kind "Machine" - Ignore if the target Machine has no Nodes Signed-off-by: Carlo Lobrano --- .../machinedeletionremediation_controller.go | 117 ++++++++--- ...hinedeletionremediation_controller_test.go | 187 ++++++++++++------ controllers/suite_test.go | 5 + 3 files changed, 221 insertions(+), 88 deletions(-) diff --git a/controllers/machinedeletionremediation_controller.go b/controllers/machinedeletionremediation_controller.go index c62b2263..56546b5c 100644 --- a/controllers/machinedeletionremediation_controller.go +++ b/controllers/machinedeletionremediation_controller.go @@ -59,10 +59,12 @@ const ( failedToDeleteMachineError = "failed to delete machine of node name: %s" nodeNotFoundErrorMsg = "failed to fetch node" machineNotFoundErrorMsg = "failed to fetch machine of node" - + // Cluster Provider messages machineDeletedOnCloudProviderMessage = "Machine will be deleted and the unhealthy node replaced. This is a Cloud cluster provider: the new node is expected to have a new name" machineDeletedOnBareMetalProviderMessage = "Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name" machineDeletedOnUnknownProviderMessage = "Machine will be deleted and the unhealthy node replaced. Unknown cluster provider: no information about the new node's name" + // Machine Kind + machineKind = "Machine" ) type conditionChangeReason string @@ -128,9 +130,6 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re } }() - // Remediation's name was created from Node's name - nodeName := mdr.GetName() - if r.isTimedOutByNHC(mdr) { log.Info("NHC time out annotation found, stopping remediation") _, err = r.updateConditions(remediationTimedOutByNhc, mdr) @@ -183,7 +182,7 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } - log.Info("node-associated machine found", "node", mdr.Name, "machine", machine.GetName()) + log.Info("target machine found", "machine", machine.GetName()) // Detect if Node name is expected to change after Machine deletion given // the Platform type. In case of BareMetal platform, the name is NOT @@ -212,12 +211,12 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re if !machine.GetDeletionTimestamp().IsZero() { // Machine deletion requested already. Log deletion progress until the Machine exists - log.Info(postponedMachineDeletionInfo, "node", nodeName, "machine", machine.Name, "machine status.phase", machine.Status.Phase) + log.Info(postponedMachineDeletionInfo, "machine", machine.Name, "machine status.phase", machine.Status.Phase) return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } if !hasControllerOwner(machine) { - log.Info("ignoring remediation of node-associated machine: the machine has no controller owner", "machine", machine.GetName(), "node name", mdr.Name) + log.Info("ignoring remediation of the machine: the machine has no controller owner", "machine", machine.GetName()) _, err = r.updateConditions(remediationSkippedNoControllerOwner, mdr) return ctrl.Result{}, err } @@ -228,10 +227,10 @@ func (r *MachineDeletionRemediationReconciler) Reconcile(ctx context.Context, re return ctrl.Result{}, errors.Wrapf(err, "failed to save Machine's name and namespace") } - log.Info("request node-associated machine deletion", "machine", machine.GetName(), "node", nodeName) + log.Info("request machine deletion", "machine", machine.GetName()) err = r.Delete(ctx, machine) if err != nil { - log.Error(err, "failed to delete machine associated to node", "machine", machine.GetName(), "node", nodeName) + log.Error(err, "failed to delete machine", "machine", machine.GetName()) return ctrl.Result{}, err } @@ -270,7 +269,7 @@ func (r *MachineDeletionRemediationReconciler) getRemediation(ctx context.Contex return remediation, nil } -func (r *MachineDeletionRemediationReconciler) getNodeFromMdr(ctx context.Context, mdr *v1alpha1.MachineDeletionRemediation) (*v1.Node, error) { +func (r *MachineDeletionRemediationReconciler) getNodeFromCR(ctx context.Context, mdr *v1alpha1.MachineDeletionRemediation) (*v1.Node, error) { node := &v1.Node{} key := client.ObjectKey{ Name: mdr.Name, @@ -282,26 +281,36 @@ func (r *MachineDeletionRemediationReconciler) getNodeFromMdr(ctx context.Contex return node, nil } -// getMachine retrieves a machine from the cluster based on the remediation. -// It returns the machine, a boolean indicating whether the machine was expected to be found, -// and an error if any occurred during the retrieval process. +// getMachine retrieves a Machine from the cluster based on the remediation. +// It returns the machine and an error if any occurred during the retrieval process. func (r *MachineDeletionRemediationReconciler) getMachine(ctx context.Context, remediation *v1alpha1.MachineDeletionRemediation) (*machinev1beta1.Machine, error) { - // If the Machine's Name and Ns come from the related Node, it is expected - // to find the Machine in the cluster, while if its Name and Ns come from - // CR's annotation, the Machine might have been deleted upon our request. + // The Name and Namespace to retrieve the target Machine can come from the following sources: + // - the remediation's ownerReference: if the remediation was created by MachineHealthcheck + // - the remediation's Node: if the remediation was created by NodeHealthcheck or manually + // - the remediation's MachineNameNsAnnotation annotation: once the Machine is found and its Name and Namespace are saved in + + // Try to get first the Machine's data from the remediation's annotation, if any. It means that the Machine was already + // been found in a previous cycle and we can use the data to verify if the Machine was deleted upon our request or not machineName, machineNs, err := getRemediationDataFromAnnotation(remediation, MachineNameNsAnnotation) if err != nil { r.Log.Error(err, "could not get Machine data from remediation", "remediation", remediation.GetName(), "annotation", MachineNameNsAnnotation) return nil, unrecoverableError } - var mustExist bool - if machineName == "" { - // Remediation does not have the MachineDataAnnotation yet. We will try to get the Machine's data from its Node, - // and we expect to find the Node and the Machine in the cluster. - mustExist = true + // If the Machine's Name is not in the annotation, it means that it must come from the other two sources and in + // turns it means that the Machine must exist in the cluster, otherwise an error is returned. + mustExist := machineName == "" + + if machineName == "" && len(remediation.GetOwnerReferences()) > 0 { + machineName, machineNs = r.getMachineNameNsFromOwnerReference(ctx, remediation) + if machineName != "" { + r.Log.Info("MHC created CR: target machine is CR ownerReference", "machine", machineName, "namespace", machineNs) + } + } - node, err := r.getNodeFromMdr(ctx, remediation) + if machineName == "" { + r.Log.Info("trying to get target Machine from the Node", "node", remediation.Name) + machineName, machineNs, err = r.getMachineNameNsFromCR(ctx, remediation) if err != nil { if apiErrors.IsNotFound(err) { r.Log.Error(err, nodeNotFoundErrorMsg, "node name", remediation.Name) @@ -309,10 +318,6 @@ func (r *MachineDeletionRemediationReconciler) getMachine(ctx context.Context, r } return nil, err } - if machineName, machineNs, err = getMachineNameNsFromNode(node); err != nil { - r.Log.Error(err, "could not get Machine Name NS from Node", "node", node.Name, "annotations", node.GetAnnotations()) - return nil, unrecoverableError - } } machine := &machinev1beta1.Machine{} @@ -321,6 +326,7 @@ func (r *MachineDeletionRemediationReconciler) getMachine(ctx context.Context, r Namespace: machineNs, } + r.Log.Info("Looking for the target Machine", "machine", machineName, "namespace", machineNs) if err := r.Get(ctx, key, machine); err != nil { if !apiErrors.IsNotFound(err) { return nil, err @@ -335,6 +341,7 @@ func (r *MachineDeletionRemediationReconciler) getMachine(ctx context.Context, r r.Log.Info(successfulMachineDeletionInfo, "node", remediation.Name, "machine", machineName) return nil, nil } + return machine, nil } @@ -494,15 +501,20 @@ func (r *MachineDeletionRemediationReconciler) isExpectedNodesNumberRestored(ctx return false, errors.Wrap(unrecoverableError, err.Error()) } - nodes, err := r.getMachineOwnerNodes(ctx, name) + replicas, err := r.getMachineOwnerSpecReplicas(ctx, kind, name, namespace) if err != nil { - r.Log.Error(err, "could not get Machine owner's nodes", "kind", kind, "name", name, "namespace", namespace) + r.Log.Error(err, "could not get Machine owner's Spec.Replicas", "kind", kind, "name", name, "namespace", namespace) return false, err } - replicas, err := r.getMachineOwnerSpecReplicas(ctx, kind, name, namespace) + if replicas == 0 { + r.Log.Info("Machine owner's Spec.Replicas is 0, no need to verify Node count restoration") + return true, nil + } + + nodes, err := r.getMachineOwnerNodes(ctx, name) if err != nil { - r.Log.Error(err, "could not get Machine owner's Spec.Replicas", "kind", kind, "name", name, "namespace", namespace) + r.Log.Error(err, "could not get Machine owner's nodes", "kind", kind, "name", name, "namespace", namespace) return false, err } @@ -576,6 +588,36 @@ func (r *MachineDeletionRemediationReconciler) getMachineOwnerNodes(ctx context. return machineOwnerNodes, nil } +func (r *MachineDeletionRemediationReconciler) getMachineNameNsFromCR(ctx context.Context, remediation *v1alpha1.MachineDeletionRemediation) (machineName, machineNs string, err error) { + var node *v1.Node + node, err = r.getNodeFromCR(ctx, remediation) + if err != nil { + if apiErrors.IsNotFound(err) { + r.Log.Error(err, nodeNotFoundErrorMsg, "node name", remediation.Name) + err = nodeNotFoundError + } + return "", "", err + } + + machineName, machineNs, err = getMachineNameNsFromNode(node) + if err != nil { + r.Log.Error(err, "could not get Machine Name NS from Node", "node", node.Name, "annotations", node.GetAnnotations()) + return "", "", unrecoverableError + } + return machineName, machineNs, nil +} + +func (r *MachineDeletionRemediationReconciler) getMachineNameNsFromOwnerReference(ctx context.Context, remediation *v1alpha1.MachineDeletionRemediation) (machineName, machineNs string) { + for _, owner := range remediation.GetOwnerReferences() { + if owner.Kind == machineKind { + machineName, machineNs = owner.Name, remediation.Namespace + break + } + } + + return machineName, machineNs +} + func getMachineNameNsFromNode(node *v1.Node) (string, string, error) { var nodeAnnotations map[string]string if nodeAnnotations = node.Annotations; nodeAnnotations == nil { @@ -625,3 +667,18 @@ func (r *MachineDeletionRemediationReconciler) getMachineOwnerSpecReplicas(ctx c } return int(replicas), nil } + +func (r *MachineDeletionRemediationReconciler) getMachineKindAndNamespace(ctx context.Context) (kind, namespace string, err error) { + machines := &machinev1beta1.MachineList{} + if err = r.List(ctx, machines); err != nil { + r.Log.Error(err, "could not list Machines") + return "", "", err + } + + if len(machines.Items) == 0 { + err = fmt.Errorf("Could not get Machine Namespace: no Machines found") + return "", "", err + } + + return machines.Items[0].Kind, machines.Items[0].Namespace, nil +} diff --git a/controllers/machinedeletionremediation_controller_test.go b/controllers/machinedeletionremediation_controller_test.go index 15efbe00..fe5267f3 100644 --- a/controllers/machinedeletionremediation_controller_test.go +++ b/controllers/machinedeletionremediation_controller_test.go @@ -24,21 +24,23 @@ import ( ) const ( - defaultNamespace = "default" - machineSetName = "machine-set-x" - machineSetKind = "MachineSet" - cpmsName = "cpms-x" - cpmsKind = "ControlPlaneMachineSet" - dummyMachine = "dummy-machine" - workerNodeName, masterNodeName, cpNodeWithOwnerName, noneExistingNodeName = "worker-node-x", "master-node-x", "cp-node-x", "phantom-node" - workerNodeMachineName, masterNodeMachineName, cpNodeMachineName = "worker-node-x-machine", "master-node-x-machine", "control-plane-node-x-machine" - mockDeleteFailMessage = "mock delete failure" - noMachineDeletionRemediationCRFound = "noMachineDeletionRemediationCRFound" - processingConditionNotSetError = "ProcessingConditionNotSet" - processingConditionSetButNoMatchError = "ProcessingConditionSetButNoMatch" - processingConditionSetAndMatchSuccess = "ProcessingConditionSetAndMatch" - processingConditionSetButWrongReasonError = "processingConditionSetButWrongReason" - processingConditionStartedInfo = "{\"processingConditionStatus\": \"True\", \"succededConditionStatus\": \"Unknown\", \"reason\": \"RemediationStarted\"}" + defaultNamespace = "default" + machineNamespace = "openshift-machine-api" + machineSetName, machineSetNameZeroReplicas = "machine-set-x", "machine-set-x-zero-replicas" + machineSetKind = "MachineSet" + cpmsName = "cpms-x" + cpmsKind = "ControlPlaneMachineSet" + dummyMachine = "dummy-machine" + workerNodeName, masterNodeName, cpNodeWithOwnerName, phantomNodeName = "worker-node-x", "master-node-x", "cp-node-x", "phantom-node" + workerNodeMachineName, masterNodeMachineName, cpNodeMachineName = "worker-node-x-machine", "master-node-x-machine", "control-plane-node-x-machine" + phantomNodeMachineName = "phantom-node-machine" + mockDeleteFailMessage = "mock delete failure" + noMachineDeletionRemediationCRFound = "noMachineDeletionRemediationCRFound" + processingConditionNotSetError = "ProcessingConditionNotSet" + processingConditionSetButNoMatchError = "ProcessingConditionSetButNoMatch" + processingConditionSetAndMatchSuccess = "ProcessingConditionSetAndMatch" + processingConditionSetButWrongReasonError = "processingConditionSetButWrongReason" + processingConditionStartedInfo = "{\"processingConditionStatus\": \"True\", \"succededConditionStatus\": \"Unknown\", \"reason\": \"RemediationStarted\"}" ) var underTest *v1alpha1.MachineDeletionRemediation @@ -51,11 +53,11 @@ type expectedCondition struct { var _ = Describe("Machine Deletion Remediation CR", func() { var ( - machineSet *machinev1beta1.MachineSet - cpms *machinev1.ControlPlaneMachineSet - workerNodeMachine, masterNodeMachine, cpNodeMachine *machinev1beta1.Machine - workerNode, masterNode *v1.Node - cpNodeWithOwnerList []v1.Node + machineSet, machineSetZeroReplicas *machinev1beta1.MachineSet + cpms *machinev1.ControlPlaneMachineSet + workerNodeMachine, masterNodeMachine, cpNodeMachine, phantomNodeMachine *machinev1beta1.Machine + workerNode, masterNode *v1.Node + cpNodeWithOwnerList []v1.Node //phantomNode is never created by client phantomNode *v1.Node ) @@ -80,17 +82,19 @@ var _ = Describe("Machine Deletion Remediation CR", func() { BeforeEach(func() { plogs.Clear() - machineSet = createMachineSet(machineSetName) + machineSet = createMachineSet(machineSetName, 1) + machineSetZeroReplicas = createMachineSet(machineSetNameZeroReplicas, 0) cpms = createControlPlaneMachineSet(cpmsName) workerNodeMachine = createMachineWithOwner(workerNodeMachineName, machineSet) + phantomNodeMachine = createMachineWithOwner(phantomNodeMachineName, machineSetZeroReplicas) masterNodeMachine = createMachine(masterNodeMachineName) cpNodeMachine = createMachineWithOwner(cpNodeMachineName, cpms) workerNode, masterNode, phantomNode = createNodeWithMachine(workerNodeName, workerNodeMachine), createNodeWithMachine(masterNodeName, masterNodeMachine), - createNode(noneExistingNodeName) + createNode(phantomNodeName) // CPMS's ReplicaSet has minimum value of 3, so we need 3 CP nodes for i := 0; i < 3; i++ { @@ -101,23 +105,27 @@ var _ = Describe("Machine Deletion Remediation CR", func() { } Expect(k8sClient.Create(context.Background(), machineSet)).To(Succeed()) + Expect(k8sClient.Create(context.Background(), machineSetZeroReplicas)).To(Succeed()) Expect(k8sClient.Create(context.Background(), cpms)).To(Succeed()) Expect(k8sClient.Create(context.Background(), masterNode)).To(Succeed()) Expect(k8sClient.Create(context.Background(), workerNode)).To(Succeed()) Expect(k8sClient.Create(context.Background(), masterNodeMachine)).To(Succeed()) Expect(k8sClient.Create(context.Background(), cpNodeMachine)).To(Succeed()) Expect(k8sClient.Create(context.Background(), workerNodeMachine)).To(Succeed()) + Expect(k8sClient.Create(context.Background(), phantomNodeMachine)).To(Succeed()) DeferCleanup(k8sClient.Delete, machineSet) + DeferCleanup(k8sClient.Delete, machineSetZeroReplicas) DeferCleanup(k8sClient.Delete, cpms) DeferCleanup(k8sClient.Delete, masterNode) DeferCleanup(k8sClient.Delete, workerNode) DeferCleanup(k8sClient.Delete, masterNodeMachine) - // cpNodeMachine and workerNodeMachine are expected to be deleted in some tests + // The following Machines are expected to be deleted in some tests // so do not error if they are not found DeferCleanup(deleteIgnoreNotFound(), cpNodeMachine) DeferCleanup(deleteIgnoreNotFound(), workerNodeMachine) + DeferCleanup(deleteIgnoreNotFound(), phantomNodeMachine) }) JustBeforeEach(func() { @@ -128,7 +136,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { Context("Sunny Flows", func() { When("node does not exist", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(phantomNode) + underTest = createRemediationOwnedByNHC(phantomNode.Name) }) It("No machine is deleted", func() { @@ -143,7 +151,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("remediation associated machine has no owner ref", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(masterNode) + underTest = createRemediationOwnedByNHC(masterNode.Name) }) It("No machine is deleted", func() { @@ -161,7 +169,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { BeforeEach(func() { workerNodeMachine.OwnerReferences[0].Controller = nil Expect(k8sClient.Update(context.Background(), workerNodeMachine)).ToNot(HaveOccurred()) - underTest = createRemediationOwnedByNHC(workerNode) + underTest = createRemediationOwnedByNHC(workerNode.Name) }) It("No machine is deleted", func() { @@ -180,7 +188,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { controllerValue := false workerNodeMachine.OwnerReferences[0].Controller = &controllerValue Expect(k8sClient.Update(context.Background(), workerNodeMachine)).ToNot(HaveOccurred()) - underTest = createRemediationOwnedByNHC(workerNode) + underTest = createRemediationOwnedByNHC(workerNode.Name) }) It("No machine is deleted", func() { @@ -196,7 +204,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("remediation associated machine has valid owner ref of CPMS Kind", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(&cpNodeWithOwnerList[0]) + underTest = createRemediationOwnedByNHC(cpNodeWithOwnerList[0].Name) }) It("CP machine is deleted", func() { @@ -221,7 +229,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { for i := 0; i < 3; i++ { cpNode := cpNodeWithOwnerList[i] - cpNode.Annotations[machineAnnotationOpenshift] = fmt.Sprintf("%s/%s", defaultNamespace, replacementName) + cpNode.Annotations[machineAnnotationOpenshift] = fmt.Sprintf("%s/%s", machineNamespace, replacementName) Expect(k8sClient.Update(context.Background(), &cpNode)).To(Succeed()) } @@ -236,7 +244,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("worker node remediation exists", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(workerNode) + underTest = createRemediationOwnedByNHC(workerNode.Name) }) It("worker machine is deleted", func() { verifyMachineIsDeleted(workerNodeMachineName) @@ -257,7 +265,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { Expect(k8sClient.Create(context.Background(), workerNodeMachineReplacement)).To(Succeed()) DeferCleanup(k8sClient.Delete, workerNodeMachineReplacement) - workerNode.Annotations[machineAnnotationOpenshift] = fmt.Sprintf("%s/%s", defaultNamespace, machineReplacementName) + workerNode.Annotations[machineAnnotationOpenshift] = fmt.Sprintf("%s/%s", machineNamespace, machineReplacementName) Expect(k8sClient.Update(context.Background(), workerNode)).To(Succeed()) // Now the remediation should be completed @@ -272,7 +280,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("creating a resource in baremetal provider", func() { BeforeEach(func() { setMachineProviderID(workerNodeMachine, "baremetal:///dummy-provider-ID") - underTest = createRemediationOwnedByNHC(workerNode) + underTest = createRemediationOwnedByNHC(workerNode.Name) }) It("sets PermanentNodeDeletionExpected condition to false", func() { @@ -283,7 +291,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("creating a resource in cloud provider", func() { BeforeEach(func() { setMachineProviderID(workerNodeMachine, "cloud:///dummy-provider-ID") - underTest = createRemediationOwnedByNHC(workerNode) + underTest = createRemediationOwnedByNHC(workerNode.Name) }) It("sets PermanentNodeDeletionExpected condition to true", func() { @@ -295,7 +303,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("creating a resource in an unknown provider", func() { BeforeEach(func() { // do not set the providerID in Machine - underTest = createRemediationOwnedByNHC(workerNode) + underTest = createRemediationOwnedByNHC(workerNode.Name) }) It("sets PermanentNodeDeletionExpected condition to false", func() { @@ -307,7 +315,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { Context("Rainy (Error) Flows", func() { When("remediation is not connected to a node", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(phantomNode) + underTest = createRemediationOwnedByNHC(phantomNode.Name) }) It("node not found error", func() { @@ -324,7 +332,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("node does not have annotations", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(masterNode) + underTest = createRemediationOwnedByNHC(masterNode.Name) masterNode.Annotations = nil Expect(k8sClient.Update(context.Background(), masterNode)).ToNot(HaveOccurred()) }) @@ -343,7 +351,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("node does not have machine annotation", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(masterNode) + underTest = createRemediationOwnedByNHC(masterNode.Name) masterNode.Annotations[machineAnnotationOpenshift] = "" Expect(k8sClient.Update(context.Background(), masterNode)).ToNot(HaveOccurred()) }) @@ -362,7 +370,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("node's machine annotation has invalid value", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(masterNode) + underTest = createRemediationOwnedByNHC(masterNode.Name) masterNode.Annotations[machineAnnotationOpenshift] = "Gibberish" Expect(k8sClient.Update(context.Background(), masterNode)).ToNot(HaveOccurred()) }) @@ -381,7 +389,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("machine pointed to by node's annotation does not exist", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHC(masterNode) + underTest = createRemediationOwnedByNHC(masterNode.Name) masterNode.Annotations[machineAnnotationOpenshift] = "phantom-machine-namespace/phantom-machine-name" Expect(k8sClient.Update(context.Background(), masterNode)).ToNot(HaveOccurred()) }) @@ -400,7 +408,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("Remediation has incorrect annotation", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHCWithAnnotation(masterNode, MachineNameNsAnnotation, "Gibberish") + underTest = createRemediationOwnedByNHCWithAnnotation(masterNode.Name, MachineNameNsAnnotation, "Gibberish") }) It("fails to follow machine deletion", func() { @@ -424,7 +432,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { DeferCleanup(func() { cclient.onDeleteError = nil }) - underTest = createRemediationOwnedByNHC(workerNode) + underTest = createRemediationOwnedByNHC(workerNode.Name) }) It("returns the same delete failure error", func() { @@ -442,7 +450,7 @@ var _ = Describe("Machine Deletion Remediation CR", func() { When("NHC stops the remediation", func() { BeforeEach(func() { - underTest = createRemediationOwnedByNHCWithAnnotation(workerNode, commonannotations.NhcTimedOut, "some timestamp") + underTest = createRemediationOwnedByNHCWithAnnotation(workerNode.Name, commonannotations.NhcTimedOut, "some timestamp") }) It("returns without completing remediation", func() { @@ -456,16 +464,80 @@ var _ = Describe("Machine Deletion Remediation CR", func() { }) }) }) + + Context("Support to MHC created CR", func() { + When("Machine's node exists", func() { + BeforeEach(func() { + // The actual remediation name should be the same as the Machine's name, however + // the test use a different name to make sure the target Machine is not found looking at the + // remediation's name. + underTest = createRemediationOwnedByMHC("remediation-name", workerNodeMachine) + }) + + It("MHC worker machine is deleted", func() { + verifyMachineIsDeleted(workerNodeMachineName) + verifyMachineNotDeleted(masterNodeMachineName) + + // Machine is deleted, but the remediation is not completed yet + verifyConditionsMatch([]expectedCondition{ + {commonconditions.ProcessingType, metav1.ConditionTrue, remediationStarted}, + {commonconditions.SucceededType, metav1.ConditionUnknown, remediationStarted}, + // Cluster provider is not set in this test + {commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}}) + + // Mock Machine and Node re-provisioning (even though this test does not actually delete the node, just the machine). + // 1. Create a Machine's replacement with a new name + // 2. Update WorkerNode's annotation to point to the new Machine + machineReplacementName := workerNodeMachineName + "-replacement" + workerNodeMachineReplacement := createMachineWithOwner(machineReplacementName, machineSet) + Expect(k8sClient.Create(context.Background(), workerNodeMachineReplacement)).To(Succeed()) + DeferCleanup(k8sClient.Delete, workerNodeMachineReplacement) + + workerNode.Annotations[machineAnnotationOpenshift] = fmt.Sprintf("%s/%s", machineNamespace, machineReplacementName) + Expect(k8sClient.Update(context.Background(), workerNode)).To(Succeed()) + + // Now the remediation should be completed + verifyConditionsMatch([]expectedCondition{ + {commonconditions.ProcessingType, metav1.ConditionFalse, remediationFinishedMachineDeleted}, + {commonconditions.SucceededType, metav1.ConditionTrue, remediationFinishedMachineDeleted}, + // Cluster provider is not set in this test + {commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}}) + }) + }) + + When("Machine's node does not exist", func() { + BeforeEach(func() { + // The actual remediation name should be the same as the Machine's name, however + // the test use a different name to make sure the target Machine is not found looking at the + // remediation's name. + underTest = createRemediationOwnedByMHC("remediation-name", phantomNodeMachine) + }) + + It("MHC worker machine is deleted", func() { + verifyMachineIsDeleted(phantomNodeMachineName) + verifyMachineNotDeleted(workerNodeMachineName) + verifyMachineNotDeleted(masterNodeMachineName) + + // Machine is deleted and MDR does not have to wait for the node count restoration, so the + // remediation should be completed already. + verifyConditionsMatch([]expectedCondition{ + {commonconditions.ProcessingType, metav1.ConditionFalse, remediationFinishedMachineDeleted}, + {commonconditions.SucceededType, metav1.ConditionTrue, remediationFinishedMachineDeleted}, + // Cluster provider is not set in this test + {commonconditions.PermanentNodeDeletionExpectedType, metav1.ConditionUnknown, v1alpha1.MachineDeletionOnUndefinedProviderReason}}) + }) + }) + }) }) }) -func createRemediationOwnedByNHC(node *v1.Node) *v1alpha1.MachineDeletionRemediation { +func createRemediationOwnedByNHC(remediationName string) *v1alpha1.MachineDeletionRemediation { mdr := &v1alpha1.MachineDeletionRemediation{} - mdr.Name = node.Name + mdr.Name = remediationName mdr.Namespace = defaultNamespace mdr.SetOwnerReferences([]metav1.OwnerReference{ { - Name: node.Name, + Name: remediationName, Kind: "NodeHealthCheck", UID: "1234", APIVersion: "remediation.medik8s.io/v1alpha1", @@ -474,10 +546,10 @@ func createRemediationOwnedByNHC(node *v1.Node) *v1alpha1.MachineDeletionRemedia return mdr } -func createRemediationOwnedByMHC(node *v1.Node, owner *machinev1beta1.Machine) *v1alpha1.MachineDeletionRemediation { +func createRemediationOwnedByMHC(remediationName string, owner *machinev1beta1.Machine) *v1alpha1.MachineDeletionRemediation { mdr := &v1alpha1.MachineDeletionRemediation{} - mdr.Name = node.Name - mdr.Namespace = defaultNamespace + mdr.Name = remediationName + mdr.Namespace = machineNamespace mdr.SetOwnerReferences([]metav1.OwnerReference{ { Name: owner.Name, @@ -489,8 +561,8 @@ func createRemediationOwnedByMHC(node *v1.Node, owner *machinev1beta1.Machine) * return mdr } -func createRemediationOwnedByNHCWithAnnotation(node *v1.Node, key, annotation string) *v1alpha1.MachineDeletionRemediation { - mdr := createRemediationOwnedByNHC(node) +func createRemediationOwnedByNHCWithAnnotation(remediationName string, key, annotation string) *v1alpha1.MachineDeletionRemediation { + mdr := createRemediationOwnedByNHC(remediationName) annotations := make(map[string]string, 1) annotations[key] = fmt.Sprintf("%s", annotation) mdr.SetAnnotations(annotations) @@ -511,12 +583,11 @@ func createNode(nodeName string) *v1.Node { } // createMachineSet creates a MachineSet with the given name. -func createMachineSet(machineSetName string) *machinev1beta1.MachineSet { +func createMachineSet(machineSetName string, replicas int32) *machinev1beta1.MachineSet { machineSet := &machinev1beta1.MachineSet{} - machineSet.SetNamespace(defaultNamespace) + machineSet.SetNamespace(machineNamespace) machineSet.SetName(machineSetName) - replicas := int32(1) - machineSet.Spec.Replicas = &replicas + machineSet.Spec.Replicas = ptr.To[int32](replicas) return machineSet } @@ -526,7 +597,7 @@ func createControlPlaneMachineSet(name string) *machinev1.ControlPlaneMachineSet cpms := &machinev1.ControlPlaneMachineSet{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: defaultNamespace, + Namespace: machineNamespace, }, Spec: machinev1.ControlPlaneMachineSetSpec{ Replicas: ptr.To[int32](3), @@ -557,7 +628,7 @@ func createControlPlaneMachineSet(name string) *machinev1.ControlPlaneMachineSet func createMachine(machineName string) *machinev1beta1.Machine { machine := &machinev1beta1.Machine{} - machine.SetNamespace(defaultNamespace) + machine.SetNamespace(machineNamespace) machine.SetName(machineName) return machine } @@ -597,13 +668,13 @@ func createDummyMachine() *machinev1beta1.Machine { func verifyMachineNotDeleted(machineName string) { Consistently( func() error { - return k8sClient.Get(context.Background(), client.ObjectKey{Namespace: defaultNamespace, Name: machineName}, createDummyMachine()) + return k8sClient.Get(context.Background(), client.ObjectKey{Namespace: machineNamespace, Name: machineName}, createDummyMachine()) }).ShouldNot(HaveOccurred()) } func verifyMachineIsDeleted(machineName string) { Eventually(func() bool { - return errors.IsNotFound(k8sClient.Get(context.Background(), client.ObjectKey{Namespace: defaultNamespace, Name: machineName}, createDummyMachine())) + return errors.IsNotFound(k8sClient.Get(context.Background(), client.ObjectKey{Namespace: machineNamespace, Name: machineName}, createDummyMachine())) }).Should(BeTrue()) } diff --git a/controllers/suite_test.go b/controllers/suite_test.go index e488d7ff..261625b9 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -25,6 +25,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -128,6 +129,10 @@ var _ = BeforeSuite(func() { k8sClient = k8sManager.GetClient() Expect(k8sClient).ToNot(BeNil()) + ns := &corev1.Namespace{} + ns.SetName(machineNamespace) + Expect(k8sClient.Create(context.Background(), ns)).To(Succeed()) + cclient = customClient{Client: k8sClient} err = (&MachineDeletionRemediationReconciler{ From 7d4ae618d4cb8684b50674d129b066ae3ad66d69 Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Wed, 20 Dec 2023 13:53:41 +0100 Subject: [PATCH 5/8] Removed unnecessary global const Signed-off-by: Carlo Lobrano --- controllers/machinedeletionremediation_controller.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/controllers/machinedeletionremediation_controller.go b/controllers/machinedeletionremediation_controller.go index 56546b5c..6027699f 100644 --- a/controllers/machinedeletionremediation_controller.go +++ b/controllers/machinedeletionremediation_controller.go @@ -63,8 +63,6 @@ const ( machineDeletedOnCloudProviderMessage = "Machine will be deleted and the unhealthy node replaced. This is a Cloud cluster provider: the new node is expected to have a new name" machineDeletedOnBareMetalProviderMessage = "Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name" machineDeletedOnUnknownProviderMessage = "Machine will be deleted and the unhealthy node replaced. Unknown cluster provider: no information about the new node's name" - // Machine Kind - machineKind = "Machine" ) type conditionChangeReason string @@ -609,7 +607,7 @@ func (r *MachineDeletionRemediationReconciler) getMachineNameNsFromCR(ctx contex func (r *MachineDeletionRemediationReconciler) getMachineNameNsFromOwnerReference(ctx context.Context, remediation *v1alpha1.MachineDeletionRemediation) (machineName, machineNs string) { for _, owner := range remediation.GetOwnerReferences() { - if owner.Kind == machineKind { + if owner.Kind == "Machine" { machineName, machineNs = owner.Name, remediation.Namespace break } From baf3f7eb3329adef959f5e3a46e1f2149b2339d9 Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Wed, 20 Dec 2023 13:58:29 +0100 Subject: [PATCH 6/8] Rename function to get Machine Name Ns from remediation For coherence with getMachineNameNsFromOwnerReference, rename getMachineNameNsFromCR into getMachineNameNsFromRemediationName. Signed-off-by: Carlo Lobrano --- controllers/machinedeletionremediation_controller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/controllers/machinedeletionremediation_controller.go b/controllers/machinedeletionremediation_controller.go index 6027699f..b1a668af 100644 --- a/controllers/machinedeletionremediation_controller.go +++ b/controllers/machinedeletionremediation_controller.go @@ -308,7 +308,7 @@ func (r *MachineDeletionRemediationReconciler) getMachine(ctx context.Context, r if machineName == "" { r.Log.Info("trying to get target Machine from the Node", "node", remediation.Name) - machineName, machineNs, err = r.getMachineNameNsFromCR(ctx, remediation) + machineName, machineNs, err = r.getMachineNameNsFromRemediationName(ctx, remediation) if err != nil { if apiErrors.IsNotFound(err) { r.Log.Error(err, nodeNotFoundErrorMsg, "node name", remediation.Name) @@ -586,7 +586,7 @@ func (r *MachineDeletionRemediationReconciler) getMachineOwnerNodes(ctx context. return machineOwnerNodes, nil } -func (r *MachineDeletionRemediationReconciler) getMachineNameNsFromCR(ctx context.Context, remediation *v1alpha1.MachineDeletionRemediation) (machineName, machineNs string, err error) { +func (r *MachineDeletionRemediationReconciler) getMachineNameNsFromRemediationName(ctx context.Context, remediation *v1alpha1.MachineDeletionRemediation) (machineName, machineNs string, err error) { var node *v1.Node node, err = r.getNodeFromCR(ctx, remediation) if err != nil { From e6a7e62f2e34e1cb63b9396a75f765e0fe1d43fb Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Wed, 20 Dec 2023 14:00:20 +0100 Subject: [PATCH 7/8] Remove unused function Signed-off-by: Carlo Lobrano --- .../machinedeletionremediation_controller.go | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/controllers/machinedeletionremediation_controller.go b/controllers/machinedeletionremediation_controller.go index b1a668af..36290f81 100644 --- a/controllers/machinedeletionremediation_controller.go +++ b/controllers/machinedeletionremediation_controller.go @@ -665,18 +665,3 @@ func (r *MachineDeletionRemediationReconciler) getMachineOwnerSpecReplicas(ctx c } return int(replicas), nil } - -func (r *MachineDeletionRemediationReconciler) getMachineKindAndNamespace(ctx context.Context) (kind, namespace string, err error) { - machines := &machinev1beta1.MachineList{} - if err = r.List(ctx, machines); err != nil { - r.Log.Error(err, "could not list Machines") - return "", "", err - } - - if len(machines.Items) == 0 { - err = fmt.Errorf("Could not get Machine Namespace: no Machines found") - return "", "", err - } - - return machines.Items[0].Kind, machines.Items[0].Namespace, nil -} From 16570a4ab22f7b8232f9a2d127c6b3ec9ccda323 Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Wed, 20 Dec 2023 15:12:35 +0100 Subject: [PATCH 8/8] Improve readability of the logic to get Machine NamespacedName Signed-off-by: Carlo Lobrano --- .../machinedeletionremediation_controller.go | 27 +++++++------------ 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/controllers/machinedeletionremediation_controller.go b/controllers/machinedeletionremediation_controller.go index 36290f81..31564136 100644 --- a/controllers/machinedeletionremediation_controller.go +++ b/controllers/machinedeletionremediation_controller.go @@ -295,26 +295,18 @@ func (r *MachineDeletionRemediationReconciler) getMachine(ctx context.Context, r return nil, unrecoverableError } - // If the Machine's Name is not in the annotation, it means that it must come from the other two sources and in - // turns it means that the Machine must exist in the cluster, otherwise an error is returned. + // If the Machine's Name is not in the annotation, it means that it must come from the one of the other two + // sources and in turns it means that the Machine must exist in the cluster, otherwise an error is returned. mustExist := machineName == "" - - if machineName == "" && len(remediation.GetOwnerReferences()) > 0 { - machineName, machineNs = r.getMachineNameNsFromOwnerReference(ctx, remediation) - if machineName != "" { - r.Log.Info("MHC created CR: target machine is CR ownerReference", "machine", machineName, "namespace", machineNs) - } - } - if machineName == "" { - r.Log.Info("trying to get target Machine from the Node", "node", remediation.Name) - machineName, machineNs, err = r.getMachineNameNsFromRemediationName(ctx, remediation) - if err != nil { - if apiErrors.IsNotFound(err) { - r.Log.Error(err, nodeNotFoundErrorMsg, "node name", remediation.Name) - err = nodeNotFoundError + if machineName, machineNs = r.getMachineNameNsFromOwnerReference(ctx, remediation); machineName == "" { + if machineName, machineNs, err = r.getMachineNameNsFromRemediationName(ctx, remediation); err != nil { + if apiErrors.IsNotFound(err) { + r.Log.Error(err, nodeNotFoundErrorMsg, "node name", remediation.Name) + err = nodeNotFoundError + } + return nil, err } - return nil, err } } @@ -608,6 +600,7 @@ func (r *MachineDeletionRemediationReconciler) getMachineNameNsFromRemediationNa func (r *MachineDeletionRemediationReconciler) getMachineNameNsFromOwnerReference(ctx context.Context, remediation *v1alpha1.MachineDeletionRemediation) (machineName, machineNs string) { for _, owner := range remediation.GetOwnerReferences() { if owner.Kind == "Machine" { + r.Log.Info("remediation's ownerReference has Machine Kind", "machine", machineName, "namespace", machineNs) machineName, machineNs = owner.Name, remediation.Namespace break }