From 9ee55a8a73f2b8aee15d2b8ed22b3361cbed5cbe Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Tue, 8 Oct 2024 20:45:22 +0200 Subject: [PATCH 1/8] Ensure reconcile external references and reconcile node are always run --- .../controllers/machine/machine_controller.go | 191 ++++++++---------- .../machine/machine_controller_noderef.go | 31 ++- .../machine_controller_noderef_test.go | 28 +++ .../machine/machine_controller_phases.go | 131 ++++++------ .../machine/machine_controller_phases_test.go | 90 ++++++++- .../machine/machine_controller_test.go | 105 ++-------- 6 files changed, 298 insertions(+), 278 deletions(-) diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 01e45ebd2a4e..07d2c31d1537 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -196,6 +196,11 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } // Initialize the patch helper + s := &scope{ + cluster: cluster, + machine: m, + } + patchHelper, err := patch.NewHelper(m, r.Client) if err != nil { return ctrl.Result{}, err @@ -215,15 +220,34 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } }() - // Reconcile labels. + // Always add the cluster label labels. if m.Labels == nil { m.Labels = make(map[string]string) } m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName + // Add finalizer first if not set to avoid the race condition between init and delete. + // Note: Finalizers in general can only be added when the deletionTimestamp is not set. + if !controllerutil.ContainsFinalizer(m, clusterv1.MachineFinalizer) && m.ObjectMeta.DeletionTimestamp.IsZero() { + controllerutil.AddFinalizer(m, clusterv1.MachineFinalizer) + return ctrl.Result{}, nil + } + + alwaysReconcile := []machineReconcileFunc{ + r.reconcileBootstrap, + r.reconcileInfrastructure, + r.reconcileNode, + r.reconcileCertificateExpiry, + } + // Handle deletion reconciliation loop. if !m.ObjectMeta.DeletionTimestamp.IsZero() { - res, err := r.reconcileDelete(ctx, cluster, m) + reconcileDelete := append( + alwaysReconcile, + r.reconcileDelete, + ) + + res, err := doReconcile(ctx, reconcileDelete, s) // Requeue if the reconcile failed because the ClusterCacheTracker was locked for // the current cluster because of concurrent access. if errors.Is(err, remote.ErrClusterLocked) { @@ -233,15 +257,13 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re return res, err } - // Add finalizer first if not set to avoid the race condition between init and delete. - // Note: Finalizers in general can only be added when the deletionTimestamp is not set. - if !controllerutil.ContainsFinalizer(m, clusterv1.MachineFinalizer) { - controllerutil.AddFinalizer(m, clusterv1.MachineFinalizer) - return ctrl.Result{}, nil - } - // Handle normal reconciliation loop. - res, err := r.reconcile(ctx, cluster, m) + reconcileNormal := append( + []machineReconcileFunc{r.reconcileMachineOwner}, + alwaysReconcile..., + ) + + res, err := doReconcile(ctx, reconcileNormal, s) // Requeue if the reconcile failed because the ClusterCacheTracker was locked for // the current cluster because of concurrent access. 
if errors.Is(err, remote.ErrClusterLocked) { @@ -285,36 +307,26 @@ func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clust clusterv1.MachineHealthCheckSucceededCondition, clusterv1.MachineOwnerRemediatedCondition, }}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineAvailableV1Beta2Condition, + clusterv1.MachineReadyV1Beta2Condition, + clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + clusterv1.MachineInfrastructureReadyV1Beta2Condition, + clusterv1.MachineNodeReadyV1Beta2Condition, + clusterv1.MachineNodeHealthyV1Beta2Condition, + clusterv1.MachineDeletingV1Beta2Condition, + clusterv1.MachinePausedV1Beta2Condition, + }}, ) return patchHelper.Patch(ctx, machine, options...) } -func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { - // If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly - // owned by the Cluster (if not already present). - if r.shouldAdopt(m) { - m.SetOwnerReferences(util.EnsureOwnerRef(m.GetOwnerReferences(), metav1.OwnerReference{ - APIVersion: clusterv1.GroupVersion.String(), - Kind: "Cluster", - Name: cluster.Name, - UID: cluster.UID, - })) - } - - phases := []func(context.Context, *scope) (ctrl.Result, error){ - r.reconcileBootstrap, - r.reconcileInfrastructure, - r.reconcileNode, - r.reconcileCertificateExpiry, - } +type machineReconcileFunc func(context.Context, *scope) (ctrl.Result, error) +func doReconcile(ctx context.Context, phases []machineReconcileFunc, s *scope) (ctrl.Result, error) { res := ctrl.Result{} errs := []error{} - s := &scope{ - cluster: cluster, - machine: m, - } for _, phase := range phases { // Call the inner reconciliation methods. phaseResult, err := phase(ctx, s) @@ -346,10 +358,30 @@ type scope struct { // bootstrapConfig is the BootstrapConfig object that is referenced by the // Machine. It is set after reconcileBootstrap is called. bootstrapConfig *unstructured.Unstructured + + // node is the Kubernetes node hosted on the machine. + node *corev1.Node +} + +func (r *Reconciler) reconcileMachineOwner(_ context.Context, s *scope) (ctrl.Result, error) { + // If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly + // owned by the Cluster (if not already present). + if r.shouldAdopt(s.machine) { + s.machine.SetOwnerReferences(util.EnsureOwnerRef(s.machine.GetOwnerReferences(), metav1.OwnerReference{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: s.cluster.Name, + UID: s.cluster.UID, + })) + } + + return ctrl.Result{}, nil } -func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { //nolint:gocyclo +func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) (ctrl.Result, error) { //nolint:gocyclo log := ctrl.LoggerFrom(ctx) + cluster := s.cluster + m := s.machine err := r.isDeleteNodeAllowed(ctx, cluster, m) isDeleteNodeAllowed := err == nil @@ -463,20 +495,7 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu } conditions.MarkTrue(m, clusterv1.PreTerminateDeleteHookSucceededCondition) - // Return early and don't remove the finalizer if we got an error or - // the external reconciliation deletion isn't ready. 
- - patchHelper, err := patch.NewHelper(m, r.Client) - if err != nil { - return ctrl.Result{}, err - } - conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") - if err := patchMachine(ctx, patchHelper, m); err != nil { - conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityInfo, "") - return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine") - } - - infrastructureDeleted, err := r.reconcileDeleteInfrastructure(ctx, cluster, m) + infrastructureDeleted, err := r.reconcileDeleteInfrastructure(ctx, s) if err != nil { return ctrl.Result{}, err } @@ -485,7 +504,7 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu return ctrl.Result{}, nil } - bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, cluster, m) + bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, s) if err != nil { return ctrl.Result{}, err } @@ -849,78 +868,34 @@ func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, return nil } -func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) { - obj, err := r.reconcileDeleteExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef) - if err != nil { - return false, err - } - - if obj == nil { - // Marks the bootstrap as deleted - conditions.MarkFalse(m, clusterv1.BootstrapReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "") +func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, s *scope) (bool, error) { + if s.bootstrapConfig == nil { + conditions.MarkFalse(s.machine, clusterv1.BootstrapReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "") return true, nil } - // Report a summary of current status of the bootstrap object defined for this machine. - conditions.SetMirror(m, clusterv1.BootstrapReadyCondition, - conditions.UnstructuredGetter(obj), - conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""), - ) - return false, nil -} - -func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) { - obj, err := r.reconcileDeleteExternal(ctx, cluster, m, &m.Spec.InfrastructureRef) - if err != nil { - return false, err - } - - if obj == nil { - // Marks the infrastructure as deleted - conditions.MarkFalse(m, clusterv1.InfrastructureReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "") - return true, nil + if err := r.Client.Delete(ctx, s.bootstrapConfig); err != nil && !apierrors.IsNotFound(err) { + return false, errors.Wrapf(err, + "failed to delete %v %q for Machine %q in namespace %q", + s.bootstrapConfig.GroupVersionKind(), s.bootstrapConfig.GetName(), s.machine.Name, s.machine.Namespace) } - // Report a summary of current status of the bootstrap object defined for this machine. - conditions.SetMirror(m, clusterv1.InfrastructureReadyCondition, - conditions.UnstructuredGetter(obj), - conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""), - ) return false, nil } -// reconcileDeleteExternal tries to delete external references. 
-func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) { - if ref == nil { - return nil, nil - } - - // get the external object - obj, err := external.Get(ctx, r.Client, ref, m.Namespace) - if err != nil && !apierrors.IsNotFound(errors.Cause(err)) { - return nil, errors.Wrapf(err, "failed to get %s %q for Machine %q in namespace %q", - ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace) +func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, s *scope) (bool, error) { + if s.infraMachine == nil { + conditions.MarkFalse(s.machine, clusterv1.InfrastructureReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "") + return true, nil } - if obj != nil { - // reconcileExternal ensures that we set the object's OwnerReferences correctly and watch the object. - // The machine delete logic depends on reconciling the machine when the external objects are deleted. - // This avoids a race condition where the machine is deleted before the external objects are ever reconciled - // by this controller. - if _, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref); err != nil { - return nil, err - } - - // Issue a delete request. - if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) { - return obj, errors.Wrapf(err, - "failed to delete %v %q for Machine %q in namespace %q", - obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace) - } + if err := r.Client.Delete(ctx, s.infraMachine); err != nil && !apierrors.IsNotFound(err) { + return false, errors.Wrapf(err, + "failed to delete %v %q for Machine %q in namespace %q", + s.infraMachine.GroupVersionKind(), s.infraMachine.GetName(), s.machine.Name, s.machine.Namespace) } - // Return true if there are no more external objects. - return obj, nil + return false, nil } // shouldAdopt returns true if the Machine should be adopted as a stand-alone Machine directly owned by the Cluster. diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go index 67a4d028e599..db0b5df55655 100644 --- a/internal/controllers/machine/machine_controller_noderef.go +++ b/internal/controllers/machine/machine_controller_noderef.go @@ -72,6 +72,11 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result, node, err := r.getNode(ctx, remoteClient, *machine.Spec.ProviderID) if err != nil { if err == ErrNodeNotFound { + if !s.machine.DeletionTimestamp.IsZero() { + // Tolerate node not found when the machine is being deleted. + return ctrl.Result{}, nil + } + // While a NodeRef is set in the status, failing to get that node means the node is deleted. // If Status.NodeRef is not set before, node still can be in the provisioning state. if machine.Status.NodeRef != nil { @@ -87,21 +92,22 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result, conditions.MarkUnknown(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeInspectionFailedReason, "Failed to get the Node for this Machine by ProviderID") return ctrl.Result{}, err } + s.node = node // Set the Machine NodeRef. 
if machine.Status.NodeRef == nil { machine.Status.NodeRef = &corev1.ObjectReference{ APIVersion: corev1.SchemeGroupVersion.String(), Kind: "Node", - Name: node.Name, - UID: node.UID, + Name: s.node.Name, + UID: s.node.UID, } log.Info("Infrastructure provider reporting spec.providerID, Kubernetes node is now available", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Spec.InfrastructureRef.Namespace, machine.Spec.InfrastructureRef.Name), "providerID", *machine.Spec.ProviderID, "Node", klog.KRef("", machine.Status.NodeRef.Name)) r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulSetNodeRef", machine.Status.NodeRef.Name) } // Set the NodeSystemInfo. - machine.Status.NodeInfo = &node.Status.NodeInfo + machine.Status.NodeInfo = &s.node.Status.NodeInfo // Compute all the annotations that CAPI is setting on nodes; // CAPI only enforces some annotations and never changes or removes them. @@ -134,21 +140,26 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result, } } - _, nodeHadInterruptibleLabel := node.Labels[clusterv1.InterruptibleLabel] + _, nodeHadInterruptibleLabel := s.node.Labels[clusterv1.InterruptibleLabel] // Reconcile node taints - if err := r.patchNode(ctx, remoteClient, node, nodeLabels, nodeAnnotations, machine); err != nil { - return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile Node %s", klog.KObj(node)) + if err := r.patchNode(ctx, remoteClient, s.node, nodeLabels, nodeAnnotations, machine); err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile Node %s", klog.KObj(s.node)) } if !nodeHadInterruptibleLabel && interruptible { // If the interruptible label is added to the node then record the event. // Nb. Only record the event if the node previously did not have the label to avoid recording // the event during every reconcile. - r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulSetInterruptibleNodeLabel", node.Name) + r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulSetInterruptibleNodeLabel", s.node.Name) } // Do the remaining node health checks, then set the node health to true if all checks pass. 
- status, message := summarizeNodeConditions(node) + if s.infraMachine == nil || !s.infraMachine.GetDeletionTimestamp().IsZero() { + conditions.MarkFalse(s.machine, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") + return ctrl.Result{}, nil + } + + status, message := summarizeNodeConditions(s.node) if status == corev1.ConditionFalse { conditions.MarkFalse(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeConditionsFailedReason, clusterv1.ConditionSeverityWarning, message) return ctrl.Result{}, nil @@ -214,6 +225,10 @@ func summarizeNodeConditions(node *corev1.Node) (corev1.ConditionStatus, string) } } } + message = strings.TrimSpace(message) + if strings.HasSuffix(message, ".") { + message = strings.TrimSuffix(message, ".") + } if semanticallyFalseStatus > 0 { return corev1.ConditionFalse, message } diff --git a/internal/controllers/machine/machine_controller_noderef_test.go b/internal/controllers/machine/machine_controller_noderef_test.go index d1caf2f95bc3..acb486cd48a5 100644 --- a/internal/controllers/machine/machine_controller_noderef_test.go +++ b/internal/controllers/machine/machine_controller_noderef_test.go @@ -159,6 +159,34 @@ func TestReconcileNode(t *testing.T) { expectResult: ctrl.Result{}, expectError: true, }, + { + name: "node not found is tolerated when machine is deleting", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: "test-cluster", + }, + DeletionTimestamp: ptr.To(metav1.Now()), + Finalizers: []string{"foo"}, + }, + Spec: clusterv1.MachineSpec{ + ProviderID: ptr.To("aws://us-east-1/test-node-1"), + }, + Status: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{ + Kind: "Node", + Name: "test-node-1", + APIVersion: "v1", + }, + }, + }, + node: nil, + nodeGetErr: false, + expectResult: ctrl.Result{}, + expectError: false, + }, } for _, tc := range testCases { diff --git a/internal/controllers/machine/machine_controller_phases.go b/internal/controllers/machine/machine_controller_phases.go index fc1668dee34e..3d79603b116c 100644 --- a/internal/controllers/machine/machine_controller_phases.go +++ b/internal/controllers/machine/machine_controller_phases.go @@ -38,7 +38,6 @@ import ( "sigs.k8s.io/cluster-api/controllers/external" capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util" - "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" utilconversion "sigs.k8s.io/cluster-api/util/conversion" "sigs.k8s.io/cluster-api/util/patch" @@ -87,24 +86,24 @@ func (r *Reconciler) reconcilePhase(_ context.Context, m *clusterv1.Machine) { } // reconcileExternal handles generic unstructured objects referenced by a Machine. -func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (external.ReconcileOutput, error) { +func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) { if err := utilconversion.UpdateReferenceAPIContract(ctx, r.Client, ref); err != nil { - return external.ReconcileOutput{}, err + if apierrors.IsNotFound(err) { + // We want to surface IsNotFound only for the referenced object, so we use a generic error in case CRD is not found. 
+ return nil, errors.New(err.Error()) + } + return nil, err } - result, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref) + obj, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref) if err != nil { - return external.ReconcileOutput{}, err - } - if result.RequeueAfter > 0 { - return result, nil + return nil, err } - obj := result.Result // Set failure reason and message, if any. failureReason, failureMessage, err := external.FailuresFrom(obj) if err != nil { - return external.ReconcileOutput{}, err + return nil, err } if failureReason != "" { machineStatusError := capierrors.MachineStatusError(failureReason) @@ -117,44 +116,40 @@ func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.C ) } - return external.ReconcileOutput{Result: obj}, nil + return obj, nil } // ensureExternalOwnershipAndWatch ensures that only the Machine owns the external object, // adds a watch to the external object if one does not already exist and adds the necessary labels. -func (r *Reconciler) ensureExternalOwnershipAndWatch(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (external.ReconcileOutput, error) { +func (r *Reconciler) ensureExternalOwnershipAndWatch(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) { log := ctrl.LoggerFrom(ctx) obj, err := external.Get(ctx, r.Client, ref, m.Namespace) if err != nil { - if apierrors.IsNotFound(errors.Cause(err)) { - log.Info("could not find external ref, requeuing", ref.Kind, klog.KRef(ref.Namespace, ref.Name)) - return external.ReconcileOutput{RequeueAfter: externalReadyWait}, nil - } - return external.ReconcileOutput{}, err + return nil, err } // Ensure we add a watch to the external object, if there isn't one already. if err := r.externalTracker.Watch(log, obj, handler.EnqueueRequestForOwner(r.Client.Scheme(), r.Client.RESTMapper(), &clusterv1.Machine{})); err != nil { - return external.ReconcileOutput{}, err + return nil, err } // Initialize the patch helper. patchHelper, err := patch.NewHelper(obj, r.Client) if err != nil { - return external.ReconcileOutput{}, err + return nil, err } // removeOnCreateOwnerRefs removes MachineSet and control plane owners from the objects referred to by a Machine. // These owner references are added initially because Machines don't exist when those objects are created. // At this point the Machine exists and can be set as the controller reference. if err := removeOnCreateOwnerRefs(cluster, m, obj); err != nil { - return external.ReconcileOutput{}, err + return nil, err } // Set external object ControllerReference to the Machine. if err := controllerutil.SetControllerReference(m, obj, r.Client.Scheme()); err != nil { - return external.ReconcileOutput{}, err + return nil, err } // Set the Cluster label. @@ -167,13 +162,9 @@ func (r *Reconciler) ensureExternalOwnershipAndWatch(ctx context.Context, cluste // Always attempt to Patch the external object. if err := patchHelper.Patch(ctx, obj); err != nil { - return external.ReconcileOutput{}, err - } - - if annotations.IsPaused(cluster, obj) { - return external.ReconcileOutput{Result: obj, Paused: true}, nil + return nil, err } - return external.ReconcileOutput{Result: obj}, nil + return obj, nil } // reconcileBootstrap reconciles the Spec.Bootstrap.ConfigRef object on a Machine. 
@@ -188,15 +179,21 @@ func (r *Reconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Res } // Call generic external reconciler if we have an external reference. - externalResult, err := r.reconcileExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef) + obj, err := r.reconcileExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef) if err != nil { + if apierrors.IsNotFound(err) { + if !s.machine.DeletionTimestamp.IsZero() { + // Tolerate bootstrap object not found when the machine is being deleted. + // TODO: we can also relax this and tolerate the absence of the bootstrap ref way before, e.g. after node ref is set + return ctrl.Result{}, nil + } + log.Info("could not find bootstrap config object, requeuing", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name)) + // TODO: we can make this smarter and requeue only if we are before node ref is set + return ctrl.Result{RequeueAfter: externalReadyWait}, nil + } return ctrl.Result{}, err } - s.bootstrapConfig = externalResult.Result - - if externalResult.RequeueAfter > 0 { - return ctrl.Result{RequeueAfter: externalResult.RequeueAfter}, nil - } + s.bootstrapConfig = obj // If the bootstrap data is populated, set ready and return. if m.Spec.Bootstrap.DataSecretName != nil { @@ -204,33 +201,27 @@ func (r *Reconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Res conditions.MarkTrue(m, clusterv1.BootstrapReadyCondition) return ctrl.Result{}, nil } - bootstrapConfig := externalResult.Result - - // If the bootstrap config is being deleted, return early. - if !bootstrapConfig.GetDeletionTimestamp().IsZero() { - return ctrl.Result{}, nil - } // Determine if the bootstrap provider is ready. - ready, err := external.IsReady(bootstrapConfig) + ready, err := external.IsReady(s.bootstrapConfig) if err != nil { return ctrl.Result{}, err } // Report a summary of current status of the bootstrap object defined for this machine. conditions.SetMirror(m, clusterv1.BootstrapReadyCondition, - conditions.UnstructuredGetter(bootstrapConfig), + conditions.UnstructuredGetter(s.bootstrapConfig), conditions.WithFallbackValue(ready, clusterv1.WaitingForDataSecretFallbackReason, clusterv1.ConditionSeverityInfo, ""), ) - // If the bootstrap provider is not ready, requeue. + // If the bootstrap provider is not ready, return. if !ready { - log.Info("Waiting for bootstrap provider to generate data secret and report status.ready", bootstrapConfig.GetKind(), klog.KObj(bootstrapConfig)) + log.Info("Waiting for bootstrap provider to generate data secret and report status.ready", s.bootstrapConfig.GetKind(), klog.KObj(s.bootstrapConfig)) return ctrl.Result{}, nil } // Get and set the name of the secret containing the bootstrap data. 
- secretName, _, err := unstructured.NestedString(bootstrapConfig.Object, "status", "dataSecretName") + secretName, _, err := unstructured.NestedString(s.bootstrapConfig.Object, "status", "dataSecretName") if err != nil { return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve dataSecretName from bootstrap provider for Machine %q in namespace %q", m.Name, m.Namespace) } else if secretName == "" { @@ -238,7 +229,7 @@ func (r *Reconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Res } m.Spec.Bootstrap.DataSecretName = ptr.To(secretName) if !m.Status.BootstrapReady { - log.Info("Bootstrap provider generated data secret and reports status.ready", bootstrapConfig.GetKind(), klog.KObj(bootstrapConfig), "Secret", klog.KRef(m.Namespace, secretName)) + log.Info("Bootstrap provider generated data secret and reports status.ready", s.bootstrapConfig.GetKind(), klog.KObj(s.bootstrapConfig), "Secret", klog.KRef(m.Namespace, secretName)) } m.Status.BootstrapReady = true return ctrl.Result{}, nil @@ -251,67 +242,71 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr m := s.machine // Call generic external reconciler. - infraReconcileResult, err := r.reconcileExternal(ctx, cluster, m, &m.Spec.InfrastructureRef) + obj, err := r.reconcileExternal(ctx, cluster, m, &m.Spec.InfrastructureRef) if err != nil { - return ctrl.Result{}, err - } - s.infraMachine = infraReconcileResult.Result - if infraReconcileResult.RequeueAfter > 0 { - // Infra object went missing after the machine was up and running - if m.Status.InfrastructureReady { - log.Error(err, "Machine infrastructure reference has been deleted after being ready, setting failure state") - m.Status.FailureReason = ptr.To(capierrors.InvalidConfigurationMachineError) - m.Status.FailureMessage = ptr.To(fmt.Sprintf("Machine infrastructure resource %v with name %q has been deleted after being ready", - m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name)) - return ctrl.Result{}, reconcile.TerminalError(errors.Errorf("could not find %v %q for Machine %q in namespace %q", m.Spec.InfrastructureRef.GroupVersionKind().String(), m.Spec.InfrastructureRef.Name, m.Name, m.Namespace)) + if apierrors.IsNotFound(err) { + if !s.machine.DeletionTimestamp.IsZero() { + // Tolerate infra machine not found when the machine is being deleted. 
+ return ctrl.Result{}, nil + } + + if m.Status.InfrastructureReady { + // Infra object went missing after the machine was up and running + log.Error(err, "Machine infrastructure reference has been deleted after being ready, setting failure state") + m.Status.FailureReason = ptr.To(capierrors.InvalidConfigurationMachineError) + m.Status.FailureMessage = ptr.To(fmt.Sprintf("Machine infrastructure resource %v with name %q has been deleted after being ready", + m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name)) + return ctrl.Result{}, reconcile.TerminalError(errors.Errorf("could not find %v %q for Machine %q in namespace %q", m.Spec.InfrastructureRef.GroupVersionKind().String(), m.Spec.InfrastructureRef.Name, m.Name, m.Namespace)) + } + log.Info("could not find infrastructure machine, requeuing", m.Spec.InfrastructureRef.Kind, klog.KRef(m.Spec.InfrastructureRef.Namespace, m.Spec.InfrastructureRef.Name)) + return ctrl.Result{RequeueAfter: externalReadyWait}, nil } - return ctrl.Result{RequeueAfter: infraReconcileResult.RequeueAfter}, nil + return ctrl.Result{}, err } + s.infraMachine = obj - infraConfig := infraReconcileResult.Result - - if !infraConfig.GetDeletionTimestamp().IsZero() { + if !s.infraMachine.GetDeletionTimestamp().IsZero() { return ctrl.Result{}, nil } // Determine if the infrastructure provider is ready. - ready, err := external.IsReady(infraConfig) + ready, err := external.IsReady(s.infraMachine) if err != nil { return ctrl.Result{}, err } if ready && !m.Status.InfrastructureReady { - log.Info("Infrastructure provider has completed machine infrastructure provisioning and reports status.ready", infraConfig.GetKind(), klog.KObj(infraConfig)) + log.Info("Infrastructure provider has completed machine infrastructure provisioning and reports status.ready", s.infraMachine.GetKind(), klog.KObj(s.infraMachine)) } // Report a summary of current status of the infrastructure object defined for this machine. conditions.SetMirror(m, clusterv1.InfrastructureReadyCondition, - conditions.UnstructuredGetter(infraConfig), + conditions.UnstructuredGetter(s.infraMachine), conditions.WithFallbackValue(ready, clusterv1.WaitingForInfrastructureFallbackReason, clusterv1.ConditionSeverityInfo, ""), ) // If the infrastructure provider is not ready (and it wasn't ready before), return early. if !ready && !m.Status.InfrastructureReady { - log.Info("Waiting for infrastructure provider to create machine infrastructure and report status.ready", infraConfig.GetKind(), klog.KObj(infraConfig)) + log.Info("Waiting for infrastructure provider to create machine infrastructure and report status.ready", s.infraMachine.GetKind(), klog.KObj(s.infraMachine)) return ctrl.Result{}, nil } // Get Spec.ProviderID from the infrastructure provider. var providerID string - if err := util.UnstructuredUnmarshalField(infraConfig, &providerID, "spec", "providerID"); err != nil { + if err := util.UnstructuredUnmarshalField(s.infraMachine, &providerID, "spec", "providerID"); err != nil { return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve Spec.ProviderID from infrastructure provider for Machine %q in namespace %q", m.Name, m.Namespace) } else if providerID == "" { return ctrl.Result{}, errors.Errorf("retrieved empty Spec.ProviderID from infrastructure provider for Machine %q in namespace %q", m.Name, m.Namespace) } // Get and set Status.Addresses from the infrastructure provider. 
- err = util.UnstructuredUnmarshalField(infraConfig, &m.Status.Addresses, "status", "addresses") + err = util.UnstructuredUnmarshalField(s.infraMachine, &m.Status.Addresses, "status", "addresses") if err != nil && err != util.ErrUnstructuredFieldNotFound { return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve addresses from infrastructure provider for Machine %q in namespace %q", m.Name, m.Namespace) } // Get and set the failure domain from the infrastructure provider. var failureDomain string - err = util.UnstructuredUnmarshalField(infraConfig, &failureDomain, "spec", "failureDomain") + err = util.UnstructuredUnmarshalField(s.infraMachine, &failureDomain, "spec", "failureDomain") switch { case err == util.ErrUnstructuredFieldNotFound: // no-op case err != nil: diff --git a/internal/controllers/machine/machine_controller_phases_test.go b/internal/controllers/machine/machine_controller_phases_test.go index 20cb2a3013c1..23fc85aac70b 100644 --- a/internal/controllers/machine/machine_controller_phases_test.go +++ b/internal/controllers/machine/machine_controller_phases_test.go @@ -661,6 +661,14 @@ func TestReconcileBootstrap(t *testing.T) { expectError bool expected func(g *WithT, m *clusterv1.Machine) }{ + { + name: "no op if bootstrap config ref is not set", + machine: &clusterv1.Machine{}, + bootstrapConfig: nil, + bootstrapConfigGetError: nil, + expectResult: ctrl.Result{}, + expectError: false, + }, { name: "err reading bootstrap config (something different than not found), it should return error", machine: defaultMachine.DeepCopy(), @@ -822,6 +830,35 @@ func TestReconcileBootstrap(t *testing.T) { g.Expect(*m.Spec.Bootstrap.DataSecretName).To(Equal("secret-data")) }, }, + { + name: "bootstrap config not found is tolerated when machine is deleting", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "deleting-machine", + Namespace: metav1.NamespaceDefault, + DeletionTimestamp: ptr.To(metav1.Now()), + Finalizers: []string{"foo"}, + }, + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: "bootstrap.cluster.x-k8s.io/v1beta1", + Kind: "GenericBootstrapConfig", + Name: "bootstrap-config1", + }, + DataSecretName: ptr.To("secret-data"), + }, + }, + Status: clusterv1.MachineStatus{ + BootstrapReady: true, + }, + }, + bootstrapConfig: nil, + bootstrapConfigGetError: nil, + expectResult: ctrl.Result{}, + expectError: false, + expected: func(g *WithT, m *clusterv1.Machine) {}, + }, } for _, tc := range testCases { @@ -1327,6 +1364,58 @@ func TestReconcileInfrastructure(t *testing.T) { g.Expect(m.Status.FailureReason).ToNot(BeNil()) }, }, + { + name: "infra machine is not found is tolerated when infrastructure not yet ready and machine is deleting", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "deleting-machine", + Namespace: metav1.NamespaceDefault, + DeletionTimestamp: ptr.To(metav1.Now()), + Finalizers: []string{"foo"}, + }, + Spec: clusterv1.MachineSpec{ + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "GenericInfrastructureMachine", + Name: "infra-config1", + }, + }, + Status: clusterv1.MachineStatus{ + InfrastructureReady: false, + }, + }, + infraMachine: nil, + infraMachineGetError: nil, + expectResult: ctrl.Result{}, + expectError: false, + expected: func(g *WithT, m *clusterv1.Machine) {}, + }, + { + name: "infra machine is not found is tolerated when infrastructure ready and machine is deleting", + machine: 
&clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "deleting-machine", + Namespace: metav1.NamespaceDefault, + DeletionTimestamp: ptr.To(metav1.Now()), + Finalizers: []string{"foo"}, + }, + Spec: clusterv1.MachineSpec{ + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "GenericInfrastructureMachine", + Name: "infra-config1", + }, + }, + Status: clusterv1.MachineStatus{ + InfrastructureReady: true, + }, + }, + infraMachine: nil, + infraMachineGetError: nil, + expectResult: ctrl.Result{}, + expectError: false, + expected: func(g *WithT, m *clusterv1.Machine) {}, + }, } for _, tc := range testCases { @@ -1353,7 +1442,6 @@ func TestReconcileInfrastructure(t *testing.T) { } s := &scope{cluster: defaultCluster, machine: tc.machine} result, err := r.reconcileInfrastructure(ctx, s) - r.reconcilePhase(ctx, tc.machine) g.Expect(result).To(BeComparableTo(tc.expectResult)) if tc.expectError { g.Expect(err).To(HaveOccurred()) diff --git a/internal/controllers/machine/machine_controller_test.go b/internal/controllers/machine/machine_controller_test.go index 7ab8096215fa..816ccb7bcd4a 100644 --- a/internal/controllers/machine/machine_controller_test.go +++ b/internal/controllers/machine/machine_controller_test.go @@ -916,6 +916,7 @@ func TestReconcileRequest(t *testing.T) { Client: clientFake, Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), ssaCache: ssa.NewCache(), + recorder: record.NewFakeRecorder(10), } result, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(&tc.machine)}) @@ -1213,95 +1214,6 @@ func TestMachineConditions(t *testing.T) { } } -func TestReconcileDeleteExternal(t *testing.T) { - testCluster := &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{Namespace: metav1.NamespaceDefault, Name: "test-cluster"}, - } - - bootstrapConfig := &unstructured.Unstructured{ - Object: map[string]interface{}{ - "kind": "BootstrapConfig", - "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", - "metadata": map[string]interface{}{ - "name": "delete-bootstrap", - "namespace": metav1.NamespaceDefault, - }, - }, - } - - machine := &clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "delete", - Namespace: metav1.NamespaceDefault, - }, - Spec: clusterv1.MachineSpec{ - ClusterName: "test-cluster", - Bootstrap: clusterv1.Bootstrap{ - ConfigRef: &corev1.ObjectReference{ - APIVersion: "bootstrap.cluster.x-k8s.io/v1beta1", - Kind: "BootstrapConfig", - Name: "delete-bootstrap", - }, - }, - }, - } - - testCases := []struct { - name string - bootstrapExists bool - expectError bool - expected *unstructured.Unstructured - }{ - { - name: "should continue to reconcile delete of external refs if exists", - bootstrapExists: true, - expected: &unstructured.Unstructured{ - Object: map[string]interface{}{ - "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", - "kind": "BootstrapConfig", - "metadata": map[string]interface{}{ - "name": "delete-bootstrap", - "namespace": metav1.NamespaceDefault, - "resourceVersion": "999", - }, - }, - }, - expectError: false, - }, - { - name: "should no longer reconcile deletion of external refs since it doesn't exist", - bootstrapExists: false, - expected: nil, - expectError: false, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - g := NewWithT(t) - - objs := []client.Object{testCluster, machine} - - if tc.bootstrapExists { - 
objs = append(objs, bootstrapConfig) - } - - c := fake.NewClientBuilder().WithObjects(objs...).Build() - r := &Reconciler{ - Client: c, - } - - obj, err := r.reconcileDeleteExternal(ctx, testCluster, machine, machine.Spec.Bootstrap.ConfigRef) - if tc.expectError { - g.Expect(err).To(HaveOccurred()) - } else { - g.Expect(err).ToNot(HaveOccurred()) - } - g.Expect(obj).To(BeComparableTo(tc.expected)) - }) - } -} - func TestRemoveMachineFinalizerAfterDeleteReconcile(t *testing.T) { g := NewWithT(t) @@ -1329,7 +1241,7 @@ func TestRemoveMachineFinalizerAfterDeleteReconcile(t *testing.T) { }, } key := client.ObjectKey{Namespace: m.Namespace, Name: m.Name} - c := fake.NewClientBuilder().WithObjects(testCluster, m).WithStatusSubresource(&clusterv1.Machine{}).Build() + c := fake.NewClientBuilder().WithObjects(testCluster, m, builder.GenericInfrastructureMachineCRD.DeepCopy()).WithStatusSubresource(&clusterv1.Machine{}).Build() mr := &Reconciler{ Client: c, } @@ -2869,7 +2781,11 @@ func TestNodeDeletion(t *testing.T) { cluster.DeletionTimestamp = &metav1.Time{Time: deletionTime.Add(time.Hour)} } - _, err := r.reconcileDelete(context.Background(), cluster, m) + s := &scope{ + cluster: cluster, + machine: m, + } + _, err := r.reconcileDelete(context.Background(), s) if tc.resultErr { g.Expect(err).To(HaveOccurred()) @@ -2988,8 +2904,11 @@ func TestNodeDeletionWithoutNodeRefFallback(t *testing.T) { nodeDeletionRetryTimeout: 10 * time.Millisecond, } - cluster := testCluster.DeepCopy() - _, err := r.reconcileDelete(context.Background(), cluster, m) + s := &scope{ + cluster: testCluster.DeepCopy(), + machine: m, + } + _, err := r.reconcileDelete(context.Background(), s) if tc.resultErr { g.Expect(err).To(HaveOccurred()) From 4bd602b3a158e83329cd34a6b3463d51fd934b36 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Tue, 8 Oct 2024 22:35:09 +0200 Subject: [PATCH 2/8] update status with v1beta2 conditions --- api/v1beta1/machine_types.go | 136 ++++ api/v1beta1/v1beta2_condition_consts.go | 48 +- internal/contract/bootstrap.go | 5 + internal/contract/infrastructure_machine.go | 5 + .../controllers/machine/machine_controller.go | 2 +- .../machine/machine_controller_noderef.go | 5 +- .../machine/machine_controller_phases.go | 40 - .../machine/machine_controller_phases_test.go | 594 +------------- .../machine/machine_controller_status.go | 486 ++++++++++++ .../machine/machine_controller_status_test.go | 732 ++++++++++++++++++ util/conditions/v1beta2/mirror.go | 43 +- util/conditions/v1beta2/mirror_test.go | 13 + util/conditions/v1beta2/options.go | 15 + 13 files changed, 1452 insertions(+), 672 deletions(-) create mode 100644 internal/controllers/machine/machine_controller_status.go create mode 100644 internal/controllers/machine/machine_controller_status_test.go diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index 8ed7c7b95662..49f8f8fc3845 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -86,6 +86,142 @@ const ( ManagedNodeLabelDomain = "node.cluster.x-k8s.io" ) +// Machine's Available condition and corresponding reasons that will be used in v1Beta2 API version. +const ( + // MachineAvailableV1Beta2Condition is true if the machine is Ready for at least MinReadySeconds, as defined by the Machine's MinReadySeconds field. + MachineAvailableV1Beta2Condition = AvailableV1Beta2Condition + + // MachineNotReadyV1Beta2Reason surfaces when a machine is not yet ready (and thus not yet available). 
+	MachineNotReadyV1Beta2Reason = "NotReady"
+
+	// MachineWaitingForMinReadySecondsV1Beta2Reason surfaces when a machine is ready for less than MinReadySeconds (and thus not yet available).
+	MachineWaitingForMinReadySecondsV1Beta2Reason = "WaitingForMinReadySeconds"
+
+	// MachineReadyNotYetReportedV1Beta2Reason surfaces when a machine's Ready condition is not reported yet.
+	// Note: this should never happen and it is a signal of some internal error.
+	MachineReadyNotYetReportedV1Beta2Reason = "ReadyNotYetReported"
+
+	// MachineAvailableV1Beta2Reason surfaces when a machine has been ready for at least MinReadySeconds.
+	MachineAvailableV1Beta2Reason = "MachineAvailable"
+)
+
+// Machine's Ready condition and corresponding reasons that will be used in v1Beta2 API version.
+// Note: when possible, Ready condition will use reasons from the conditions it summarizes.
+const (
+	// MachineReadyV1Beta2Condition is true if the Machine is not deleted, Machine's BootstrapConfigReady, InfrastructureReady,
+	// NodeHealthy and HealthCheckSucceeded (if present) are true; if other conditions are defined in spec.readinessGates,
+	// these conditions must be true as well.
+	MachineReadyV1Beta2Condition = ReadyV1Beta2Condition
+
+	// MachineErrorComputingReadyV1Beta2Reason surfaces when there was an error computing the ready condition.
+	// Note: this should never happen and it is a signal of some internal error.
+	MachineErrorComputingReadyV1Beta2Reason = "ErrorComputingReady"
+)
+
+// Machine's UpToDate condition and corresponding reasons that will be used in v1Beta2 API version.
+// Note: UpToDate condition is set by the controller owning the machine.
+const (
+	// MachineUpToDateV1Beta2Condition is true if the Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
+	// The Machine's owner (e.g. MachineDeployment) is authoritative to set their owned Machine's UpToDate conditions based on its current spec.
+	MachineUpToDateV1Beta2Condition = "UpToDate"
+)
+
+// Machine's BootstrapConfigReady condition and corresponding reasons that will be used in v1Beta2 API version.
+// Note: when possible, BootstrapConfigReady condition will use reasons surfaced from the underlying bootstrap config object.
+const (
+	// MachineBootstrapConfigReadyV1Beta2Condition mirrors the corresponding Ready condition from the Machine's BootstrapConfig resource.
+	MachineBootstrapConfigReadyV1Beta2Condition = BootstrapConfigReadyV1Beta2Condition
+
+	// MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason surfaces when a bootstrap data secret is provided by the user (without a ConfigRef).
+	MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason = "DataSecretUserProvided"
+
+	// MachineBootstrapInvalidConfigV1Beta2Reason surfaces when MachineBootstrap has neither Bootstrap.ConfigRef nor
+	// Bootstrap.DataSecretName specified by the user.
+	MachineBootstrapInvalidConfigV1Beta2Reason = "InvalidConfig"
+
+	// MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason surfaces a BootstrapConfig Ready condition (read from a bootstrap config object) which is invalid
+	// (e.g. its status is missing).
+	MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason = InvalidConditionReported
+
+	// MachineBootstrapConfigReadyNoV1Beta2ReasonReported applies to a BootstrapConfig Ready condition (read from a bootstrap config object) that reports no reason.
+	MachineBootstrapConfigReadyNoV1Beta2ReasonReported = NoV1Beta2ReasonReported
+
+	// MachineBootstrapConfigNotFoundV1Beta2Reason surfaces when a referenced bootstrap config object cannot be found.
+	// Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely.
+	MachineBootstrapConfigNotFoundV1Beta2Reason = RefObjectNotFoundV1Beta2Reason
+
+	// MachineBootstrapConfigDeletedV1Beta2Reason surfaces when a referenced bootstrap config object has been deleted.
+	// Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g.
+	// during the deletion workflow, or by a user.
+	MachineBootstrapConfigDeletedV1Beta2Reason = RefObjectDeletedV1Beta2Reason
+)
+
+// Machine's InfrastructureReady condition and corresponding reasons that will be used in v1Beta2 API version.
+// Note: when possible, InfrastructureReady condition will use reasons surfaced from the underlying infra machine object.
+const (
+	// MachineInfrastructureReadyV1Beta2Condition mirrors the corresponding Ready condition from the Machine's Infrastructure resource.
+	MachineInfrastructureReadyV1Beta2Condition = InfrastructureReadyV1Beta2Condition
+
+	// MachineInfrastructureInvalidConditionReportedV1Beta2Reason surfaces an infrastructure Ready condition (read from an infra machine object) which is invalid
+	// (e.g. its status is missing).
+	MachineInfrastructureInvalidConditionReportedV1Beta2Reason = InvalidConditionReported
+
+	// MachineInfrastructureReadyNoV1Beta2ReasonReported applies to an infrastructure Ready condition (read from an infra machine object) that reports no reason.
+	MachineInfrastructureReadyNoV1Beta2ReasonReported = NoV1Beta2ReasonReported
+
+	// MachineInfrastructureNotFoundV1Beta2Reason surfaces when a referenced infrastructure object cannot be found.
+	// Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely.
+	MachineInfrastructureNotFoundV1Beta2Reason = RefObjectNotFoundV1Beta2Reason
+
+	// MachineInfrastructureDeletedV1Beta2Reason surfaces when a referenced infrastructure object has been deleted.
+	// Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g.
+	// during the deletion workflow, or by a user.
+	MachineInfrastructureDeletedV1Beta2Reason = RefObjectDeletedV1Beta2Reason
+)
+
+// Machine's NodeHealthy and NodeReady conditions and corresponding reasons that will be used in v1Beta2 API version.
+// Note: when possible, NodeHealthy and NodeReady conditions will use reasons surfaced from the underlying node.
+const (
+	// MachineNodeHealthyV1Beta2Condition is true if the Machine's Node is ready and it does not report MemoryPressure, DiskPressure and PIDPressure.
+	MachineNodeHealthyV1Beta2Condition = "NodeHealthy"
+
+	// MachineNodeReadyV1Beta2Condition is true if the Machine's Node is ready.
+	MachineNodeReadyV1Beta2Condition = "NodeReady"
+
+	// MachineNodeNotFoundV1Beta2Reason surfaces when the node hosted on the machine cannot be found.
+	// Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely.
+	MachineNodeNotFoundV1Beta2Reason = "NodeNotFound"
+
+	// MachineNodeDeletedV1Beta2Reason surfaces when the node hosted on the machine has been deleted.
+	// Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g.
+	// during the deletion workflow, or by a user.
+	MachineNodeDeletedV1Beta2Reason = "NodeDeleted"
+)
+
+// Machine's HealthCheckSucceeded and OwnerRemediated conditions and corresponding reasons that will be used in v1Beta2 API version.
+// Note: HealthCheckSucceeded and OwnerRemediated conditions are set by the MachineHealthCheck controller.
+const (
+	// MachineHealthCheckSucceededV1Beta2Condition is true if MHC instances targeting this machine report the Machine
+	// is healthy according to the definition of healthy present in the spec of the MachineHealthCheck object.
+	MachineHealthCheckSucceededV1Beta2Condition = "HealthCheckSucceeded"
+
+	// MachineOwnerRemediatedV1Beta2Condition is only present if MHC instances targeting this machine
+	// determine that the controller owning this machine should perform remediation.
+	MachineOwnerRemediatedV1Beta2Condition = "OwnerRemediated"
+)
+
+// Machine's Deleting condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeletingV1Beta2Condition surfaces details about progress in the machine deletion workflow.
+	MachineDeletingV1Beta2Condition = DeletingV1Beta2Condition
+)
+
+// Machine's Paused condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachinePausedV1Beta2Condition is true if the Machine or the Cluster it belongs to are paused.
+	MachinePausedV1Beta2Condition = PausedV1Beta2Condition
+)
+
 // ANCHOR: MachineSpec
 
 // MachineSpec defines the desired state of Machine.
diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go
index d7d92b0037db..7e61d18faf30 100644
--- a/api/v1beta1/v1beta2_condition_consts.go
+++ b/api/v1beta1/v1beta2_condition_consts.go
@@ -85,45 +85,23 @@ const (
 	PausedV1Beta2Condition = "Paused"
 )
 
-// Conditions that will be used for the Machine object in v1Beta2 API version.
+// Reasons that are used across different objects.
 const (
-	// MachineAvailableV1Beta2Condition is true if the machine is Ready for at least MinReadySeconds, as defined by the Machine's MinReadySeconds field.
-	MachineAvailableV1Beta2Condition = AvailableV1Beta2Condition
+	// InvalidConditionReported applies to a condition, usually read from an external object, that is invalid
+	// (e.g. its status is missing).
+	InvalidConditionReported = "InvalidConditionReported"
 
-	// MachineReadyV1Beta2Condition is true if the Machine is not deleted, Machine's BootstrapConfigReady, InfrastructureReady,
-	// NodeHealthy and HealthCheckSucceeded (if present) are true; if other conditions are defined in spec.readinessGates,
-	// these conditions must be true as well.
-	MachineReadyV1Beta2Condition = ReadyV1Beta2Condition
+	// NoV1Beta2ReasonReported applies to a condition, usually read from an external object, that reports no reason.
+	// Note: this could happen e.g. when an external object still uses Cluster API v1beta1 Conditions.
+	NoV1Beta2ReasonReported = "NoReasonReported"
 
-	// MachineUpToDateV1Beta2Condition is true if the Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment.
-	// The Machine's owner (e.g MachineDeployment) is authoritative to set their owned Machine's UpToDate conditions based on its current spec.
-	MachineUpToDateV1Beta2Condition = "UpToDate"
+	// RefObjectNotFoundV1Beta2Reason surfaces when a referenced object cannot be found.
+	RefObjectNotFoundV1Beta2Reason = "RefObjectNotFound"
 
-	// MachineBootstrapConfigReadyV1Beta2Condition condition mirrors the corresponding Ready condition from the Machine's BootstrapConfig resource.
-	MachineBootstrapConfigReadyV1Beta2Condition = BootstrapConfigReadyV1Beta2Condition
-
-	// MachineInfrastructureReadyV1Beta2Condition mirrors the corresponding Ready condition from the Machine's Infrastructure resource.
-	MachineInfrastructureReadyV1Beta2Condition = InfrastructureReadyV1Beta2Condition
-
-	// MachineNodeHealthyV1Beta2Condition is true if the Machine's Node is ready and it does not report MemoryPressure, DiskPressure and PIDPressure.
-	MachineNodeHealthyV1Beta2Condition = "NodeHealthy"
-
-	// MachineNodeReadyV1Beta2Condition is true if the Machine's Node is ready.
-	MachineNodeReadyV1Beta2Condition = "NodeReady"
-
-	// MachineHealthCheckSucceededV1Beta2Condition is true if MHC instances targeting this machine report the Machine
-	// is healthy according to the definition of healthy present in the spec of the MachineHealthCheck object.
-	MachineHealthCheckSucceededV1Beta2Condition = "HealthCheckSucceeded"
-
-	// MachineOwnerRemediatedV1Beta2Condition is only present if MHC instances targeting this machine
-	// determine that the controller owning this machine should perform remediation.
-	MachineOwnerRemediatedV1Beta2Condition = "OwnerRemediated"
-
-	// MachineDeletingV1Beta2Condition surfaces details about progress in the machine deletion workflow.
-	MachineDeletingV1Beta2Condition = DeletingV1Beta2Condition
-
-	// MachinePausedV1Beta2Condition is true if the Machine or the Cluster it belongs to are paused.
-	MachinePausedV1Beta2Condition = PausedV1Beta2Condition
+	// RefObjectDeletedV1Beta2Reason surfaces when a referenced object has been deleted.
+	// Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g.
+	// during the deletion workflow, or by a user.
+	RefObjectDeletedV1Beta2Reason = "RefObjectDeleted"
 )
 
 // Conditions that will be used for the MachineSet object in v1Beta2 API version.
diff --git a/internal/contract/bootstrap.go b/internal/contract/bootstrap.go
index d042d1a5e16c..0aa39500c388 100644
--- a/internal/contract/bootstrap.go
+++ b/internal/contract/bootstrap.go
@@ -39,6 +39,11 @@ func (b *BootstrapContract) Ready() *Bool {
 	}
 }
 
+// ReadyConditionType returns the type of the ready condition.
+func (b *BootstrapContract) ReadyConditionType() string {
+	return "Ready"
+}
+
 // DataSecretName provide access to status.dataSecretName field in a bootstrap object.
 func (b *BootstrapContract) DataSecretName() *String {
 	return &String{
diff --git a/internal/contract/infrastructure_machine.go b/internal/contract/infrastructure_machine.go
index a762cb3fe264..12bf956d8e30 100644
--- a/internal/contract/infrastructure_machine.go
+++ b/internal/contract/infrastructure_machine.go
@@ -49,6 +49,11 @@ func (m *InfrastructureMachineContract) Ready() *Bool {
 	}
 }
 
+// ReadyConditionType returns the type of the ready condition.
+func (m *InfrastructureMachineContract) ReadyConditionType() string {
+	return "Ready"
+}
+
 // FailureReason provides access to the status.failureReason field in an InfrastructureMachine object. Note that this field is optional.
func (m *InfrastructureMachineContract) FailureReason() *String { return &String{ diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 07d2c31d1537..56d6db239033 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -207,7 +207,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } defer func() { - r.reconcilePhase(ctx, m) + r.reconcileStatus(ctx, s) // Always attempt to patch the object and status after each reconciliation. // Patch ObservedGeneration only if the reconciliation completed successfully diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go index db0b5df55655..72b298df059d 100644 --- a/internal/controllers/machine/machine_controller_noderef.go +++ b/internal/controllers/machine/machine_controller_noderef.go @@ -225,10 +225,7 @@ func summarizeNodeConditions(node *corev1.Node) (corev1.ConditionStatus, string) } } } - message = strings.TrimSpace(message) - if strings.HasSuffix(message, ".") { - message = strings.TrimSuffix(message, ".") - } + message = strings.TrimSuffix(message, ". ") if semanticallyFalseStatus > 0 { return corev1.ConditionFalse, message } diff --git a/internal/controllers/machine/machine_controller_phases.go b/internal/controllers/machine/machine_controller_phases.go index 3d79603b116c..caefa9bdf64b 100644 --- a/internal/controllers/machine/machine_controller_phases.go +++ b/internal/controllers/machine/machine_controller_phases.go @@ -45,46 +45,6 @@ import ( var externalReadyWait = 30 * time.Second -func (r *Reconciler) reconcilePhase(_ context.Context, m *clusterv1.Machine) { - originalPhase := m.Status.Phase - - // Set the phase to "pending" if nil. - if m.Status.Phase == "" { - m.Status.SetTypedPhase(clusterv1.MachinePhasePending) - } - - // Set the phase to "provisioning" if bootstrap is ready and the infrastructure isn't. - if m.Status.BootstrapReady && !m.Status.InfrastructureReady { - m.Status.SetTypedPhase(clusterv1.MachinePhaseProvisioning) - } - - // Set the phase to "provisioned" if there is a provider ID. - if m.Spec.ProviderID != nil { - m.Status.SetTypedPhase(clusterv1.MachinePhaseProvisioned) - } - - // Set the phase to "running" if there is a NodeRef field and infrastructure is ready. - if m.Status.NodeRef != nil && m.Status.InfrastructureReady { - m.Status.SetTypedPhase(clusterv1.MachinePhaseRunning) - } - - // Set the phase to "failed" if any of Status.FailureReason or Status.FailureMessage is not-nil. - if m.Status.FailureReason != nil || m.Status.FailureMessage != nil { - m.Status.SetTypedPhase(clusterv1.MachinePhaseFailed) - } - - // Set the phase to "deleting" if the deletion timestamp is set. - if !m.DeletionTimestamp.IsZero() { - m.Status.SetTypedPhase(clusterv1.MachinePhaseDeleting) - } - - // If the phase has changed, update the LastUpdated timestamp - if m.Status.Phase != originalPhase { - now := metav1.Now() - m.Status.LastUpdated = &now - } -} - // reconcileExternal handles generic unstructured objects referenced by a Machine. 
func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) { if err := utilconversion.UpdateReferenceAPIContract(ctx, r.Client, ref); err != nil { diff --git a/internal/controllers/machine/machine_controller_phases_test.go b/internal/controllers/machine/machine_controller_phases_test.go index 23fc85aac70b..39b625836118 100644 --- a/internal/controllers/machine/machine_controller_phases_test.go +++ b/internal/controllers/machine/machine_controller_phases_test.go @@ -17,7 +17,6 @@ limitations under the License. package machine import ( - "fmt" "testing" "time" @@ -28,603 +27,16 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/internal/test/builder" - "sigs.k8s.io/cluster-api/util" - "sigs.k8s.io/cluster-api/util/conditions" - "sigs.k8s.io/cluster-api/util/kubeconfig" ) func init() { externalReadyWait = 1 * time.Second } -func TestReconcileMachinePhases(t *testing.T) { - var defaultKubeconfigSecret *corev1.Secret - defaultCluster := &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: metav1.NamespaceDefault, - }, - } - - defaultMachine := clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machine-test", - Namespace: metav1.NamespaceDefault, - Labels: map[string]string{ - clusterv1.MachineControlPlaneLabel: "", - }, - }, - Spec: clusterv1.MachineSpec{ - ClusterName: defaultCluster.Name, - Bootstrap: clusterv1.Bootstrap{ - ConfigRef: &corev1.ObjectReference{ - APIVersion: "bootstrap.cluster.x-k8s.io/v1beta1", - Kind: "GenericBootstrapConfig", - Name: "bootstrap-config1", - }, - }, - InfrastructureRef: corev1.ObjectReference{ - APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", - Kind: "GenericInfrastructureMachine", - Name: "infra-config1", - }, - }, - } - - defaultBootstrap := &unstructured.Unstructured{ - Object: map[string]interface{}{ - "kind": "GenericBootstrapConfig", - "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", - "metadata": map[string]interface{}{ - "name": "bootstrap-config1", - "namespace": metav1.NamespaceDefault, - }, - "spec": map[string]interface{}{}, - "status": map[string]interface{}{}, - }, - } - - defaultInfra := &unstructured.Unstructured{ - Object: map[string]interface{}{ - "kind": "GenericInfrastructureMachine", - "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", - "metadata": map[string]interface{}{ - "name": "infra-config1", - "namespace": metav1.NamespaceDefault, - }, - "spec": map[string]interface{}{}, - "status": map[string]interface{}{}, - }, - } - - t.Run("Should set OwnerReference and cluster name label on external objects", func(t *testing.T) { - g := NewWithT(t) - - ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") - g.Expect(err).ToNot(HaveOccurred()) - defer func() { - g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) - }() - - cluster := defaultCluster.DeepCopy() - cluster.Namespace = ns.Name - - bootstrapConfig := defaultBootstrap.DeepCopy() - bootstrapConfig.SetNamespace(ns.Name) - infraMachine := defaultInfra.DeepCopy() - infraMachine.SetNamespace(ns.Name) - machine := defaultMachine.DeepCopy() - machine.Namespace = ns.Name - - g.Expect(env.Create(ctx, cluster)).To(Succeed()) - defaultKubeconfigSecret = 
kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) - g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) - - g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) - g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) - g.Expect(env.Create(ctx, machine)).To(Succeed()) - - // Wait until BootstrapConfig has the ownerReference. - g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(bootstrapConfig), bootstrapConfig); err != nil { - return false - } - g.Expect(bootstrapConfig.GetOwnerReferences()).To(HaveLen(1)) - g.Expect(bootstrapConfig.GetLabels()[clusterv1.ClusterNameLabel]).To(Equal("test-cluster")) - return true - }, 10*time.Second).Should(BeTrue()) - - // Wait until InfraMachine has the ownerReference. - g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(infraMachine), infraMachine); err != nil { - return false - } - g.Expect(infraMachine.GetOwnerReferences()).To(HaveLen(1)) - g.Expect(infraMachine.GetLabels()[clusterv1.ClusterNameLabel]).To(Equal("test-cluster")) - return true - }, 10*time.Second).Should(BeTrue()) - }) - - t.Run("Should set `Pending` with a new Machine", func(t *testing.T) { - g := NewWithT(t) - - ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") - g.Expect(err).ToNot(HaveOccurred()) - defer func() { - g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) - }() - - cluster := defaultCluster.DeepCopy() - cluster.Namespace = ns.Name - - bootstrapConfig := defaultBootstrap.DeepCopy() - bootstrapConfig.SetNamespace(ns.Name) - infraMachine := defaultInfra.DeepCopy() - infraMachine.SetNamespace(ns.Name) - machine := defaultMachine.DeepCopy() - machine.Namespace = ns.Name - - g.Expect(env.Create(ctx, cluster)).To(Succeed()) - defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) - g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) - - g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) - g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) - g.Expect(env.Create(ctx, machine)).To(Succeed()) - - // Wait until Machine was reconciled. 
- g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { - return false - } - g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhasePending)) - // LastUpdated should be set as the phase changes - g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) - return true - }, 10*time.Second).Should(BeTrue()) - }) - - t.Run("Should set `Provisioning` when bootstrap is ready", func(t *testing.T) { - g := NewWithT(t) - - ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") - g.Expect(err).ToNot(HaveOccurred()) - defer func() { - g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) - }() - - cluster := defaultCluster.DeepCopy() - cluster.Namespace = ns.Name - - bootstrapConfig := defaultBootstrap.DeepCopy() - bootstrapConfig.SetNamespace(ns.Name) - infraMachine := defaultInfra.DeepCopy() - infraMachine.SetNamespace(ns.Name) - machine := defaultMachine.DeepCopy() - machine.Namespace = ns.Name - - g.Expect(env.Create(ctx, cluster)).To(Succeed()) - defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) - g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) - - g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) - g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) - // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. - preUpdate := time.Now().Add(-2 * time.Second) - g.Expect(env.Create(ctx, machine)).To(Succeed()) - - // Set the LastUpdated to be able to verify it is updated when the phase changes - modifiedMachine := machine.DeepCopy() - g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) - - // Set bootstrap ready. - modifiedBootstrapConfig := bootstrapConfig.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) - - // Wait until Machine was reconciled. 
- g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { - return false - } - g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseProvisioning)) - // Verify that the LastUpdated timestamp was updated - g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) - g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) - return true - }, 10*time.Second).Should(BeTrue()) - }) - - t.Run("Should set `Running` when bootstrap and infra is ready", func(t *testing.T) { - g := NewWithT(t) - - ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") - g.Expect(err).ToNot(HaveOccurred()) - defer func() { - g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) - }() - - nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) - - cluster := defaultCluster.DeepCopy() - cluster.Namespace = ns.Name - - bootstrapConfig := defaultBootstrap.DeepCopy() - bootstrapConfig.SetNamespace(ns.Name) - infraMachine := defaultInfra.DeepCopy() - infraMachine.SetNamespace(ns.Name) - g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) - g.Expect(unstructured.SetNestedField(infraMachine.Object, "us-east-2a", "spec", "failureDomain")).To(Succeed()) - machine := defaultMachine.DeepCopy() - machine.Namespace = ns.Name - - // Create Node. - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "machine-test-node-", - }, - Spec: corev1.NodeSpec{ProviderID: nodeProviderID}, - } - g.Expect(env.Create(ctx, node)).To(Succeed()) - defer func() { - g.Expect(env.Cleanup(ctx, node)).To(Succeed()) - }() - - g.Expect(env.Create(ctx, cluster)).To(Succeed()) - defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) - g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) - - g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) - g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) - // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. - preUpdate := time.Now().Add(-2 * time.Second) - g.Expect(env.Create(ctx, machine)).To(Succeed()) - - modifiedMachine := machine.DeepCopy() - // Set NodeRef. - machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: node.Name} - g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) - - // Set bootstrap ready. - modifiedBootstrapConfig := bootstrapConfig.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) - - // Set infra ready. - modifiedInfraMachine := infraMachine.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) - g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, []interface{}{ - map[string]interface{}{ - "type": "InternalIP", - "address": "10.0.0.1", - }, - map[string]interface{}{ - "type": "InternalIP", - "address": "10.0.0.2", - }, - }, "status", "addresses")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) - - // Wait until Machine was reconciled. 
- g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { - return false - } - g.Expect(machine.Status.Addresses).To(HaveLen(2)) - g.Expect(*machine.Spec.FailureDomain).To(Equal("us-east-2a")) - g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseRunning)) - // Verify that the LastUpdated timestamp was updated - g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) - g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) - return true - }, 10*time.Second).Should(BeTrue()) - }) - - t.Run("Should set `Running` when bootstrap and infra is ready with no Status.Addresses", func(t *testing.T) { - g := NewWithT(t) - - ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") - g.Expect(err).ToNot(HaveOccurred()) - defer func() { - g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) - }() - - nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) - - cluster := defaultCluster.DeepCopy() - cluster.Namespace = ns.Name - - bootstrapConfig := defaultBootstrap.DeepCopy() - bootstrapConfig.SetNamespace(ns.Name) - infraMachine := defaultInfra.DeepCopy() - infraMachine.SetNamespace(ns.Name) - g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) - machine := defaultMachine.DeepCopy() - machine.Namespace = ns.Name - - // Create Node. - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "machine-test-node-", - }, - Spec: corev1.NodeSpec{ProviderID: nodeProviderID}, - } - g.Expect(env.Create(ctx, node)).To(Succeed()) - defer func() { - g.Expect(env.Cleanup(ctx, node)).To(Succeed()) - }() - - g.Expect(env.Create(ctx, cluster)).To(Succeed()) - defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) - g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) - - g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) - g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) - // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. - preUpdate := time.Now().Add(-2 * time.Second) - g.Expect(env.Create(ctx, machine)).To(Succeed()) - - modifiedMachine := machine.DeepCopy() - // Set NodeRef. - machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: node.Name} - g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) - - // Set bootstrap ready. - modifiedBootstrapConfig := bootstrapConfig.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) - - // Set infra ready. - modifiedInfraMachine := infraMachine.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) - - // Wait until Machine was reconciled. 
- g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { - return false - } - g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseRunning)) - g.Expect(machine.Status.Addresses).To(BeEmpty()) - // Verify that the LastUpdated timestamp was updated - g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) - g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) - return true - }, 10*time.Second).Should(BeTrue()) - }) - - t.Run("Should set `Running` when bootstrap, infra, and NodeRef is ready", func(t *testing.T) { - g := NewWithT(t) - - ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") - g.Expect(err).ToNot(HaveOccurred()) - defer func() { - g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) - }() - - nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) - - cluster := defaultCluster.DeepCopy() - cluster.Namespace = ns.Name - - bootstrapConfig := defaultBootstrap.DeepCopy() - bootstrapConfig.SetNamespace(ns.Name) - infraMachine := defaultInfra.DeepCopy() - infraMachine.SetNamespace(ns.Name) - g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) - machine := defaultMachine.DeepCopy() - machine.Namespace = ns.Name - - // Create Node. - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "machine-test-node-", - }, - Spec: corev1.NodeSpec{ProviderID: nodeProviderID}, - } - g.Expect(env.Create(ctx, node)).To(Succeed()) - defer func() { - g.Expect(env.Cleanup(ctx, node)).To(Succeed()) - }() - - g.Expect(env.Create(ctx, cluster)).To(Succeed()) - defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) - g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) - - g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) - g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) - // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. - preUpdate := time.Now().Add(-2 * time.Second) - g.Expect(env.Create(ctx, machine)).To(Succeed()) - - modifiedMachine := machine.DeepCopy() - // Set NodeRef. - machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: node.Name} - g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) - - // Set bootstrap ready. - modifiedBootstrapConfig := bootstrapConfig.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) - - // Set infra ready. - modifiedInfraMachine := infraMachine.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) - - // Wait until Machine was reconciled. 
- g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { - return false - } - g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseRunning)) - // Verify that the LastUpdated timestamp was updated - g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) - g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) - return true - }, 10*time.Second).Should(BeTrue()) - }) - - t.Run("Should set `Provisioned` when there is a ProviderID and there is no Node", func(t *testing.T) { - g := NewWithT(t) - - ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") - g.Expect(err).ToNot(HaveOccurred()) - defer func() { - g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) - }() - - nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) - - cluster := defaultCluster.DeepCopy() - cluster.Namespace = ns.Name - - bootstrapConfig := defaultBootstrap.DeepCopy() - bootstrapConfig.SetNamespace(ns.Name) - infraMachine := defaultInfra.DeepCopy() - infraMachine.SetNamespace(ns.Name) - g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) - machine := defaultMachine.DeepCopy() - machine.Namespace = ns.Name - // Set Machine ProviderID. - machine.Spec.ProviderID = ptr.To(nodeProviderID) - - g.Expect(env.Create(ctx, cluster)).To(Succeed()) - defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) - g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) - - g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) - g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) - // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. - preUpdate := time.Now().Add(-2 * time.Second) - g.Expect(env.Create(ctx, machine)).To(Succeed()) - - // Set bootstrap ready. - modifiedBootstrapConfig := bootstrapConfig.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) - - // Set infra ready. - modifiedInfraMachine := infraMachine.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) - - // Wait until Machine was reconciled. 
- g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { - return false - } - g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseProvisioned)) - // Verify that the LastUpdated timestamp was updated - g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) - g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) - return true - }, 10*time.Second).Should(BeTrue()) - }) - - t.Run("Should set `Deleting` when Machine is being deleted", func(t *testing.T) { - g := NewWithT(t) - - ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") - g.Expect(err).ToNot(HaveOccurred()) - defer func() { - g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) - }() - - nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) - - cluster := defaultCluster.DeepCopy() - cluster.Namespace = ns.Name - - bootstrapConfig := defaultBootstrap.DeepCopy() - bootstrapConfig.SetNamespace(ns.Name) - infraMachine := defaultInfra.DeepCopy() - infraMachine.SetNamespace(ns.Name) - g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) - machine := defaultMachine.DeepCopy() - machine.Namespace = ns.Name - - // Create Node. - node := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "machine-test-node-", - }, - Spec: corev1.NodeSpec{ProviderID: nodeProviderID}, - } - g.Expect(env.Create(ctx, node)).To(Succeed()) - defer func() { - g.Expect(env.Cleanup(ctx, node)).To(Succeed()) - }() - - g.Expect(env.Create(ctx, cluster)).To(Succeed()) - defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) - g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) - - g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) - g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) - // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. - preUpdate := time.Now().Add(-2 * time.Second) - g.Expect(env.Create(ctx, machine)).To(Succeed()) - - // Set bootstrap ready. - modifiedBootstrapConfig := bootstrapConfig.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) - g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) - - // Set infra ready. - modifiedInfraMachine := infraMachine.DeepCopy() - g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) - g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) - - // Wait until the Machine has the Machine finalizer - g.Eventually(func() []string { - if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { - return nil - } - return machine.Finalizers - }, 10*time.Second).Should(HaveLen(1)) - - modifiedMachine := machine.DeepCopy() - // Set NodeRef. - machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: node.Name} - g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) - - modifiedMachine = machine.DeepCopy() - // Set finalizer so we can check the Machine later, otherwise it would be already gone. 
- modifiedMachine.Finalizers = append(modifiedMachine.Finalizers, "test") - g.Expect(env.Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) - - // Delete Machine - g.Expect(env.Delete(ctx, machine)).To(Succeed()) - - // Wait until Machine was reconciled. - g.Eventually(func(g Gomega) bool { - if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { - return false - } - g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseDeleting)) - nodeHealthyCondition := conditions.Get(machine, clusterv1.MachineNodeHealthyCondition) - g.Expect(nodeHealthyCondition.Status).To(Equal(corev1.ConditionFalse)) - g.Expect(nodeHealthyCondition.Reason).To(Equal(clusterv1.DeletingReason)) - // Verify that the LastUpdated timestamp was updated - g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) - g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) - return true - }, 10*time.Second).Should(BeTrue()) - }) -} - func TestReconcileBootstrap(t *testing.T) { defaultMachine := clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{ @@ -857,7 +269,7 @@ func TestReconcileBootstrap(t *testing.T) { bootstrapConfigGetError: nil, expectResult: ctrl.Result{}, expectError: false, - expected: func(g *WithT, m *clusterv1.Machine) {}, + expected: func(_ *WithT, _ *clusterv1.Machine) {}, }, } @@ -1388,7 +800,7 @@ func TestReconcileInfrastructure(t *testing.T) { infraMachineGetError: nil, expectResult: ctrl.Result{}, expectError: false, - expected: func(g *WithT, m *clusterv1.Machine) {}, + expected: func(_ *WithT, _ *clusterv1.Machine) {}, }, { name: "infra machine is not found is tolerated when infrastructure ready and machine is deleting", @@ -1414,7 +826,7 @@ func TestReconcileInfrastructure(t *testing.T) { infraMachineGetError: nil, expectResult: ctrl.Result{}, expectError: false, - expected: func(g *WithT, m *clusterv1.Machine) {}, + expected: func(_ *WithT, _ *clusterv1.Machine) {}, }, } diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go new file mode 100644 index 000000000000..2b9a99eedba8 --- /dev/null +++ b/internal/controllers/machine/machine_controller_status.go @@ -0,0 +1,486 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine + +import ( + "context" + "fmt" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/klog/v2" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/internal/contract" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" +) + +// reconcileStatus reconciles Machine's status during the entire lifecycle of the machine. +// This implies that the code in this function should account for several edge cases e.g. 
machine being partially provisioned,
+// machine being partially deleted, but also running machines being disrupted, e.g. by deleting the node.
+// Additionally, this func should ensure that the conditions managed by this controller are always set in order to
+// comply with the recommendation in the Kubernetes API guidelines.
+// Note: v1beta1 conditions are not managed by this func.
+func (r *Reconciler) reconcileStatus(ctx context.Context, s *scope) {
+	// Update status from the Bootstrap Config external resource.
+	// Note: the following status fields are managed in reconcileBootstrap.
+	// - status.BootstrapReady
+	// - status.Addresses
+	// - status.FailureReason
+	// - status.FailureMessage
+	setBootstrapReadyCondition(ctx, s.machine, s.bootstrapConfig)
+
+	// Update status from the InfraMachine external resource.
+	// Note: the following status fields are managed in reconcileInfrastructure.
+	// - status.InfrastructureReady
+	// - status.FailureReason
+	// - status.FailureMessage
+	setInfrastructureReadyCondition(ctx, s.machine, s.infraMachine)
+
+	// Update status from the Node external resource.
+	// Note: the following status fields are managed in reconcileNode.
+	// - status.NodeRef
+	// - status.NodeInfo
+	setNodeHealthyAndReadyConditions(ctx, s.machine, s.node)
+
+	// Update Machine status not observed from Bootstrap Config, InfraMachine or Node (update Machine's own status).
+	// Note:
+	// - status.CertificatesExpiryDate is managed in reconcileCertificateExpiry.
+	// - status.ObservedGeneration is updated by the defer patch at the end of the main reconcile loop.
+	// - status.Deletion nested fields are updated in reconcileDelete.
+	// - The UpToDate condition is set by the machine's owner controller. // TODO: compute UpToDate for stand-alone machines
+	// - The HealthCheckSucceeded condition is set by the MHC controller.
+	// - The OwnerRemediated condition is set by the MHC controller, but it is updated by the controller owning the machine
+	//   while it carries out the remediation process.
+
+	setReadyCondition(ctx, s.machine)
+
+	setAvailableCondition(ctx, s.machine)
+
+	// TODO: Update the Deleting condition.
+
+	setPausedCondition(s)
+
+	setMachinePhaseAndLastUpdated(ctx, s.machine)
+}
+
+func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, bootstrapConfig *unstructured.Unstructured) {
+	if machine.Spec.Bootstrap.ConfigRef == nil {
+		if ptr.Deref(machine.Spec.Bootstrap.DataSecretName, "") != "" {
+			v1beta2conditions.Set(machine, metav1.Condition{
+				Type:   clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
+				Status: metav1.ConditionTrue,
+				Reason: clusterv1.MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason,
+			})
+			return
+		}
+
+		// Note: validation webhooks should prevent this invalid configuration from happening.
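+		// The condition below is still set here so that the Machine always reports a BootstrapConfigReady condition,
+		// even if such an invalid configuration slips through.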
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason,
+			Message: "either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty",
+		})
+		return
+	}
+
+	if bootstrapConfig != nil {
+		if err := v1beta2conditions.SetMirrorConditionFromUnstructured(
+			bootstrapConfig, machine,
+			contract.Bootstrap().ReadyConditionType(), v1beta2conditions.TargetConditionType(clusterv1.MachineBootstrapConfigReadyV1Beta2Condition),
+			v1beta2conditions.FallbackCondition{
+				Status: v1beta2conditions.BoolToStatus(machine.Status.BootstrapReady),
+				Reason: clusterv1.MachineBootstrapConfigReadyNoV1Beta2ReasonReported,
+			},
+		); err != nil {
+			v1beta2conditions.Set(machine, metav1.Condition{
+				Type:    clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
+				Status:  metav1.ConditionFalse,
+				Reason:  clusterv1.MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason,
+				Message: fmt.Sprintf("%s %s reports an invalid %s condition: %s", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Namespace, machine.Spec.Bootstrap.ConfigRef.Name), contract.Bootstrap().ReadyConditionType(), err.Error()),
+			})
+		}
+		return
+	}
+
+	// Tolerate Bootstrap config missing when the machine is deleting.
+	// NOTE: this code assumes that Bootstrap config deletion has been initiated by the controller itself,
+	// and thus this state is reported as Deleted instead of NotFound.
+	if !machine.DeletionTimestamp.IsZero() {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.MachineBootstrapConfigDeletedV1Beta2Reason,
+			Message: fmt.Sprintf("%s %s has been deleted", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Namespace, machine.Spec.Bootstrap.ConfigRef.Name)),
+		})
+		return
+	}
+
+	// If the machine is not deleting, and the bootstrap config object does not exist yet,
+	// surface the fact that the controller is waiting for the bootstrap config to exist, which could
+	// happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely.
+	v1beta2conditions.Set(machine, metav1.Condition{
+		Type:    clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
+		Status:  metav1.ConditionUnknown,
+		Reason:  clusterv1.MachineBootstrapConfigNotFoundV1Beta2Reason,
+		Message: fmt.Sprintf("waiting for %s %s to exist", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Namespace, machine.Spec.Bootstrap.ConfigRef.Name)),
+	})
+}
+
+func setInfrastructureReadyCondition(_ context.Context, machine *clusterv1.Machine, infraMachine *unstructured.Unstructured) {
+	if infraMachine != nil {
+		if err := v1beta2conditions.SetMirrorConditionFromUnstructured(
+			infraMachine, machine,
+			contract.InfrastructureMachine().ReadyConditionType(), v1beta2conditions.TargetConditionType(clusterv1.MachineInfrastructureReadyV1Beta2Condition),
+			v1beta2conditions.FallbackCondition{
+				Status: v1beta2conditions.BoolToStatus(machine.Status.InfrastructureReady),
+				Reason: clusterv1.MachineInfrastructureReadyNoV1Beta2ReasonReported,
+			},
+		); err != nil {
+			v1beta2conditions.Set(machine, metav1.Condition{
+				Type:    clusterv1.MachineInfrastructureReadyV1Beta2Condition,
+				Status:  metav1.ConditionFalse,
+				Reason:  clusterv1.MachineInfrastructureInvalidConditionReportedV1Beta2Reason,
+				Message: fmt.Sprintf("%s %s reports an invalid %s condition: %s", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name), contract.InfrastructureMachine().ReadyConditionType(), err.Error()),
+			})
+		}
+		return
+	}
+
+	// Tolerate infra machine missing when the machine is deleting.
+	// NOTE: this code assumes that infra machine deletion has been initiated by the controller itself,
+	// and thus this state is reported as Deleted instead of NotFound.
+	if !machine.DeletionTimestamp.IsZero() {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:   clusterv1.MachineInfrastructureReadyV1Beta2Condition,
+			Status: metav1.ConditionUnknown,
+			Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason,
+		})
+		return
+	}
+
+	// Report an issue if the infra machine is missing after the machine has been initialized.
+	if machine.Status.InfrastructureReady {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineInfrastructureReadyV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  clusterv1.MachineInfrastructureDeletedV1Beta2Reason,
+			Message: fmt.Sprintf("%s %s has been deleted while the machine still exists", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name)),
+		})
+		return
+	}
+
+	// If the machine is not deleting, and the infra machine object does not exist yet,
+	// surface the fact that the controller is waiting for the infra machine to exist, which could
+	// happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely.
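+	// For example (illustrative only), a Machine whose InfraMachine has not been created yet will report the
+	// MachineInfrastructureReadyV1Beta2Condition with status Unknown and reason MachineInfrastructureNotFoundV1Beta2Reason.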
+	v1beta2conditions.Set(machine, metav1.Condition{
+		Type:   clusterv1.MachineInfrastructureReadyV1Beta2Condition,
+		Status: metav1.ConditionUnknown,
+		Reason: clusterv1.MachineInfrastructureNotFoundV1Beta2Reason,
+	})
+}
+
+func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Machine, node *corev1.Node) {
+	if node != nil {
+		var nodeReady *metav1.Condition
+		for _, condition := range node.Status.Conditions {
+			if condition.Type == corev1.NodeReady {
+				nodeReady = &metav1.Condition{
+					Type:               clusterv1.MachineNodeReadyV1Beta2Condition,
+					Status:             metav1.ConditionStatus(condition.Status),
+					LastTransitionTime: condition.LastTransitionTime,
+					Reason:             condition.Reason,
+					Message:            condition.Message,
+				}
+			}
+		}
+
+		if nodeReady == nil {
+			nodeReady = &metav1.Condition{
+				Type:   clusterv1.MachineNodeReadyV1Beta2Condition,
+				Status: metav1.ConditionUnknown,
+				Reason: v1beta2conditions.NotYetReportedReason,
+			}
+		}
+		v1beta2conditions.Set(machine, *nodeReady)
+
+		status, reason, message := summarizeNodeV1Beta2Conditions(ctx, node)
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineNodeHealthyV1Beta2Condition,
+			Status:  metav1.ConditionStatus(status),
+			Reason:  reason,
+			Message: message,
+		})
+
+		return
+	}
+
+	// Tolerate node missing when the machine is deleting.
+	// NOTE: controllers always assume that node deletion has been initiated by the controller itself,
+	// and thus this state is reported as Deleted instead of NotFound.
+	if !machine.DeletionTimestamp.IsZero() {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:   clusterv1.MachineNodeReadyV1Beta2Condition,
+			Status: metav1.ConditionUnknown,
+			Reason: clusterv1.MachineNodeDeletedV1Beta2Reason,
+		})
+
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:   clusterv1.MachineNodeHealthyV1Beta2Condition,
+			Status: metav1.ConditionUnknown,
+			Reason: clusterv1.MachineNodeDeletedV1Beta2Reason,
+		})
+		return
+	}
+
+	// Report an issue if the node is missing after being initialized.
+	if machine.Status.NodeRef != nil {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineNodeReadyV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  clusterv1.MachineNodeDeletedV1Beta2Reason,
+			Message: fmt.Sprintf("Node %s has been deleted while the machine still exists", machine.Status.NodeRef.Name),
+		})
+
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineNodeHealthyV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  clusterv1.MachineNodeDeletedV1Beta2Reason,
+			Message: fmt.Sprintf("Node %s has been deleted while the machine still exists", machine.Status.NodeRef.Name),
+		})
+		return
+	}
+
+	if ptr.Deref(machine.Spec.ProviderID, "") != "" {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineNodeReadyV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.MachineNodeNotFoundV1Beta2Reason,
+			Message: fmt.Sprintf("Waiting for a node with Provider ID %s to exist", *machine.Spec.ProviderID),
+		})
+
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineNodeHealthyV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.MachineNodeNotFoundV1Beta2Reason,
+			Message: fmt.Sprintf("Waiting for a node with Provider ID %s to exist", *machine.Spec.ProviderID),
+		})
+		return
+	}
+
+	// Surface the fact that the controller is waiting for the InfraMachine to report spec.providerID, which could
+	// happen when creating the machine.
However, this state should be treated as an error if it last indefinitely. + v1beta2conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Message: fmt.Sprintf("Waiting for %s %s to report spec.providerID", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Spec.InfrastructureRef.Namespace, machine.Spec.InfrastructureRef.Name)), + }) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Message: fmt.Sprintf("Waiting for %s %s to report spec.providerID", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Spec.InfrastructureRef.Namespace, machine.Spec.InfrastructureRef.Name)), + }) +} + +func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (corev1.ConditionStatus, string, string) { + semanticallyFalseStatus := 0 + unknownStatus := 0 + + message := "" + issueReason := "" + unknownReason := "" + for _, condition := range node.Status.Conditions { + switch condition.Type { + case corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure: + if condition.Status != corev1.ConditionFalse { + message += fmt.Sprintf("Node's %s condition is %s", condition.Type, condition.Status) + ". " + if condition.Status == corev1.ConditionUnknown { + if unknownReason == "" { + unknownReason = condition.Reason + } else { + unknownReason = v1beta2conditions.MultipleUnknownReportedReason + } + unknownStatus++ + continue + } + if issueReason == "" { + issueReason = condition.Reason + } else { + issueReason = v1beta2conditions.MultipleIssuesReportedReason + } + semanticallyFalseStatus++ + } + case corev1.NodeReady: + if condition.Status != corev1.ConditionTrue { + message += fmt.Sprintf("Node's %s condition is %s", condition.Type, condition.Status) + ". " + if condition.Status == corev1.ConditionUnknown { + if unknownReason == "" { + unknownReason = condition.Reason + } else { + unknownReason = v1beta2conditions.MultipleUnknownReportedReason + } + unknownStatus++ + continue + } + if issueReason == "" { + issueReason = condition.Reason + } else { + issueReason = v1beta2conditions.MultipleIssuesReportedReason + } + semanticallyFalseStatus++ + } + } + } + message = strings.TrimSuffix(message, ". ") + if semanticallyFalseStatus > 0 { + return corev1.ConditionFalse, issueReason, message + } + if semanticallyFalseStatus+unknownStatus > 0 { + return corev1.ConditionUnknown, unknownReason, message + } + return corev1.ConditionTrue, v1beta2conditions.MultipleInfoReportedReason, message +} + +func setReadyCondition(ctx context.Context, machine *clusterv1.Machine) { + log := ctrl.LoggerFrom(ctx) + + forConditionTypes := v1beta2conditions.ForConditionTypes{ + // TODO: add machine deleting once implemented. 
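+		// Note: conditions from the Machine's spec.readinessGates, if any, are appended to this list below.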
+		clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
+		clusterv1.MachineInfrastructureReadyV1Beta2Condition,
+		clusterv1.MachineNodeHealthyV1Beta2Condition,
+		clusterv1.MachineHealthCheckSucceededV1Beta2Condition,
+	}
+	for _, g := range machine.Spec.ReadinessGates {
+		forConditionTypes = append(forConditionTypes, g.ConditionType)
+	}
+	readyCondition, err := v1beta2conditions.NewSummaryCondition(machine, clusterv1.MachineReadyV1Beta2Condition, forConditionTypes,
+		v1beta2conditions.IgnoreTypesIfMissing{clusterv1.MachineHealthCheckSucceededV1Beta2Condition},
+		// TODO: think about the step counter
+	)
+	if err != nil || readyCondition == nil {
+		// Note: this could only happen if we hit edge cases in computing the summary, which should not happen given
+		// that we are passing a non-empty list of ForConditionTypes.
+		log.Error(err, "failed to set ready condition")
+		readyCondition = &metav1.Condition{
+			Type:    clusterv1.MachineReadyV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.MachineErrorComputingReadyV1Beta2Reason,
+			Message: "Please check controller logs for errors",
+		}
+	}
+
+	v1beta2conditions.Set(machine, *readyCondition)
+}
+
+func setAvailableCondition(_ context.Context, machine *clusterv1.Machine) {
+	readyCondition := v1beta2conditions.Get(machine, clusterv1.MachineReadyV1Beta2Condition)
+
+	if readyCondition == nil {
+		// NOTE: this should never happen given that setReadyCondition is called before this method and
+		// it always adds a Ready condition.
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    clusterv1.MachineAvailableV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.MachineReadyNotYetReportedV1Beta2Reason,
+			Message: "Please check controller logs for errors",
+		})
+		return
+	}
+
+	if readyCondition.Status != metav1.ConditionTrue {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:   clusterv1.MachineAvailableV1Beta2Condition,
+			Status: metav1.ConditionFalse,
+			Reason: clusterv1.MachineNotReadyV1Beta2Reason,
+		})
+		return
+	}
+
+	if time.Now().Add(0).After(readyCondition.LastTransitionTime.Time) {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:   clusterv1.MachineAvailableV1Beta2Condition,
+			Status: metav1.ConditionTrue,
+			Reason: clusterv1.MachineAvailableV1Beta2Reason,
+		})
+		return
+	}
+
+	v1beta2conditions.Set(machine, metav1.Condition{
+		Type:   clusterv1.MachineAvailableV1Beta2Condition,
+		Status: metav1.ConditionFalse,
+		Reason: clusterv1.MachineWaitingForMinReadySecondsV1Beta2Reason,
+	})
+}
+
+func setPausedCondition(s *scope) {
+	// Note: If we hit this code, the controller is reconciling and this Paused condition must be set to false.
+	v1beta2conditions.Set(s.machine, metav1.Condition{
+		Type:   clusterv1.MachinePausedV1Beta2Condition,
+		Status: metav1.ConditionFalse,
+		Reason: "NotPaused", // TODO: create a const.
+	})
+}
+
+func setMachinePhaseAndLastUpdated(_ context.Context, m *clusterv1.Machine) {
+	originalPhase := m.Status.Phase
+
+	// Set the phase to "pending" if nil.
+	if m.Status.Phase == "" {
+		m.Status.SetTypedPhase(clusterv1.MachinePhasePending)
+	}
+
+	// Set the phase to "provisioning" if bootstrap is ready and the infrastructure isn't.
+	if m.Status.BootstrapReady && !m.Status.InfrastructureReady {
+		m.Status.SetTypedPhase(clusterv1.MachinePhaseProvisioning)
+	}
+
+	// Set the phase to "provisioned" if there is a provider ID.
+ if m.Spec.ProviderID != nil { + m.Status.SetTypedPhase(clusterv1.MachinePhaseProvisioned) + } + + // Set the phase to "running" if there is a NodeRef field and infrastructure is ready. + if m.Status.NodeRef != nil && m.Status.InfrastructureReady { + m.Status.SetTypedPhase(clusterv1.MachinePhaseRunning) + } + + // Set the phase to "failed" if any of Status.FailureReason or Status.FailureMessage is not-nil. + if m.Status.FailureReason != nil || m.Status.FailureMessage != nil { + m.Status.SetTypedPhase(clusterv1.MachinePhaseFailed) + } + + // Set the phase to "deleting" if the deletion timestamp is set. + if !m.DeletionTimestamp.IsZero() { + m.Status.SetTypedPhase(clusterv1.MachinePhaseDeleting) + } + + // If the phase has changed, update the LastUpdated timestamp + if m.Status.Phase != originalPhase { + now := metav1.Now() + m.Status.LastUpdated = &now + } +} diff --git a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go new file mode 100644 index 000000000000..c2999f0e5dd2 --- /dev/null +++ b/internal/controllers/machine/machine_controller_status_test.go @@ -0,0 +1,732 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine + +import ( + "fmt" + "testing" + "time" + + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/conditions" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" + "sigs.k8s.io/cluster-api/util/kubeconfig" +) + +func TestSetBootstrapReadyCondition(t *testing.T) { + defaultMachine := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: "bootstrap.cluster.x-k8s.io/v1beta1", + Kind: "GenericBootstrapConfig", + Name: "bootstrap-config1", + }, + }, + }, + } + + testCases := []struct { + name string + machine *clusterv1.Machine + bootstrapConfig *unstructured.Unstructured + expectConditions []metav1.Condition + }{ + { + name: "boostrap data secret provided by user/operator", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.Spec.Bootstrap.ConfigRef = nil + m.Spec.Bootstrap.DataSecretName = ptr.To("foo") + return m + }(), + bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericBootstrapConfig", + "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "bootstrap-config1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{}, + }}, + expectConditions: []metav1.Condition{ + { + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleInfoReportedReason, + }, + { + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason, + }, + }, + }, + { + name: "InvalidConfig: machine without bootstrap config ref and with dataSecretName not set", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.Spec.Bootstrap.ConfigRef = nil + return m + }(), + bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericBootstrapConfig", + "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "bootstrap-config1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{}, + }}, + expectConditions: []metav1.Condition{ + { + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason, + Message: "BootstrapConfigReady: either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", + }, + { + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason, + Message: "either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + setBootstrapReadyCondition(ctx, tc.machine, tc.bootstrapConfig) + + // Compute ready by ensuring no other conditions influence the result. 
+ tc.machine.Status.V1Beta2.Conditions = append(tc.machine.Status.V1Beta2.Conditions, + metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + }, + metav1.Condition{ + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + }, + ) + setReadyCondition(ctx, tc.machine) + meta.RemoveStatusCondition(&tc.machine.Status.V1Beta2.Conditions, clusterv1.MachineInfrastructureReadyV1Beta2Condition) + meta.RemoveStatusCondition(&tc.machine.Status.V1Beta2.Conditions, clusterv1.MachineNodeHealthyV1Beta2Condition) + + g.Expect(tc.machine.GetV1Beta2Conditions()).To(v1beta2conditions.MatchConditions(tc.expectConditions, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func TestReconcileMachinePhases(t *testing.T) { + var defaultKubeconfigSecret *corev1.Secret + defaultCluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: metav1.NamespaceDefault, + }, + } + + defaultMachine := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + Labels: map[string]string{ + clusterv1.MachineControlPlaneLabel: "", + }, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: defaultCluster.Name, + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + APIVersion: "bootstrap.cluster.x-k8s.io/v1beta1", + Kind: "GenericBootstrapConfig", + Name: "bootstrap-config1", + }, + }, + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "GenericInfrastructureMachine", + Name: "infra-config1", + }, + }, + } + + defaultBootstrap := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "kind": "GenericBootstrapConfig", + "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "bootstrap-config1", + "namespace": metav1.NamespaceDefault, + }, + "spec": map[string]interface{}{}, + "status": map[string]interface{}{}, + }, + } + + defaultInfra := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "kind": "GenericInfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-config1", + "namespace": metav1.NamespaceDefault, + }, + "spec": map[string]interface{}{}, + "status": map[string]interface{}{}, + }, + } + + t.Run("Should set OwnerReference and cluster name label on external objects", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + cluster := defaultCluster.DeepCopy() + cluster.Namespace = ns.Name + + bootstrapConfig := defaultBootstrap.DeepCopy() + bootstrapConfig.SetNamespace(ns.Name) + infraMachine := defaultInfra.DeepCopy() + infraMachine.SetNamespace(ns.Name) + machine := defaultMachine.DeepCopy() + machine.Namespace = ns.Name + + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) + g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) + + g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) + g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) + g.Expect(env.Create(ctx, machine)).To(Succeed()) + + // Wait until BootstrapConfig has the ownerReference. 
+ g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(bootstrapConfig), bootstrapConfig); err != nil { + return false + } + g.Expect(bootstrapConfig.GetOwnerReferences()).To(HaveLen(1)) + g.Expect(bootstrapConfig.GetLabels()[clusterv1.ClusterNameLabel]).To(Equal("test-cluster")) + return true + }, 10*time.Second).Should(BeTrue()) + + // Wait until InfraMachine has the ownerReference. + g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(infraMachine), infraMachine); err != nil { + return false + } + g.Expect(infraMachine.GetOwnerReferences()).To(HaveLen(1)) + g.Expect(infraMachine.GetLabels()[clusterv1.ClusterNameLabel]).To(Equal("test-cluster")) + return true + }, 10*time.Second).Should(BeTrue()) + }) + + t.Run("Should set `Pending` with a new Machine", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + cluster := defaultCluster.DeepCopy() + cluster.Namespace = ns.Name + + bootstrapConfig := defaultBootstrap.DeepCopy() + bootstrapConfig.SetNamespace(ns.Name) + infraMachine := defaultInfra.DeepCopy() + infraMachine.SetNamespace(ns.Name) + machine := defaultMachine.DeepCopy() + machine.Namespace = ns.Name + + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) + g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) + + g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) + g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) + g.Expect(env.Create(ctx, machine)).To(Succeed()) + + // Wait until Machine was reconciled. + g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { + return false + } + g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhasePending)) + // LastUpdated should be set as the phase changes + g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) + return true + }, 10*time.Second).Should(BeTrue()) + }) + + t.Run("Should set `Provisioning` when bootstrap is ready", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + cluster := defaultCluster.DeepCopy() + cluster.Namespace = ns.Name + + bootstrapConfig := defaultBootstrap.DeepCopy() + bootstrapConfig.SetNamespace(ns.Name) + infraMachine := defaultInfra.DeepCopy() + infraMachine.SetNamespace(ns.Name) + machine := defaultMachine.DeepCopy() + machine.Namespace = ns.Name + + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) + g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) + + g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) + g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) + // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. 
+ preUpdate := time.Now().Add(-2 * time.Second) + g.Expect(env.Create(ctx, machine)).To(Succeed()) + + // Set the LastUpdated to be able to verify it is updated when the phase changes + modifiedMachine := machine.DeepCopy() + g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) + + // Set bootstrap ready. + modifiedBootstrapConfig := bootstrapConfig.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) + + // Wait until Machine was reconciled. + g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { + return false + } + g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseProvisioning)) + // Verify that the LastUpdated timestamp was updated + g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) + g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) + return true + }, 10*time.Second).Should(BeTrue()) + }) + + t.Run("Should set `Running` when bootstrap and infra is ready", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) + + cluster := defaultCluster.DeepCopy() + cluster.Namespace = ns.Name + + bootstrapConfig := defaultBootstrap.DeepCopy() + bootstrapConfig.SetNamespace(ns.Name) + infraMachine := defaultInfra.DeepCopy() + infraMachine.SetNamespace(ns.Name) + g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) + g.Expect(unstructured.SetNestedField(infraMachine.Object, "us-east-2a", "spec", "failureDomain")).To(Succeed()) + machine := defaultMachine.DeepCopy() + machine.Namespace = ns.Name + + // Create Node. + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "machine-test-node-", + }, + Spec: corev1.NodeSpec{ProviderID: nodeProviderID}, + } + g.Expect(env.Create(ctx, node)).To(Succeed()) + defer func() { + g.Expect(env.Cleanup(ctx, node)).To(Succeed()) + }() + + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) + g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) + + g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) + g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) + // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. + preUpdate := time.Now().Add(-2 * time.Second) + g.Expect(env.Create(ctx, machine)).To(Succeed()) + + modifiedMachine := machine.DeepCopy() + // Set NodeRef. + machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: node.Name} + g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) + + // Set bootstrap ready. 
+ modifiedBootstrapConfig := bootstrapConfig.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) + + // Set infra ready. + modifiedInfraMachine := infraMachine.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) + g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, []interface{}{ + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.1", + }, + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.2", + }, + }, "status", "addresses")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) + + // Wait until Machine was reconciled. + g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { + return false + } + g.Expect(machine.Status.Addresses).To(HaveLen(2)) + g.Expect(*machine.Spec.FailureDomain).To(Equal("us-east-2a")) + g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseRunning)) + // Verify that the LastUpdated timestamp was updated + g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) + g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) + return true + }, 10*time.Second).Should(BeTrue()) + }) + + t.Run("Should set `Running` when bootstrap and infra is ready with no Status.Addresses", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) + + cluster := defaultCluster.DeepCopy() + cluster.Namespace = ns.Name + + bootstrapConfig := defaultBootstrap.DeepCopy() + bootstrapConfig.SetNamespace(ns.Name) + infraMachine := defaultInfra.DeepCopy() + infraMachine.SetNamespace(ns.Name) + g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) + machine := defaultMachine.DeepCopy() + machine.Namespace = ns.Name + + // Create Node. + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "machine-test-node-", + }, + Spec: corev1.NodeSpec{ProviderID: nodeProviderID}, + } + g.Expect(env.Create(ctx, node)).To(Succeed()) + defer func() { + g.Expect(env.Cleanup(ctx, node)).To(Succeed()) + }() + + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) + g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) + + g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) + g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) + // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. + preUpdate := time.Now().Add(-2 * time.Second) + g.Expect(env.Create(ctx, machine)).To(Succeed()) + + modifiedMachine := machine.DeepCopy() + // Set NodeRef. + machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: node.Name} + g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) + + // Set bootstrap ready. 
+ modifiedBootstrapConfig := bootstrapConfig.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) + + // Set infra ready. + modifiedInfraMachine := infraMachine.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) + + // Wait until Machine was reconciled. + g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { + return false + } + g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseRunning)) + g.Expect(machine.Status.Addresses).To(BeEmpty()) + // Verify that the LastUpdated timestamp was updated + g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) + g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) + return true + }, 10*time.Second).Should(BeTrue()) + }) + + t.Run("Should set `Running` when bootstrap, infra, and NodeRef is ready", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) + + cluster := defaultCluster.DeepCopy() + cluster.Namespace = ns.Name + + bootstrapConfig := defaultBootstrap.DeepCopy() + bootstrapConfig.SetNamespace(ns.Name) + infraMachine := defaultInfra.DeepCopy() + infraMachine.SetNamespace(ns.Name) + g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) + machine := defaultMachine.DeepCopy() + machine.Namespace = ns.Name + + // Create Node. + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "machine-test-node-", + }, + Spec: corev1.NodeSpec{ProviderID: nodeProviderID}, + } + g.Expect(env.Create(ctx, node)).To(Succeed()) + defer func() { + g.Expect(env.Cleanup(ctx, node)).To(Succeed()) + }() + + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) + g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) + + g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) + g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) + // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. + preUpdate := time.Now().Add(-2 * time.Second) + g.Expect(env.Create(ctx, machine)).To(Succeed()) + + modifiedMachine := machine.DeepCopy() + // Set NodeRef. + machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: node.Name} + g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) + + // Set bootstrap ready. 
+ modifiedBootstrapConfig := bootstrapConfig.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) + + // Set infra ready. + modifiedInfraMachine := infraMachine.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) + + // Wait until Machine was reconciled. + g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { + return false + } + g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseRunning)) + // Verify that the LastUpdated timestamp was updated + g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) + g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) + return true + }, 10*time.Second).Should(BeTrue()) + }) + + t.Run("Should set `Provisioned` when there is a ProviderID and there is no Node", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) + + cluster := defaultCluster.DeepCopy() + cluster.Namespace = ns.Name + + bootstrapConfig := defaultBootstrap.DeepCopy() + bootstrapConfig.SetNamespace(ns.Name) + infraMachine := defaultInfra.DeepCopy() + infraMachine.SetNamespace(ns.Name) + g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) + machine := defaultMachine.DeepCopy() + machine.Namespace = ns.Name + // Set Machine ProviderID. + machine.Spec.ProviderID = ptr.To(nodeProviderID) + + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) + g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) + + g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) + g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) + // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. + preUpdate := time.Now().Add(-2 * time.Second) + g.Expect(env.Create(ctx, machine)).To(Succeed()) + + // Set bootstrap ready. + modifiedBootstrapConfig := bootstrapConfig.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) + + // Set infra ready. + modifiedInfraMachine := infraMachine.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) + + // Wait until Machine was reconciled. 
+ g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { + return false + } + g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseProvisioned)) + // Verify that the LastUpdated timestamp was updated + g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) + g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) + return true + }, 10*time.Second).Should(BeTrue()) + }) + + t.Run("Should set `Deleting` when Machine is being deleted", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "test-reconcile-machine-phases") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + nodeProviderID := fmt.Sprintf("test://%s", util.RandomString(6)) + + cluster := defaultCluster.DeepCopy() + cluster.Namespace = ns.Name + + bootstrapConfig := defaultBootstrap.DeepCopy() + bootstrapConfig.SetNamespace(ns.Name) + infraMachine := defaultInfra.DeepCopy() + infraMachine.SetNamespace(ns.Name) + g.Expect(unstructured.SetNestedField(infraMachine.Object, nodeProviderID, "spec", "providerID")).To(Succeed()) + machine := defaultMachine.DeepCopy() + machine.Namespace = ns.Name + + // Create Node. + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "machine-test-node-", + }, + Spec: corev1.NodeSpec{ProviderID: nodeProviderID}, + } + g.Expect(env.Create(ctx, node)).To(Succeed()) + defer func() { + g.Expect(env.Cleanup(ctx, node)).To(Succeed()) + }() + + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + defaultKubeconfigSecret = kubeconfig.GenerateSecret(cluster, kubeconfig.FromEnvTestConfig(env.Config, cluster)) + g.Expect(env.Create(ctx, defaultKubeconfigSecret)).To(Succeed()) + + g.Expect(env.Create(ctx, bootstrapConfig)).To(Succeed()) + g.Expect(env.Create(ctx, infraMachine)).To(Succeed()) + // We have to subtract 2 seconds, because .status.lastUpdated does not contain miliseconds. + preUpdate := time.Now().Add(-2 * time.Second) + g.Expect(env.Create(ctx, machine)).To(Succeed()) + + // Set bootstrap ready. + modifiedBootstrapConfig := bootstrapConfig.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, true, "status", "ready")).To(Succeed()) + g.Expect(unstructured.SetNestedField(modifiedBootstrapConfig.Object, "secret-data", "status", "dataSecretName")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedBootstrapConfig, client.MergeFrom(bootstrapConfig))).To(Succeed()) + + // Set infra ready. + modifiedInfraMachine := infraMachine.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedInfraMachine.Object, true, "status", "ready")).To(Succeed()) + g.Expect(env.Status().Patch(ctx, modifiedInfraMachine, client.MergeFrom(infraMachine))).To(Succeed()) + + // Wait until the Machine has the Machine finalizer + g.Eventually(func() []string { + if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { + return nil + } + return machine.Finalizers + }, 10*time.Second).Should(HaveLen(1)) + + modifiedMachine := machine.DeepCopy() + // Set NodeRef. + machine.Status.NodeRef = &corev1.ObjectReference{Kind: "Node", Name: node.Name} + g.Expect(env.Status().Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) + + modifiedMachine = machine.DeepCopy() + // Set finalizer so we can check the Machine later, otherwise it would be already gone. 
+ modifiedMachine.Finalizers = append(modifiedMachine.Finalizers, "test") + g.Expect(env.Patch(ctx, modifiedMachine, client.MergeFrom(machine))).To(Succeed()) + + // Delete Machine + g.Expect(env.Delete(ctx, machine)).To(Succeed()) + + // Wait until Machine was reconciled. + g.Eventually(func(g Gomega) bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(machine), machine); err != nil { + return false + } + g.Expect(machine.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseDeleting)) + nodeHealthyCondition := conditions.Get(machine, clusterv1.MachineNodeHealthyCondition) + g.Expect(nodeHealthyCondition.Status).To(Equal(corev1.ConditionFalse)) + g.Expect(nodeHealthyCondition.Reason).To(Equal(clusterv1.DeletingReason)) + // Verify that the LastUpdated timestamp was updated + g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) + g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) + return true + }, 10*time.Second).Should(BeTrue()) + }) +} diff --git a/util/conditions/v1beta2/mirror.go b/util/conditions/v1beta2/mirror.go index 5d1bfc71125b..098e61c9135a 100644 --- a/util/conditions/v1beta2/mirror.go +++ b/util/conditions/v1beta2/mirror.go @@ -21,6 +21,7 @@ import ( "strings" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" ) // NotYetReportedReason is set on missing conditions generated during mirror, aggregate or summary operations. @@ -37,6 +38,9 @@ type MirrorOption interface { // MirrorOptions allows to set options for the mirror operation. type MirrorOptions struct { targetConditionType string + fallbackStatus metav1.ConditionStatus + fallbackReason string + fallbackMessage string } // ApplyOptions applies the given list options on these options, @@ -54,6 +58,12 @@ func (o *MirrorOptions) ApplyOptions(opts []MirrorOption) *MirrorOptions { // By default, the Mirror condition has the same type as the source condition, but this can be changed by using // the TargetConditionType option. func NewMirrorCondition(sourceObj Getter, sourceConditionType string, opts ...MirrorOption) *metav1.Condition { + condition := Get(sourceObj, sourceConditionType) + + return newMirrorCondition(sourceObj, condition, sourceConditionType, opts) +} + +func newMirrorCondition(sourceObj any, condition *metav1.Condition, sourceConditionType string, opts []MirrorOption) *metav1.Condition { mirrorOpt := &MirrorOptions{ targetConditionType: sourceConditionType, } @@ -61,7 +71,7 @@ func NewMirrorCondition(sourceObj Getter, sourceConditionType string, opts ...Mi conditionOwner := getConditionOwnerInfo(sourceObj) - if condition := Get(sourceObj, sourceConditionType); condition != nil { + if condition != nil { return &metav1.Condition{ Type: mirrorOpt.targetConditionType, Status: condition.Status, @@ -74,6 +84,17 @@ func NewMirrorCondition(sourceObj Getter, sourceConditionType string, opts ...Mi } } + if mirrorOpt.fallbackStatus != "" { + return &metav1.Condition{ + Type: mirrorOpt.targetConditionType, + Status: mirrorOpt.fallbackStatus, + Reason: mirrorOpt.fallbackReason, + Message: mirrorOpt.fallbackMessage, + // NOTE: ObservedGeneration will be set when this condition is added to an object by calling Set. + // LastTransitionTime will be set to now. + } + } + return &metav1.Condition{ Type: mirrorOpt.targetConditionType, Status: metav1.ConditionUnknown, @@ -89,3 +110,23 @@ func SetMirrorCondition(sourceObj Getter, targetObj Setter, sourceConditionType mirrorCondition := NewMirrorCondition(sourceObj, sourceConditionType, opts...) 
Set(targetObj, *mirrorCondition) } + +// SetMirrorConditionFromUnstructured is a convenience method that calls NewMirrorCondition to create a mirror condition from the source object, +// and then calls Set to add the new condition to the target object. +func SetMirrorConditionFromUnstructured(sourceObj runtime.Unstructured, targetObj Setter, sourceConditionType string, opts ...MirrorOption) error { + condition, err := UnstructuredGet(sourceObj, sourceConditionType) + if err != nil { + return err + } + + Set(targetObj, *newMirrorCondition(sourceObj, condition, sourceConditionType, opts)) + return nil +} + +// BoolToStatus converts a bool to either metav1.ConditionTrue or metav1.ConditionFalse. +func BoolToStatus(status bool) metav1.ConditionStatus { + if status { + return metav1.ConditionTrue + } + return metav1.ConditionFalse +} diff --git a/util/conditions/v1beta2/mirror_test.go b/util/conditions/v1beta2/mirror_test.go index 91c7a9104ff7..cc8bba75c873 100644 --- a/util/conditions/v1beta2/mirror_test.go +++ b/util/conditions/v1beta2/mirror_test.go @@ -75,6 +75,19 @@ func TestMirrorStatusCondition(t *testing.T) { options: []MirrorOption{TargetConditionType("SomethingReady")}, want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported from Phase3Obj SourceObject"}, }, + { + name: "Mirror a condition not yet reported with a fallback condtion", + conditions: []metav1.Condition{}, + conditionType: "Ready", + options: []MirrorOption{ + FallbackCondition{ + Status: BoolToStatus(true), + Reason: "SomeReason", + Message: "Foo", + }, + }, + want: metav1.Condition{Type: "Ready", Status: metav1.ConditionTrue, Reason: "SomeReason", Message: "Foo"}, + }, } for _, tt := range tests { diff --git a/util/conditions/v1beta2/options.go b/util/conditions/v1beta2/options.go index 8ec2b7d81765..ed9efaba0c09 100644 --- a/util/conditions/v1beta2/options.go +++ b/util/conditions/v1beta2/options.go @@ -39,6 +39,21 @@ func (t TargetConditionType) ApplyToAggregate(opts *AggregateOptions) { opts.targetConditionType = string(t) } +// FallbackCondition defines the condition that should be returned by mirror if the source condition +// does not exist. +type FallbackCondition struct { + Status metav1.ConditionStatus + Reason string + Message string +} + +// ApplyToMirror applies this configuration to the given mirror options. +func (f FallbackCondition) ApplyToMirror(opts *MirrorOptions) { + opts.fallbackStatus = f.Status + opts.fallbackReason = f.Reason + opts.fallbackMessage = f.Message +} + // ForConditionTypes allows to define the set of conditions in scope for a summary operation. // Please note that condition types have an implicit order that can be used by the summary operation to determine relevance of the different conditions. 
type ForConditionTypes []string From 250bc17c46011a7fe700f9a361fd79b3ea8e4d41 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Wed, 9 Oct 2024 13:56:31 +0200 Subject: [PATCH 3/8] Address feedback --- .../controllers/machine/machine_controller.go | 70 +++++++++++-------- .../machine/machine_controller_phases.go | 20 ++++-- .../machine/machine_controller_status.go | 4 +- .../machine/machine_controller_status_test.go | 2 +- .../machine/machine_controller_test.go | 12 ++-- 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 56d6db239033..64d4e72cfbd8 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -195,12 +195,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re return ctrl.Result{}, nil } - // Initialize the patch helper s := &scope{ cluster: cluster, machine: m, } + // Initialize the patch helper patchHelper, err := patch.NewHelper(m, r.Client) if err != nil { return ctrl.Result{}, err @@ -220,12 +220,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } }() - // Always add the cluster label labels. - if m.Labels == nil { - m.Labels = make(map[string]string) - } - m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName - // Add finalizer first if not set to avoid the race condition between init and delete. // Note: Finalizers in general can only be added when the deletionTimestamp is not set. if !controllerutil.ContainsFinalizer(m, clusterv1.MachineFinalizer) && m.ObjectMeta.DeletionTimestamp.IsZero() { @@ -234,6 +228,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } alwaysReconcile := []machineReconcileFunc{ + r.reconcileMachineOwnerAndLabels, r.reconcileBootstrap, r.reconcileInfrastructure, r.reconcileNode, @@ -258,12 +253,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re } // Handle normal reconciliation loop. - reconcileNormal := append( - []machineReconcileFunc{r.reconcileMachineOwner}, - alwaysReconcile..., - ) - - res, err := doReconcile(ctx, reconcileNormal, s) + res, err := doReconcile(ctx, alwaysReconcile, s) // Requeue if the reconcile failed because the ClusterCacheTracker was locked for // the current cluster because of concurrent access. if errors.Is(err, remote.ErrClusterLocked) { @@ -355,15 +345,21 @@ type scope struct { // Machine. It is set after reconcileInfrastructure is called. infraMachine *unstructured.Unstructured + // infraMachineNotFound is true if getting the infra machine object failed with an IsNotFound err + infraMachineIsNotFound bool + // bootstrapConfig is the BootstrapConfig object that is referenced by the // Machine. It is set after reconcileBootstrap is called. bootstrapConfig *unstructured.Unstructured + // bootstrapConfigNotFound is true if getting the BootstrapConfig object failed with an IsNotFound err + bootstrapConfigIsNotFound bool + // node is the Kubernetes node hosted on the machine. node *corev1.Node } -func (r *Reconciler) reconcileMachineOwner(_ context.Context, s *scope) (ctrl.Result, error) { +func (r *Reconciler) reconcileMachineOwnerAndLabels(_ context.Context, s *scope) (ctrl.Result, error) { // If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly // owned by the Cluster (if not already present). 
if r.shouldAdopt(s.machine) { @@ -375,6 +371,12 @@ func (r *Reconciler) reconcileMachineOwner(_ context.Context, s *scope) (ctrl.Re })) } + // Always add the cluster label. + if s.machine.Labels == nil { + s.machine.Labels = make(map[string]string) + } + s.machine.Labels[clusterv1.ClusterNameLabel] = s.machine.Spec.ClusterName + return ctrl.Result{}, nil } @@ -504,13 +506,15 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) (ctrl.Result return ctrl.Result{}, nil } - bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, s) - if err != nil { - return ctrl.Result{}, err - } - if !bootstrapDeleted { - log.Info("Waiting for bootstrap to be deleted", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name)) - return ctrl.Result{}, nil + if m.Spec.Bootstrap.ConfigRef != nil { + bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, s) + if err != nil { + return ctrl.Result{}, err + } + if !bootstrapDeleted { + log.Info("Waiting for bootstrap to be deleted", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name)) + return ctrl.Result{}, nil + } } // We only delete the node after the underlying infrastructure is gone. @@ -869,30 +873,34 @@ func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, } func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, s *scope) (bool, error) { - if s.bootstrapConfig == nil { + if s.bootstrapConfig == nil && s.bootstrapConfigIsNotFound { conditions.MarkFalse(s.machine, clusterv1.BootstrapReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "") return true, nil } - if err := r.Client.Delete(ctx, s.bootstrapConfig); err != nil && !apierrors.IsNotFound(err) { - return false, errors.Wrapf(err, - "failed to delete %v %q for Machine %q in namespace %q", - s.bootstrapConfig.GroupVersionKind(), s.bootstrapConfig.GetName(), s.machine.Name, s.machine.Namespace) + if s.bootstrapConfig != nil { + if err := r.Client.Delete(ctx, s.bootstrapConfig); err != nil && !apierrors.IsNotFound(err) { + return false, errors.Wrapf(err, + "failed to delete %v %q for Machine %q in namespace %q", + s.bootstrapConfig.GroupVersionKind(), s.bootstrapConfig.GetName(), s.machine.Name, s.machine.Namespace) + } } return false, nil } func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, s *scope) (bool, error) { - if s.infraMachine == nil { + if s.infraMachine == nil && s.infraMachineIsNotFound { conditions.MarkFalse(s.machine, clusterv1.InfrastructureReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "") return true, nil } - if err := r.Client.Delete(ctx, s.infraMachine); err != nil && !apierrors.IsNotFound(err) { - return false, errors.Wrapf(err, - "failed to delete %v %q for Machine %q in namespace %q", - s.infraMachine.GroupVersionKind(), s.infraMachine.GetName(), s.machine.Name, s.machine.Namespace) + if s.infraMachine != nil { + if err := r.Client.Delete(ctx, s.infraMachine); err != nil && !apierrors.IsNotFound(err) { + return false, errors.Wrapf(err, + "failed to delete %v %q for Machine %q in namespace %q", + s.infraMachine.GroupVersionKind(), s.infraMachine.GetName(), s.machine.Name, s.machine.Namespace) + } } return false, nil diff --git a/internal/controllers/machine/machine_controller_phases.go b/internal/controllers/machine/machine_controller_phases.go index caefa9bdf64b..8fa3258094a4 100644 --- a/internal/controllers/machine/machine_controller_phases.go 
+++ b/internal/controllers/machine/machine_controller_phases.go @@ -142,12 +142,14 @@ func (r *Reconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Res obj, err := r.reconcileExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef) if err != nil { if apierrors.IsNotFound(err) { + s.bootstrapConfigIsNotFound = true + if !s.machine.DeletionTimestamp.IsZero() { // Tolerate bootstrap object not found when the machine is being deleted. // TODO: we can also relax this and tolerate the absence of the bootstrap ref way before, e.g. after node ref is set return ctrl.Result{}, nil } - log.Info("could not find bootstrap config object, requeuing", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name)) + log.Info("Could not find bootstrap config object, requeuing", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name)) // TODO: we can make this smarter and requeue only if we are before node ref is set return ctrl.Result{RequeueAfter: externalReadyWait}, nil } @@ -169,9 +171,13 @@ func (r *Reconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Res } // Report a summary of current status of the bootstrap object defined for this machine. + fallBack := conditions.WithFallbackValue(ready, clusterv1.WaitingForDataSecretFallbackReason, clusterv1.ConditionSeverityInfo, "") + if !s.machine.DeletionTimestamp.IsZero() { + fallBack = conditions.WithFallbackValue(ready, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") + } conditions.SetMirror(m, clusterv1.BootstrapReadyCondition, conditions.UnstructuredGetter(s.bootstrapConfig), - conditions.WithFallbackValue(ready, clusterv1.WaitingForDataSecretFallbackReason, clusterv1.ConditionSeverityInfo, ""), + fallBack, ) // If the bootstrap provider is not ready, return. @@ -205,6 +211,8 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr obj, err := r.reconcileExternal(ctx, cluster, m, &m.Spec.InfrastructureRef) if err != nil { if apierrors.IsNotFound(err) { + s.infraMachineIsNotFound = true + if !s.machine.DeletionTimestamp.IsZero() { // Tolerate infra machine not found when the machine is being deleted. return ctrl.Result{}, nil @@ -218,7 +226,7 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr m.Spec.InfrastructureRef.GroupVersionKind(), m.Spec.InfrastructureRef.Name)) return ctrl.Result{}, reconcile.TerminalError(errors.Errorf("could not find %v %q for Machine %q in namespace %q", m.Spec.InfrastructureRef.GroupVersionKind().String(), m.Spec.InfrastructureRef.Name, m.Name, m.Namespace)) } - log.Info("could not find infrastructure machine, requeuing", m.Spec.InfrastructureRef.Kind, klog.KRef(m.Spec.InfrastructureRef.Namespace, m.Spec.InfrastructureRef.Name)) + log.Info("Could not find infrastructure machine, requeuing", m.Spec.InfrastructureRef.Kind, klog.KRef(m.Spec.InfrastructureRef.Namespace, m.Spec.InfrastructureRef.Name)) return ctrl.Result{RequeueAfter: externalReadyWait}, nil } return ctrl.Result{}, err @@ -239,9 +247,13 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr } // Report a summary of current status of the infrastructure object defined for this machine. 
+ fallBack := conditions.WithFallbackValue(ready, clusterv1.WaitingForInfrastructureFallbackReason, clusterv1.ConditionSeverityInfo, "") + if !s.machine.DeletionTimestamp.IsZero() { + fallBack = conditions.WithFallbackValue(ready, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") + } conditions.SetMirror(m, clusterv1.InfrastructureReadyCondition, conditions.UnstructuredGetter(s.infraMachine), - conditions.WithFallbackValue(ready, clusterv1.WaitingForInfrastructureFallbackReason, clusterv1.ConditionSeverityInfo, ""), + fallBack, ) // If the infrastructure provider is not ready (and it wasn't ready before), return early. diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go index 2b9a99eedba8..ed4bb2b5f946 100644 --- a/internal/controllers/machine/machine_controller_status.go +++ b/internal/controllers/machine/machine_controller_status.go @@ -1,5 +1,5 @@ /* -Copyright 2025 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -185,7 +185,7 @@ func setInfrastructureReadyCondition(_ context.Context, machine *clusterv1.Machi Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason, - Message: fmt.Sprintf("%s %s has been deleted while the machine still exist", machine.Spec.Bootstrap.ConfigRef.Kind, machine.Name), + Message: fmt.Sprintf("%s %s has been deleted while the machine still exist", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name)), }) return } diff --git a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go index c2999f0e5dd2..b58f5423c47f 100644 --- a/internal/controllers/machine/machine_controller_status_test.go +++ b/internal/controllers/machine/machine_controller_status_test.go @@ -1,5 +1,5 @@ /* -Copyright 2025 The Kubernetes Authors. +Copyright 2024 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/internal/controllers/machine/machine_controller_test.go b/internal/controllers/machine/machine_controller_test.go index 816ccb7bcd4a..06fbcbeca3fe 100644 --- a/internal/controllers/machine/machine_controller_test.go +++ b/internal/controllers/machine/machine_controller_test.go @@ -2782,8 +2782,10 @@ func TestNodeDeletion(t *testing.T) { } s := &scope{ - cluster: cluster, - machine: m, + cluster: cluster, + machine: m, + infraMachineIsNotFound: true, + bootstrapConfigIsNotFound: true, } _, err := r.reconcileDelete(context.Background(), s) @@ -2905,8 +2907,10 @@ func TestNodeDeletionWithoutNodeRefFallback(t *testing.T) { } s := &scope{ - cluster: testCluster.DeepCopy(), - machine: m, + cluster: testCluster.DeepCopy(), + machine: m, + infraMachineIsNotFound: true, + bootstrapConfigIsNotFound: true, } _, err := r.reconcileDelete(context.Background(), s) From d4648de40cab153cb489c040ac58fb187563263a Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Thu, 10 Oct 2024 19:07:55 +0200 Subject: [PATCH 4/8] Complete update status --- api/v1beta1/machine_types.go | 27 +- api/v1beta1/v1beta2_condition_consts.go | 17 +- .../controllers/machine/machine_controller.go | 12 +- .../machine/machine_controller_status.go | 229 +++-- .../machine/machine_controller_status_test.go | 899 +++++++++++++++++- util/conditions/v1beta2/aggregate.go | 9 +- util/conditions/v1beta2/getter.go | 17 +- util/conditions/v1beta2/getter_test.go | 7 + util/conditions/v1beta2/merge_strategies.go | 140 +-- .../v1beta2/merge_strategies_test.go | 24 +- util/conditions/v1beta2/mirror.go | 9 +- util/conditions/v1beta2/mirror_test.go | 12 +- util/conditions/v1beta2/options.go | 8 - util/conditions/v1beta2/summary.go | 15 +- util/conditions/v1beta2/summary_test.go | 19 +- 15 files changed, 1204 insertions(+), 240 deletions(-) diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index 49f8f8fc3845..6b63419e46e2 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -135,8 +135,8 @@ const ( // MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason surfaces when a bootstrap data secret is provided by the user (without a ConfigRef). MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason = "DataSecretUserProvided" - // MachineBootstrapInvalidConfigV1Beta2Reason surfaces when MachineBootstrap doesn't have the Boostrap.ConfigRef nor a - // Bootstrap.DataSecretName specified by the users. + // MachineBootstrapInvalidConfigV1Beta2Reason surfaces when Machine's spec.bootstrap doesn't have configRef nor a + // dataSecretName set. MachineBootstrapInvalidConfigV1Beta2Reason = "InvalidConfig" // MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason surfaces a BootstrapConfig Ready condition (read from a bootstrap config object) which is invalid. @@ -146,9 +146,12 @@ const ( // MachineBootstrapConfigReadyNoV1Beta2ReasonReported applies to a BootstrapConfig Ready condition (read from a bootstrap config object) that reports no reason. MachineBootstrapConfigReadyNoV1Beta2ReasonReported = NoV1Beta2ReasonReported - // MachineBootstrapConfigNotFoundV1Beta2Reason surfaces when a referenced bootstrap config object cannot be found. + // MachineBootstrapConfigInternalErrorV1Beta2Reason surfaces unexpected failures when reading a BootstrapConfig object. + MachineBootstrapConfigInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason + + // MachineBootstrapConfigDoesNotExistV1Beta2Reason surfaces when a referenced bootstrap config object does not exist. 
// Note: this could happen when creating the machine. However, this state should be treated as an error if it last indefinitely.
- MachineBootstrapConfigNotFoundV1Beta2Reason = RefObjectNotFoundV1Beta2Reason
+ MachineBootstrapConfigDoesNotExistV1Beta2Reason = RefObjectDoesNotExistV1Beta2Reason
// MachineBootstrapConfigDeletedV1Beta2Reason surfaces when a referenced bootstrap config object has been deleted.
// Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g.
@@ -169,9 +172,12 @@ const (
// MachineInfrastructureReadyNoV1Beta2ReasonReported applies to a infrastructure Ready condition (read from an infra machine object) that reports no reason.
MachineInfrastructureReadyNoV1Beta2ReasonReported = NoV1Beta2ReasonReported
- // MachineInfrastructureNotFoundV1Beta2Reason surfaces when a referenced infrastructure object cannot be found.
+ // MachineInfrastructureInternalErrorV1Beta2Reason surfaces unexpected failures when reading an InfraMachine object.
+ MachineInfrastructureInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+
+ // MachineInfrastructureDoesNotExistV1Beta2Reason surfaces when a referenced infrastructure object does not exist.
// Note: this could happen when creating the machine. However, this state should be treated as an error if it last indefinitely.
- MachineInfrastructureNotFoundV1Beta2Reason = RefObjectNotFoundV1Beta2Reason
+ MachineInfrastructureDoesNotExistV1Beta2Reason = RefObjectDoesNotExistV1Beta2Reason
// MachineInfrastructureDeletedV1Beta2Reason surfaces when a referenced infrastructure object has been deleted.
// Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g.
@@ -188,6 +194,9 @@ const (
// MachineNodeReadyV1Beta2Condition is true if the Machine's Node is ready.
MachineNodeReadyV1Beta2Condition = "NodeReady"
+ // MachineNodeConditionNotYetReportedV1Beta2Reason surfaces when a Machine's Node doesn't have a condition reported yet.
+ MachineNodeConditionNotYetReportedV1Beta2Reason = "NodeConditionNotYetReported"
+
// MachineNodeNotFoundV1Beta2Reason surfaces when the node hosted on the machine cannot be found.
// Note: this could happen when creating the machine. However, this state should be treated as an error if it last indefinitely.
MachineNodeNotFoundV1Beta2Reason = "NodeNotFound"
@@ -220,6 +229,12 @@ const (
// MachinePausedV1Beta2Condition is true if the Machine or the Cluster it belongs to are paused.
MachinePausedV1Beta2Condition = PausedV1Beta2Condition
+
+ // MachineNotPausedV1Beta2Reason surfaces when a Machine is not paused.
+ MachineNotPausedV1Beta2Reason = NotPausedV1Beta2Reason
+
+ // MachineObjectPausedV1Beta2Reason surfaces when a Machine is paused.
+ MachineObjectPausedV1Beta2Reason = ObjectPausedV1Beta2Reason
)
// ANCHOR: MachineSpec
diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go
index 7e61d18faf30..2d3841b4d41a 100644
--- a/api/v1beta1/v1beta2_condition_consts.go
+++ b/api/v1beta1/v1beta2_condition_consts.go
@@ -95,13 +95,26 @@ const (
// Note: this could happen e.g. when an external object still uses Cluster API v1beta1 Conditions.
NoV1Beta2ReasonReported = "NoReasonReported"
- // RefObjectNotFoundV1Beta2Reason surfaces when a referenced object cannot be found.
- RefObjectNotFoundV1Beta2Reason = "RefObjectNotFound"
+ // InternalErrorV1Beta2Reason surfaces unexpected errors reported by controllers.
+ // In most cases, it will be required to look at controller logs to properly triage those issues.
+ InternalErrorV1Beta2Reason = "InternalError"
+
+ // RefObjectDoesNotExistV1Beta2Reason surfaces when a referenced object does not exist.
+ RefObjectDoesNotExistV1Beta2Reason = "RefObjectDoesNotExist"
// RefObjectDeletedV1Beta2Reason surfaces when a referenced object has been deleted.
// Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g.
// during the deletion workflow, or by a users.
RefObjectDeletedV1Beta2Reason = "RefObjectDeleted"
+
+ // NotPausedV1Beta2Reason surfaces when an object is not paused.
+ NotPausedV1Beta2Reason = "NotPaused"
+
+ // ClusterPausedV1Beta2Reason surfaces when a Cluster is paused.
+ ClusterPausedV1Beta2Reason = "ClusterPaused"
+
+ // ObjectPausedV1Beta2Reason surfaces when an object is paused.
+ ObjectPausedV1Beta2Reason = "ObjectPaused"
)
// Conditions that will be used for the MachineSet object in v1Beta2 API version.
diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go
index 64d4e72cfbd8..ce680f3ebdf4 100644
--- a/internal/controllers/machine/machine_controller.go
+++ b/internal/controllers/machine/machine_controller.go
@@ -189,17 +189,17 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
m.Spec.ClusterName, m.Name, m.Namespace)
}
- // Return early if the object or Cluster is paused.
- if annotations.IsPaused(cluster, m) {
- log.Info("Reconciliation is paused for this object")
- return ctrl.Result{}, nil
- }
-
s := &scope{
cluster: cluster,
machine: m,
}
+ // Return early if the object or Cluster is paused.
+ if annotations.IsPaused(cluster, m) {
+ log.Info("Reconciliation is paused for this object")
+ return ctrl.Result{}, setPausedCondition(ctx, r.Client, s)
+ }
+
// Initialize the patch helper
patchHelper, err := patch.NewHelper(m, r.Client)
if err != nil {
return ctrl.Result{}, err
}
diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go
index ed4bb2b5f946..b848c1d407f8 100644
--- a/internal/controllers/machine/machine_controller_status.go
+++ b/internal/controllers/machine/machine_controller_status.go
@@ -25,13 +25,14 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
- "k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/internal/contract"
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
+ "sigs.k8s.io/cluster-api/util/patch"
)
// reconcileStatus reconciles Machine's status during the entire lifecycle of the machine.
@@ -47,14 +48,14 @@ func (r *Reconciler) reconcileStatus(ctx context.Context, s *scope) {
// - status.Addresses
// - status.FailureReason
// - status.FailureMessage
- setBootstrapReadyCondition(ctx, s.machine, s.bootstrapConfig)
+ setBootstrapReadyCondition(ctx, s.machine, s.bootstrapConfig, s.bootstrapConfigIsNotFound)
// Update status from the InfraMachine external resource.
// Note: the Following Status field are managed in reconcileInfrastructure.
// - status.InfrastructureReady // - status.FailureReason // - status.FailureMessage - setInfrastructureReadyCondition(ctx, s.machine, s.infraMachine) + setInfrastructureReadyCondition(ctx, s.machine, s.infraMachine, s.infraMachineIsNotFound) // Update status from the Node external resource. // Note: the Following Status field are managed in reconcileNode. @@ -78,12 +79,12 @@ func (r *Reconciler) reconcileStatus(ctx context.Context, s *scope) { // TODO: Update the Deleting condition. - setPausedCondition(s) + unsetPausedCondition(s) setMachinePhaseAndLastUpdated(ctx, s.machine) } -func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, bootstrapConfig *unstructured.Unstructured) { +func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, bootstrapConfig *unstructured.Unstructured, bootstrapConfigIsNotFound bool) { if machine.Spec.Bootstrap.ConfigRef == nil { if ptr.Deref(machine.Spec.Bootstrap.DataSecretName, "") != "" { v1beta2conditions.Set(machine, metav1.Condition{ @@ -99,7 +100,7 @@ func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, b Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason, - Message: "either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", + Message: "Either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", }) return } @@ -109,20 +110,32 @@ func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, b bootstrapConfig, machine, contract.Bootstrap().ReadyConditionType(), v1beta2conditions.TargetConditionType(clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), v1beta2conditions.FallbackCondition{ - Status: v1beta2conditions.BoolToStatus(machine.Status.BootstrapReady), - Reason: clusterv1.MachineBootstrapConfigReadyNoV1Beta2ReasonReported, + Status: v1beta2conditions.BoolToStatus(machine.Status.BootstrapReady), + Reason: clusterv1.MachineBootstrapConfigReadyNoV1Beta2ReasonReported, + Message: fmt.Sprintf("%s status.ready is %t", machine.Spec.Bootstrap.ConfigRef.Kind, machine.Status.BootstrapReady), }, ); err != nil { v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason, - Message: fmt.Sprintf("%s %s reports an invalid %s condition: %s", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Namespace, machine.Spec.Bootstrap.ConfigRef.Name), contract.Bootstrap().ReadyConditionType(), err.Error()), + Message: err.Error(), }) } return } + // If we got unexpected errors in reading the bootstrap config (this should happen rarely), surface them + if !bootstrapConfigIsNotFound { + v1beta2conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineBootstrapConfigInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + // Tolerate Bootstrap config missing when the machine is deleting. // NOTE: this code assumes that Bootstrap config deletion has been initiated by the controller itself, // and thus this state is reported as Deleted instead of NotFound. 
@@ -131,50 +144,66 @@ func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, b
+ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
+ Status: metav1.ConditionUnknown,
+ Reason: clusterv1.MachineBootstrapConfigDeletedV1Beta2Reason,
- Message: fmt.Sprintf("%s %s has been deleted", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Namespace, machine.Spec.Bootstrap.ConfigRef.Name)),
+ Message: fmt.Sprintf("%s has been deleted", machine.Spec.Bootstrap.ConfigRef.Kind),
})
return
}
- // If the machine is not deleting, and boostrap config object does not exist yet,
- // surface the fact that the controller is waiting for the bootstrap config to exist, which could
- // happen when creating the machine. However, this state should be treated as an error if it last indefinitely.
+ // If the machine is not deleting, and bootstrap config object does not exist,
+ // surface this fact. This could happen when:
+ // - when applying the yaml file with the machine and all the objects referenced by it (provisioning yet to start/started, but status.nodeRef not yet set).
+ // - when the machine has been provisioned (status.nodeRef is set).
v1beta2conditions.Set(machine, metav1.Condition{
Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
Status: metav1.ConditionUnknown,
- Reason: clusterv1.MachineBootstrapConfigNotFoundV1Beta2Reason,
- Message: fmt.Sprintf("waiting for %s %s to exist", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Namespace, machine.Spec.Bootstrap.ConfigRef.Name)),
+ Reason: clusterv1.MachineBootstrapConfigDoesNotExistV1Beta2Reason,
+ Message: fmt.Sprintf("%s does not exist", machine.Spec.Bootstrap.ConfigRef.Kind),
})
}
-func setInfrastructureReadyCondition(_ context.Context, machine *clusterv1.Machine, infraMachine *unstructured.Unstructured) {
+func setInfrastructureReadyCondition(_ context.Context, machine *clusterv1.Machine, infraMachine *unstructured.Unstructured, infraMachineIsNotFound bool) {
if infraMachine != nil {
if err := v1beta2conditions.SetMirrorConditionFromUnstructured(
infraMachine, machine,
contract.InfrastructureMachine().ReadyConditionType(), v1beta2conditions.TargetConditionType(clusterv1.MachineInfrastructureReadyV1Beta2Condition),
v1beta2conditions.FallbackCondition{
- Status: v1beta2conditions.BoolToStatus(machine.Status.InfrastructureReady),
- Reason: clusterv1.MachineInfrastructureReadyNoV1Beta2ReasonReported,
+ Status: v1beta2conditions.BoolToStatus(machine.Status.InfrastructureReady),
+ Reason: clusterv1.MachineInfrastructureReadyNoV1Beta2ReasonReported,
+ Message: fmt.Sprintf("%s status.ready is %t", machine.Spec.InfrastructureRef.Kind, machine.Status.InfrastructureReady),
},
); err != nil {
v1beta2conditions.Set(machine, metav1.Condition{
Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineInfrastructureInvalidConditionReportedV1Beta2Reason,
- Message: fmt.Sprintf("%s %s reports an invalid %s condition: %s", machine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(machine.Namespace, machine.Spec.Bootstrap.ConfigRef.Name), contract.InfrastructureMachine().ReadyConditionType(), err.Error()),
+ Message: err.Error(),
})
}
return
}
+ // If we got errors in reading the infra machine (this should happen rarely), surface them
+ if !infraMachineIsNotFound {
+ v1beta2conditions.Set(machine, metav1.Condition{
+ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition,
+ Status: metav1.ConditionUnknown,
+ Reason: clusterv1.MachineInfrastructureInternalErrorV1Beta2Reason,
+ Message: "Please check controller logs for errors",
+ })
+ return
+ }
+
// Tolerate infra machine missing when the machine is deleting.
// NOTE: this code assumes that infra machine deletion has been initiated by the controller itself,
// and thus this state is reported as Deleted instead of NotFound.
+ // NOTE: in case an accidental deletion happens before volume detach is completed, the Node hosted on the Machine
+ // will be considered unreachable and Machine deletion will complete.
if !machine.DeletionTimestamp.IsZero() {
v1beta2conditions.Set(machine, metav1.Condition{
- Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition,
- Status: metav1.ConditionUnknown,
- Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason,
+ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition,
+ Status: metav1.ConditionUnknown,
+ Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason,
+ Message: fmt.Sprintf("%s has been deleted", machine.Spec.InfrastructureRef.Kind),
})
return
}
@@ -185,32 +214,39 @@ func setInfrastructureReadyCondition(_ context.Context, machine *clusterv1.Machi
Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason,
- Message: fmt.Sprintf("%s %s has been deleted while the machine still exist", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name)),
+ Message: fmt.Sprintf("%s has been deleted while the machine still exists", machine.Spec.InfrastructureRef.Kind),
})
return
}
// If the machine is not deleting, and infra machine object does not exist yet,
- // surface the fact that the controller is waiting for the infra machine to exist, which could
- // happen when creating the machine. However, this state should be treated as an error if it last indefinitely.
+ // surface this fact. This could happen when:
+ // - when applying the yaml file with the machine and all the objects referenced by it (provisioning yet to start/started, but status.InfrastructureReady not yet set).
v1beta2conditions.Set(machine, metav1.Condition{
- Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
- Status: metav1.ConditionUnknown,
- Reason: clusterv1.MachineInfrastructureNotFoundV1Beta2Reason,
+ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition,
+ Status: metav1.ConditionUnknown,
+ Reason: clusterv1.MachineInfrastructureDoesNotExistV1Beta2Reason,
+ Message: fmt.Sprintf("%s does not exist", machine.Spec.InfrastructureRef.Kind),
})
}
func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Machine, node *corev1.Node) {
+ // TODO: handle disconnected clusters when the new ClusterCache is merged
+
if node != nil {
var nodeReady *metav1.Condition
for _, condition := range node.Status.Conditions {
if condition.Type == corev1.NodeReady {
+ message := ""
+ if condition.Message != "" {
+ message = fmt.Sprintf("%s (from Node)", condition.Message)
+ }
nodeReady = &metav1.Condition{
Type: clusterv1.MachineNodeReadyV1Beta2Condition,
Status: metav1.ConditionStatus(condition.Status),
LastTransitionTime: condition.LastTransitionTime,
Reason: condition.Reason,
- Message: condition.Message,
+ Message: message,
}
}
}
@@ -219,7 +255,7 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma
nodeReady = &metav1.Condition{
Type: clusterv1.MachineNodeReadyV1Beta2Condition,
Status: metav1.ConditionUnknown,
- Reason: v1beta2conditions.NotYetReportedReason,
+ Reason: clusterv1.MachineNodeConditionNotYetReportedV1Beta2Reason,
}
}
v1beta2conditions.Set(machine, *nodeReady)
@@ -227,7 +263,7 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma
status, reason, message := summarizeNodeV1Beta2Conditions(ctx, node)
v1beta2conditions.Set(machine, metav1.Condition{
Type: clusterv1.MachineNodeHealthyV1Beta2Condition,
- Status: metav1.ConditionStatus(status),
+ Status: status,
Reason: reason,
Message: message,
})
@@ -238,17 +274,21 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma
// Tolerate node missing when the machine is deleting.
// NOTE: controllers always assume that node deletion has been initiated by the controller itself,
// and thus this state is reported as Deleted instead of NotFound.
+ // NOTE: in case an accidental deletion happens before volume detach is completed, the Node
+ // will be considered unreachable and Machine deletion will complete.
if !machine.DeletionTimestamp.IsZero() {
v1beta2conditions.Set(machine, metav1.Condition{
- Type: clusterv1.MachineNodeReadyV1Beta2Condition,
- Status: metav1.ConditionUnknown,
- Reason: clusterv1.MachineNodeDeletedV1Beta2Reason,
+ Type: clusterv1.MachineNodeReadyV1Beta2Condition,
+ Status: metav1.ConditionUnknown,
+ Reason: clusterv1.MachineNodeDeletedV1Beta2Reason,
+ Message: "Node has been deleted",
})
v1beta2conditions.Set(machine, metav1.Condition{
- Type: clusterv1.MachineNodeHealthyV1Beta2Condition,
- Status: metav1.ConditionUnknown,
- Reason: clusterv1.MachineNodeDeletedV1Beta2Reason,
+ Type: clusterv1.MachineNodeHealthyV1Beta2Condition,
+ Status: metav1.ConditionUnknown,
+ Reason: clusterv1.MachineNodeDeletedV1Beta2Reason,
+ Message: "Node has been deleted",
})
return
}
@@ -271,6 +311,8 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma
return
}
+ // If the machine is at the end of the provisioning phase, with ProviderID set, but still waiting
+ // for a matching Node to exist, surface this.
if ptr.Deref(machine.Spec.ProviderID, "") != "" { v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeReadyV1Beta2Condition, @@ -288,37 +330,53 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma return } - // Surface the fact that the controller is waiting for the bootstrap config to exist, which could - // happen when creating the machine. However, this state should be treated as an error if it last indefinitely. + // If the machine is at the beginning of the provisioning phase, with ProviderID not yet set, surface this. v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, - Message: fmt.Sprintf("Waiting for %s %s to report spec.providerID", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Spec.InfrastructureRef.Namespace, machine.Spec.InfrastructureRef.Name)), + Message: fmt.Sprintf("Waiting for %s to report spec.providerID", machine.Spec.InfrastructureRef.Kind), }) v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, - Message: fmt.Sprintf("Waiting for %s %s to report spec.providerID", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Spec.InfrastructureRef.Namespace, machine.Spec.InfrastructureRef.Name)), + Message: fmt.Sprintf("Waiting for %s to report spec.providerID", machine.Spec.InfrastructureRef.Kind), }) } -func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (corev1.ConditionStatus, string, string) { +func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav1.ConditionStatus, string, string) { semanticallyFalseStatus := 0 unknownStatus := 0 message := "" issueReason := "" unknownReason := "" - for _, condition := range node.Status.Conditions { + for _, conditionType := range []corev1.NodeConditionType{corev1.NodeReady, corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure} { + var condition *corev1.NodeCondition + for _, c := range node.Status.Conditions { + if c.Type == conditionType { + condition = &c + } + } + if condition == nil { + message += fmt.Sprintf("Node %s: condition not yet reported", conditionType) + "; " + if unknownStatus == 0 { + unknownReason = clusterv1.MachineNodeConditionNotYetReportedV1Beta2Reason + } else { + unknownReason = v1beta2conditions.MultipleUnknownReportedReason + } + unknownStatus++ + continue + } + switch condition.Type { case corev1.NodeMemoryPressure, corev1.NodeDiskPressure, corev1.NodePIDPressure: if condition.Status != corev1.ConditionFalse { - message += fmt.Sprintf("Node's %s condition is %s", condition.Type, condition.Status) + ". 
" + message += fmt.Sprintf("Node %s: condition is %s", condition.Type, condition.Status) + "; " if condition.Status == corev1.ConditionUnknown { - if unknownReason == "" { + if unknownStatus == 0 { unknownReason = condition.Reason } else { unknownReason = v1beta2conditions.MultipleUnknownReportedReason @@ -326,18 +384,19 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (corev unknownStatus++ continue } - if issueReason == "" { + if semanticallyFalseStatus == 0 { issueReason = condition.Reason } else { issueReason = v1beta2conditions.MultipleIssuesReportedReason } semanticallyFalseStatus++ + continue } case corev1.NodeReady: if condition.Status != corev1.ConditionTrue { - message += fmt.Sprintf("Node's %s condition is %s", condition.Type, condition.Status) + ". " + message += fmt.Sprintf("Node %s: condition is %s", condition.Type, condition.Status) + "; " if condition.Status == corev1.ConditionUnknown { - if unknownReason == "" { + if unknownStatus == 0 { unknownReason = condition.Reason } else { unknownReason = v1beta2conditions.MultipleUnknownReportedReason @@ -345,7 +404,7 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (corev unknownStatus++ continue } - if issueReason == "" { + if semanticallyFalseStatus == 0 { issueReason = condition.Reason } else { issueReason = v1beta2conditions.MultipleIssuesReportedReason @@ -354,14 +413,43 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (corev } } } - message = strings.TrimSuffix(message, ". ") + + message = strings.TrimSuffix(message, "; ") if semanticallyFalseStatus > 0 { - return corev1.ConditionFalse, issueReason, message + if issueReason == "" { + issueReason = v1beta2conditions.NoReasonReported + } + return metav1.ConditionFalse, issueReason, message } if semanticallyFalseStatus+unknownStatus > 0 { - return corev1.ConditionUnknown, unknownReason, message + if unknownReason == "" { + unknownReason = v1beta2conditions.NoReasonReported + } + return metav1.ConditionUnknown, unknownReason, message } - return corev1.ConditionTrue, v1beta2conditions.MultipleInfoReportedReason, message + return metav1.ConditionTrue, v1beta2conditions.MultipleInfoReportedReason, "" +} + +type machineConditionCostomMergeStrategy struct { + machine *clusterv1.Machine +} + +func (c machineConditionCostomMergeStrategy) Merge(conditions []v1beta2conditions.ConditionWithOwnerInfo, conditionTypes []string) (status metav1.ConditionStatus, reason, message string, err error) { + return v1beta2conditions.DefaultMergeStrategyWithCustomPriority(func(condition metav1.Condition) v1beta2conditions.MergePriority { + // While machine is deleting, treat unknown conditions from external objects as info (it is ok that those objects have been deleted at this stage). 
+ if !c.machine.DeletionTimestamp.IsZero() { + if condition.Type == clusterv1.MachineBootstrapConfigReadyV1Beta2Condition && condition.Reason == clusterv1.MachineBootstrapConfigDeletedV1Beta2Reason && condition.Status == metav1.ConditionUnknown { + return v1beta2conditions.InfoMergePriority + } + if condition.Type == clusterv1.MachineInfrastructureReadyV1Beta2Condition && condition.Reason == clusterv1.MachineInfrastructureDeletedV1Beta2Reason && condition.Status == metav1.ConditionUnknown { + return v1beta2conditions.InfoMergePriority + } + if condition.Type == clusterv1.MachineNodeHealthyV1Beta2Condition && condition.Reason == clusterv1.MachineNodeDeletedV1Beta2Reason && condition.Status == metav1.ConditionUnknown { + return v1beta2conditions.InfoMergePriority + } + } + return v1beta2conditions.GetDefaultMergePriority(nil)(condition) + }).Merge(conditions, conditionTypes) } func setReadyCondition(ctx context.Context, machine *clusterv1.Machine) { @@ -379,7 +467,9 @@ func setReadyCondition(ctx context.Context, machine *clusterv1.Machine) { } readyCondition, err := v1beta2conditions.NewSummaryCondition(machine, clusterv1.MachineReadyV1Beta2Condition, forConditionTypes, v1beta2conditions.IgnoreTypesIfMissing{clusterv1.MachineHealthCheckSucceededV1Beta2Condition}, - // TODO: think about the step counter + v1beta2conditions.CustomMergeStrategy{ + MergeStrategy: machineConditionCostomMergeStrategy{machine: machine}, + }, ) if err != nil || readyCondition == nil { // Note, this could only happen if we hit edge cases in computing the summary, which should not happen due to the fact @@ -420,7 +510,7 @@ func setAvailableCondition(_ context.Context, machine *clusterv1.Machine) { return } - if time.Now().Add(0).After(readyCondition.LastTransitionTime.Time) { + if !time.Now().After(readyCondition.LastTransitionTime.Time.Add(0)) { // TODO: use MinReadySeconds as soon as it is available (and fix corresponding unit test) v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineAvailableV1Beta2Condition, Status: metav1.ConditionFalse, @@ -436,12 +526,37 @@ func setAvailableCondition(_ context.Context, machine *clusterv1.Machine) { }) } -func setPausedCondition(s *scope) { +func setPausedCondition(ctx context.Context, c client.Client, s *scope) error { + patchHelper, err := patch.NewHelper(s.machine, c) + if err != nil { + return err + } + + if s.cluster.Spec.Paused { + v1beta2conditions.Set(s.machine, metav1.Condition{ + Type: clusterv1.MachinePausedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.ClusterPausedV1Beta2Reason, + }) + } else { + v1beta2conditions.Set(s.machine, metav1.Condition{ + Type: clusterv1.MachinePausedV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineObjectPausedV1Beta2Reason, + }) + } + + return patchHelper.Patch(ctx, s.machine, patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachinePausedV1Beta2Condition, + }}) +} + +func unsetPausedCondition(s *scope) { // Note: If we hit this code, the controller is reconciling and this Paused condition must be set to false. v1beta2conditions.Set(s.machine, metav1.Condition{ Type: clusterv1.MachinePausedV1Beta2Condition, Status: metav1.ConditionFalse, - Reason: "NotPaused", // TODO: create a const. 
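setReadyCondition above shows how a controller plugs a bespoke merge strategy into NewSummaryCondition. For other callers the same wiring looks roughly like the sketch below; it is illustrative only, uses a made-up rule (never count Paused as an issue), and assumes the ForConditionTypes option named in summary.go takes a list of condition types:

package example

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
)

// readySummaryIgnoringPaused computes a Ready summary for a Machine while treating the Paused
// condition as informational, whatever its state. The rule is invented purely to show how a
// custom priority function is wired in; it mirrors the shape of setReadyCondition above.
func readySummaryIgnoringPaused(machine *clusterv1.Machine) (*metav1.Condition, error) {
	getPriority := func(c metav1.Condition) v1beta2conditions.MergePriority {
		if c.Type == clusterv1.MachinePausedV1Beta2Condition {
			// Never let Paused turn the summary into an issue.
			return v1beta2conditions.InfoMergePriority
		}
		// Everything else keeps the default polarity-based classification.
		return v1beta2conditions.GetDefaultMergePriority(nil)(c)
	}

	return v1beta2conditions.NewSummaryCondition(machine, clusterv1.MachineReadyV1Beta2Condition,
		v1beta2conditions.ForConditionTypes{
			clusterv1.MachineBootstrapConfigReadyV1Beta2Condition,
			clusterv1.MachineInfrastructureReadyV1Beta2Condition,
			clusterv1.MachineNodeHealthyV1Beta2Condition,
			clusterv1.MachinePausedV1Beta2Condition,
		},
		v1beta2conditions.CustomMergeStrategy{
			MergeStrategy: v1beta2conditions.DefaultMergeStrategyWithCustomPriority(getPriority),
		},
	)
}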
+ Reason: clusterv1.MachineNotPausedV1Beta2Reason, }) } diff --git a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go index b58f5423c47f..e601d2a6401d 100644 --- a/internal/controllers/machine/machine_controller_status_test.go +++ b/internal/controllers/machine/machine_controller_status_test.go @@ -23,7 +23,6 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/utils/ptr" @@ -54,10 +53,11 @@ func TestSetBootstrapReadyCondition(t *testing.T) { } testCases := []struct { - name string - machine *clusterv1.Machine - bootstrapConfig *unstructured.Unstructured - expectConditions []metav1.Condition + name string + machine *clusterv1.Machine + bootstrapConfig *unstructured.Unstructured + bootstrapConfigIsNotFound bool + expectCondition metav1.Condition }{ { name: "boostrap data secret provided by user/operator", @@ -76,21 +76,15 @@ func TestSetBootstrapReadyCondition(t *testing.T) { }, "status": map[string]interface{}{}, }}, - expectConditions: []metav1.Condition{ - { - Type: clusterv1.MachineReadyV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: v1beta2conditions.MultipleInfoReportedReason, - }, - { - Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: clusterv1.MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason, - }, + bootstrapConfigIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason, }, }, { - name: "InvalidConfig: machine without bootstrap config ref and with dataSecretName not set", + name: "machine without bootstrap config ref and with dataSecretName not set", machine: func() *clusterv1.Machine { m := defaultMachine.DeepCopy() m.Spec.Bootstrap.ConfigRef = nil @@ -105,18 +99,579 @@ func TestSetBootstrapReadyCondition(t *testing.T) { }, "status": map[string]interface{}{}, }}, + bootstrapConfigIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason, + Message: "Either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", + }, + }, + { + name: "mirror Ready condition from bootstrap config", + machine: defaultMachine.DeepCopy(), + bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericBootstrapConfig", + "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "bootstrap-config1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{ + "type": "Ready", + "status": "False", + "message": "some message", + }, + }, + }, + }}, + bootstrapConfigIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineBootstrapConfigReadyNoV1Beta2ReasonReported, + Message: "some message (from GenericBootstrapConfig)", + }, + }, + { + name: "Use status.BoostrapReady flag as a fallback Ready condition from bootstrap config is missing", + machine: 
defaultMachine.DeepCopy(), + bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericBootstrapConfig", + "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "bootstrap-config1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{ + "conditions": []interface{}{}, + }, + }}, + bootstrapConfigIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineBootstrapConfigReadyNoV1Beta2ReasonReported, + Message: "GenericBootstrapConfig status.ready is false", + }, + }, + { + name: "invalid Ready condition from bootstrap config", + machine: defaultMachine.DeepCopy(), + bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericBootstrapConfig", + "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "bootstrap-config1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{ + "type": "Ready", + }, + }, + }, + }}, + bootstrapConfigIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason, + Message: "failed to convert status.conditions from GenericBootstrapConfig to []metav1.Condition: status must be set for the Ready condition", + }, + }, + { + name: "failed to get bootstrap config", + machine: defaultMachine.DeepCopy(), + bootstrapConfig: nil, + bootstrapConfigIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineBootstrapConfigInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "bootstrap config not found while machine is deleting", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + return m + }(), + bootstrapConfig: nil, + bootstrapConfigIsNotFound: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineBootstrapConfigDeletedV1Beta2Reason, + Message: "GenericBootstrapConfig has been deleted", + }, + }, + { + name: "bootstrap config not found", + machine: defaultMachine.DeepCopy(), + bootstrapConfig: nil, + bootstrapConfigIsNotFound: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineBootstrapConfigDoesNotExistV1Beta2Reason, + Message: "GenericBootstrapConfig does not exist", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + setBootstrapReadyCondition(ctx, tc.machine, tc.bootstrapConfig, tc.bootstrapConfigIsNotFound) + + condition := v1beta2conditions.Get(tc.machine, clusterv1.MachineBootstrapConfigReadyV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tc.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func TestSetInfrastructureReadyCondition(t *testing.T) { + defaultMachine := clusterv1.Machine{ + 
ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.MachineSpec{ + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "GenericInfrastructureMachine", + Name: "infra-machine1", + }, + }, + } + + testCases := []struct { + name string + machine *clusterv1.Machine + infraMachine *unstructured.Unstructured + infraMachineIsNotFound bool + expectCondition metav1.Condition + }{ + { + name: "mirror Ready condition from infra machine", + machine: defaultMachine.DeepCopy(), + infraMachine: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericInfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-machine1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{ + "type": "Ready", + "status": "False", + "message": "some message", + }, + }, + }, + }}, + infraMachineIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineInfrastructureReadyNoV1Beta2ReasonReported, + Message: "some message (from GenericInfrastructureMachine)", + }, + }, + { + name: "Use status.InfrastructureReady flag as a fallback Ready condition from infra machine is missing", + machine: defaultMachine.DeepCopy(), + infraMachine: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericInfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-machine1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{ + "conditions": []interface{}{}, + }, + }}, + infraMachineIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineInfrastructureReadyNoV1Beta2ReasonReported, + Message: "GenericInfrastructureMachine status.ready is false", + }, + }, + { + name: "invalid Ready condition from infra machine", + machine: defaultMachine.DeepCopy(), + infraMachine: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericInfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-machine1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{ + "conditions": []interface{}{ + map[string]interface{}{ + "type": "Ready", + }, + }, + }, + }}, + infraMachineIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineInfrastructureInvalidConditionReportedV1Beta2Reason, + Message: "failed to convert status.conditions from GenericInfrastructureMachine to []metav1.Condition: status must be set for the Ready condition", + }, + }, + { + name: "failed to get infra machine", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + return m + }(), + infraMachine: nil, + infraMachineIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: 
clusterv1.MachineInfrastructureInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "infra machine not found while machine is deleting", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + return m + }(), + infraMachine: nil, + infraMachineIsNotFound: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason, + Message: "GenericInfrastructureMachine has been deleted", + }, + }, + { + name: "infra machine not found after the machine has been initialized", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.Status.InfrastructureReady = true + return m + }(), + infraMachine: nil, + infraMachineIsNotFound: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason, + Message: "GenericInfrastructureMachine has been deleted while the machine still exist", + }, + }, + { + name: "infra machine not found", + machine: defaultMachine.DeepCopy(), + infraMachine: nil, + infraMachineIsNotFound: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineInfrastructureDoesNotExistV1Beta2Reason, + Message: "GenericInfrastructureMachine does not exist", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + setInfrastructureReadyCondition(ctx, tc.machine, tc.infraMachine, tc.infraMachineIsNotFound) + + condition := v1beta2conditions.Get(tc.machine, clusterv1.MachineInfrastructureReadyV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tc.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func TestSummarizeNodeV1Beta2Conditions(t *testing.T) { + testCases := []struct { + name string + conditions []corev1.NodeCondition + expectedStatus metav1.ConditionStatus + expectedReason string + expectedMessage string + }{ + { + name: "node is healthy", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionTrue}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + }, + expectedStatus: metav1.ConditionTrue, + expectedReason: v1beta2conditions.MultipleInfoReportedReason, + }, + { + name: "all conditions are unknown", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionUnknown}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionUnknown}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionUnknown}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionUnknown}, + }, + expectedStatus: metav1.ConditionUnknown, + expectedReason: v1beta2conditions.MultipleUnknownReportedReason, + expectedMessage: "Node Ready: condition is Unknown; Node MemoryPressure: condition is Unknown; Node DiskPressure: condition is Unknown; Node PIDPressure: condition is Unknown", + }, + { + name: "multiple semantically failed condition", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionUnknown}, + 
{Type: corev1.NodeMemoryPressure, Status: corev1.ConditionTrue}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionTrue}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionTrue}, + }, + expectedStatus: metav1.ConditionFalse, + expectedReason: v1beta2conditions.MultipleIssuesReportedReason, + expectedMessage: "Node Ready: condition is Unknown; Node MemoryPressure: condition is True; Node DiskPressure: condition is True; Node PIDPressure: condition is True", + }, + { + name: "one semantically failed condition when the rest is healthy", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse, Reason: "SomeReason"}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + }, + expectedStatus: metav1.ConditionFalse, + expectedReason: "SomeReason", + expectedMessage: "Node Ready: condition is False", + }, + { + name: "one unknown condition when the rest is healthy", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionUnknown, Reason: "SomeReason"}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + }, + expectedStatus: metav1.ConditionUnknown, + expectedReason: "SomeReason", + expectedMessage: "Node Ready: condition is Unknown", + }, + { + name: "one condition missing", + conditions: []corev1.NodeCondition{ + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + }, + expectedStatus: metav1.ConditionUnknown, + expectedReason: clusterv1.MachineNodeConditionNotYetReportedV1Beta2Reason, + expectedMessage: "Node Ready: condition not yet reported", + }, + } + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + g := NewWithT(t) + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-1", + }, + Status: corev1.NodeStatus{ + Conditions: test.conditions, + }, + } + status, reason, message := summarizeNodeV1Beta2Conditions(ctx, node) + g.Expect(status).To(Equal(test.expectedStatus)) + g.Expect(reason).To(Equal(test.expectedReason)) + g.Expect(message).To(Equal(test.expectedMessage)) + }) + } +} + +func TestSetNodeHealthyAndReadyConditions(t *testing.T) { + defaultMachine := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.MachineSpec{ + InfrastructureRef: corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "GenericInfrastructureMachine", + Name: "infra-machine1", + }, + }, + } + + testCases := []struct { + name string + machine *clusterv1.Machine + node *corev1.Node + expectConditions []metav1.Condition + }{ + { + name: "get NodeHealthy and NodeReady from node", + machine: defaultMachine.DeepCopy(), + node: &corev1.Node{ + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: corev1.NodeReady, Status: corev1.ConditionFalse, Reason: "SomeReason", Message: "Some message"}, + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + }, + }, + }, + expectConditions: 
[]metav1.Condition{ + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "Node Ready: condition is False", + }, + { + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "Some message (from Node)", + }, + }, + }, + { + // TODO: handle missing conditions in summarize node conditions. + name: "NodeReady missing from node", + machine: defaultMachine.DeepCopy(), + node: &corev1.Node{ + Status: corev1.NodeStatus{ + Conditions: []corev1.NodeCondition{ + {Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse}, + {Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse}, + }, + }, + }, + expectConditions: []metav1.Condition{ + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeConditionNotYetReportedV1Beta2Reason, + Message: "Node Ready: condition not yet reported", + }, + { + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeConditionNotYetReportedV1Beta2Reason, + }, + }, + }, + { + name: "node not found while machine is deleting", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + return m + }(), + node: nil, + expectConditions: []metav1.Condition{ + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, + Message: "Node has been deleted", + }, + { + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, + Message: "Node has been deleted", + }, + }, + }, + { + name: "node missing while machine is still running", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.Status.NodeRef = &corev1.ObjectReference{ + Name: "test-node-1", + } + return m + }(), + node: nil, expectConditions: []metav1.Condition{ { - Type: clusterv1.MachineReadyV1Beta2Condition, + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionFalse, - Reason: clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason, - Message: "BootstrapConfigReady: either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", + Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, + Message: "Node test-node-1 has been deleted while the machine still exist", }, { - Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionFalse, - Reason: clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason, - Message: "either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", + Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, + Message: "Node test-node-1 has been deleted while the machine still exist", + }, + }, + }, + { + name: "machine with ProviderID set, Node still missing", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.Spec.ProviderID = ptr.To("foo://test-node-1") + return m + }(), + node: nil, + expectConditions: []metav1.Condition{ + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Message: "Waiting for a node with Provider ID foo://test-node-1 to 
exist", + }, + { + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Message: "Waiting for a node with Provider ID foo://test-node-1 to exist", + }, + }, + }, + { + name: "machine with ProviderID not yet set, waiting for it", + machine: defaultMachine.DeepCopy(), + node: nil, + expectConditions: []metav1.Condition{ + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Message: "Waiting for GenericInfrastructureMachine to report spec.providerID", + }, + { + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Message: "Waiting for GenericInfrastructureMachine to report spec.providerID", }, }, }, @@ -126,24 +681,294 @@ func TestSetBootstrapReadyCondition(t *testing.T) { t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) - setBootstrapReadyCondition(ctx, tc.machine, tc.bootstrapConfig) + setNodeHealthyAndReadyConditions(ctx, tc.machine, tc.node) + g.Expect(tc.machine.GetV1Beta2Conditions()).To(v1beta2conditions.MatchConditions(tc.expectConditions, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} - // Compute ready by ensuring no other conditions influence the result. - tc.machine.Status.V1Beta2.Conditions = append(tc.machine.Status.V1Beta2.Conditions, - metav1.Condition{ - Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, - Status: metav1.ConditionTrue, +func TestSetReadyCondition(t *testing.T) { + testCases := []struct { + name string + machine *clusterv1.Machine + expectCondition metav1.Condition + }{ + { + name: "Accepts HealthCheckSucceeded to be missing", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Status: clusterv1.MachineStatus{ + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.InfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleInfoReportedReason, + }, + }, + { + name: "Tolerates BootstrapConfig, InfraMachine and Node do not exists while the machine is deleting", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + DeletionTimestamp: &metav1.Time{Time: time.Now()}, + }, + Status: clusterv1.MachineStatus{ + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.InfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason, + }, + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + 
Reason: v1beta2conditions.MultipleInfoReportedReason, + }, + }, + { + name: "Takes into account HealthCheckSucceeded when it exists", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, }, - metav1.Condition{ - Type: clusterv1.MachineNodeHealthyV1Beta2Condition, - Status: metav1.ConditionTrue, + Status: clusterv1.MachineStatus{ + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.InfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "Some message", + }, + }, + }, }, - ) + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "HealthCheckSucceeded: Some message", + }, + }, + { + name: "Takes into account Readiness gates when defined", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.MachineSpec{ + ReadinessGates: []clusterv1.MachineReadinessGate{ + { + ConditionType: "MyReadinessGate", + }, + }, + }, + Status: clusterv1.MachineStatus{ + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.InfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "Foo", + }, + { + Type: "MyReadinessGate", + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "Some message", + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "MyReadinessGate: Some message", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + setReadyCondition(ctx, tc.machine) - meta.RemoveStatusCondition(&tc.machine.Status.V1Beta2.Conditions, clusterv1.MachineInfrastructureReadyV1Beta2Condition) - meta.RemoveStatusCondition(&tc.machine.Status.V1Beta2.Conditions, clusterv1.MachineNodeHealthyV1Beta2Condition) - g.Expect(tc.machine.GetV1Beta2Conditions()).To(v1beta2conditions.MatchConditions(tc.expectConditions, v1beta2conditions.IgnoreLastTransitionTime(true))) + condition := v1beta2conditions.Get(tc.machine, clusterv1.MachineReadyV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tc.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func TestAvailableCondition(t *testing.T) { + testCases := []struct { + name string + machine *clusterv1.Machine + expectCondition metav1.Condition + }{ + { + name: "Not Ready", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, 
+ }, + Status: clusterv1.MachineStatus{ + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "SomeReason", + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineNotReadyV1Beta2Reason, + }, + }, + { + name: "Ready but still waiting for MinReadySeconds", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Status: clusterv1.MachineStatus{ + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleInfoReportedReason, + LastTransitionTime: metav1.Time{Time: time.Now().Add(10 * time.Second)}, + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineWaitingForMinReadySecondsV1Beta2Reason, + }, + }, + { + name: "Ready and available", + machine: &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machine-test", + Namespace: metav1.NamespaceDefault, + }, + Status: clusterv1.MachineStatus{ + V1Beta2: &clusterv1.MachineV1Beta2Status{ + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleInfoReportedReason, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-10 * time.Second)}, + }, + }, + }, + }, + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineAvailableV1Beta2Reason, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + setAvailableCondition(ctx, tc.machine) + + readyCondition := v1beta2conditions.Get(tc.machine, clusterv1.MachineAvailableV1Beta2Condition) + g.Expect(readyCondition).ToNot(BeNil()) + g.Expect(*readyCondition).To(v1beta2conditions.MatchCondition(tc.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) }) } } @@ -727,6 +1552,6 @@ func TestReconcileMachinePhases(t *testing.T) { g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) return true - }, 10*time.Second).Should(BeTrue()) + }, 100*time.Second).Should(BeTrue()) }) } diff --git a/util/conditions/v1beta2/aggregate.go b/util/conditions/v1beta2/aggregate.go index a4a4e8a36b6a..26942d2c6c1f 100644 --- a/util/conditions/v1beta2/aggregate.go +++ b/util/conditions/v1beta2/aggregate.go @@ -57,7 +57,7 @@ func NewAggregateCondition[T Getter](sourceObjs []T, sourceConditionType string, } aggregateOpt := &AggregateOptions{ - mergeStrategy: newDefaultMergeStrategy(), + mergeStrategy: newDefaultMergeStrategy(nil), targetConditionType: sourceConditionType, } aggregateOpt.ApplyOptions(opts) @@ -95,12 +95,7 @@ func NewAggregateCondition[T Getter](sourceObjs []T, sourceConditionType string, } } - status, reason, message, err := aggregateOpt.mergeStrategy.Merge( - conditionsInScope, - []string{sourceConditionType}, - nil, // negative conditions - false, // step counter - ) + status, reason, message, err := aggregateOpt.mergeStrategy.Merge(conditionsInScope, []string{sourceConditionType}) if err != nil { return nil, err } diff --git 
a/util/conditions/v1beta2/getter.go b/util/conditions/v1beta2/getter.go index 3a3818ae5fc9..d414aca5700c 100644 --- a/util/conditions/v1beta2/getter.go +++ b/util/conditions/v1beta2/getter.go @@ -75,7 +75,7 @@ func UnstructuredGet(sourceObj runtime.Unstructured, sourceConditionType string) if conditions, ok := value.([]interface{}); ok { r, err := convertFromUnstructuredConditions(conditions) if err != nil { - return nil, errors.Wrapf(err, "failed to convert %s.status.v1beta2.conditions to []metav1.Condition", ownerInfo) + return nil, errors.Wrapf(err, "failed to convert status.v1beta2.conditions from %s to []metav1.Condition", ownerInfo.Kind) } return meta.FindStatusCondition(r, sourceConditionType), nil } @@ -86,7 +86,7 @@ func UnstructuredGet(sourceObj runtime.Unstructured, sourceConditionType string) if conditions, ok := value.([]interface{}); ok { r, err := convertFromUnstructuredConditions(conditions) if err != nil { - return nil, errors.Wrapf(err, "failed to convert %s.status.conditions to []metav1.Condition", ownerInfo) + return nil, errors.Wrapf(err, "failed to convert status.conditions from %s to []metav1.Condition", ownerInfo.Kind) } return meta.FindStatusCondition(r, sourceConditionType), nil } @@ -166,11 +166,18 @@ func convertFromUnstructuredConditions(conditions []interface{}) ([]metav1.Condi // also, only a few, minimal rules are enforced, just enough to allow surfacing a condition from a providers object with Mirror. func validateAndFixConvertedCondition(c *metav1.Condition) error { if c.Type == "" { - return errors.New("condition type must be set") + return errors.New("type must be set for all conditions") } if c.Status == "" { - return errors.New("condition status must be set") + return errors.Errorf("status must be set for the %s condition", c.Type) } + switch c.Status { + case metav1.ConditionFalse, metav1.ConditionTrue, metav1.ConditionUnknown: + break + default: + return errors.Errorf("status for the %s condition must be one of %s, %s, %s", c.Type, metav1.ConditionTrue, metav1.ConditionFalse, metav1.ConditionUnknown) + } + if c.Reason == "" { switch c.Status { case metav1.ConditionFalse: // When using old Cluster API condition utils, for conditions with Status false, Reason can be empty only when a condition has negative polarity (means "good") @@ -178,7 +185,7 @@ func validateAndFixConvertedCondition(c *metav1.Condition) error { case metav1.ConditionTrue: // When using old Cluster API condition utils, for conditions with Status true, Reason can be empty only when a condition has positive polarity (means "good"). 
c.Reason = NoReasonReported case metav1.ConditionUnknown: - return errors.New("condition reason must be set when a condition is unknown") + return errors.Errorf("reason must be set for the %s condition, status unknown", c.Type) } } diff --git a/util/conditions/v1beta2/getter_test.go b/util/conditions/v1beta2/getter_test.go index 65cb8fcd660a..cd812cfda6b6 100644 --- a/util/conditions/v1beta2/getter_test.go +++ b/util/conditions/v1beta2/getter_test.go @@ -535,6 +535,13 @@ func TestConvertFromUnstructuredConditions(t *testing.T) { }, wantError: true, }, + { + name: "Fails if Status is a wrong value", + conditions: clusterv1.Conditions{ + clusterv1.Condition{Type: clusterv1.ConditionType("foo"), Status: "foo"}, + }, + wantError: true, + }, { name: "Defaults reason for positive polarity", conditions: clusterv1.Conditions{ diff --git a/util/conditions/v1beta2/merge_strategies.go b/util/conditions/v1beta2/merge_strategies.go index dfbe3cf3d068..f89658924261 100644 --- a/util/conditions/v1beta2/merge_strategies.go +++ b/util/conditions/v1beta2/merge_strategies.go @@ -44,20 +44,6 @@ const ( MultipleInfoReportedReason = "MultipleInfoReported" ) -// MergeStrategy defines a strategy used to merge conditions during the aggregate or summary operation. -type MergeStrategy interface { - // Merge passed in conditions. - // - // It is up to the caller to ensure that all the expected conditions exist (e.g. by adding new conditions with status Unknown). - // Conditions passed in must be of the given conditionTypes (other condition types must be discarded). - // - // The list of conditionTypes has an implicit order; it is up to the implementation of merge to use this info or not. - // If negativeConditionTypes are in scope, the implementation of merge should treat them accordingly. - // - // If stepCounter is true, the implementation of merge must add info about step progress to the output message. - Merge(conditions []ConditionWithOwnerInfo, conditionTypes []string, negativeConditionTypes sets.Set[string], stepCounter bool) (status metav1.ConditionStatus, reason, message string, err error) -} - // ConditionWithOwnerInfo is a wrapper around metav1.Condition with additional ConditionOwnerInfo. // These infos can be used when generating the message resulting from the merge operation. type ConditionWithOwnerInfo struct { @@ -76,30 +62,89 @@ func (o ConditionOwnerInfo) String() string { return fmt.Sprintf("%s %s", o.Kind, o.Name) } -// defaultMergeStrategy defines the default merge strategy for Cluster API conditions. -type defaultMergeStrategy struct{} +// MergeStrategy defines a strategy used to merge conditions during the aggregate or summary operation. +type MergeStrategy interface { + // Merge passed in conditions. + // + // It is up to the caller to ensure that all the expected conditions exist (e.g. by adding new conditions with status Unknown). + // Conditions passed in must be of the given conditionTypes (other condition types must be discarded). + // + // The list of conditionTypes has an implicit order; it is up to the implementation of merge to use this info or not. + Merge(conditions []ConditionWithOwnerInfo, conditionTypes []string) (status metav1.ConditionStatus, reason, message string, err error) +} + +// DefaultMergeStrategyWithCustomPriority is the default merge strategy with a customized getPriority function. 
+func DefaultMergeStrategyWithCustomPriority(getPriority func(condition metav1.Condition) MergePriority) MergeStrategy { + return &defaultMergeStrategy{ + getPriority: getPriority, + } +} + +func newDefaultMergeStrategy(negativePolarityConditionTypes sets.Set[string]) MergeStrategy { + return &defaultMergeStrategy{ + getPriority: GetDefaultMergePriority(negativePolarityConditionTypes), + } +} + +// GetDefaultMergePriority returns the merge priority for each condition. +// It assigns following priority values to conditions: +// - issues: conditions with positive polarity (normal True) and status False or conditions with negative polarity (normal False) and status True. +// - unknown: conditions with status unknown. +// - info: conditions with positive polarity (normal True) and status True or conditions with negative polarity (normal False) and status False. +func GetDefaultMergePriority(negativePolarityConditionTypes sets.Set[string]) func(condition metav1.Condition) MergePriority { + return func(condition metav1.Condition) MergePriority { + switch condition.Status { + case metav1.ConditionTrue: + if negativePolarityConditionTypes.Has(condition.Type) { + return IssueMergePriority + } + return InfoMergePriority + case metav1.ConditionFalse: + if negativePolarityConditionTypes.Has(condition.Type) { + return InfoMergePriority + } + return IssueMergePriority + case metav1.ConditionUnknown: + return UnknownMergePriority + } -type mergePriority uint8 + // Note: this should never happen. In case, those conditions are considered like conditions with unknown status. + return UnknownMergePriority + } +} + +// MergePriority defines the priority for a condition during a merge operation. +type MergePriority uint8 const ( - issueMergePriority mergePriority = iota - unknownMergePriority - infoMergePriority + // IssueMergePriority is the merge priority used by GetDefaultMergePriority in case the condition state is considered an issue. + IssueMergePriority MergePriority = iota + + // UnknownMergePriority is the merge priority used by GetDefaultMergePriority in case of unknown conditions. + UnknownMergePriority + + // InfoMergePriority is the merge priority used by GetDefaultMergePriority in case the condition state is not considered an issue. + InfoMergePriority ) -func newDefaultMergeStrategy() MergeStrategy { - return &defaultMergeStrategy{} +// defaultMergeStrategy defines the default merge strategy for Cluster API conditions. +type defaultMergeStrategy struct { + getPriority func(condition metav1.Condition) MergePriority } // Merge all conditions in input based on a strategy that surfaces issues first, then unknown conditions, then info (if none of issues and unknown condition exists). // - issues: conditions with positive polarity (normal True) and status False or conditions with negative polarity (normal False) and status True. // - unknown: conditions with status unknown. // - info: conditions with positive polarity (normal True) and status True or conditions with negative polarity (normal False) and status False. 
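To make the polarity rules above concrete, the following illustrative snippet (assuming the cluster-api and apimachinery import paths) classifies a few conditions the way GetDefaultMergePriority does, with "ScalingUp" registered as a negative-polarity type:

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/sets"

	v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
)

func main() {
	// "ScalingUp" plays the role of a negative-polarity type here: True means something is going on.
	getPriority := v1beta2conditions.GetDefaultMergePriority(sets.New[string]("ScalingUp"))

	for _, c := range []metav1.Condition{
		{Type: "Ready", Status: metav1.ConditionFalse},     // positive polarity, False -> issue
		{Type: "Ready", Status: metav1.ConditionUnknown},   // any polarity, Unknown    -> unknown
		{Type: "ScalingUp", Status: metav1.ConditionTrue},  // negative polarity, True  -> issue
		{Type: "ScalingUp", Status: metav1.ConditionFalse}, // negative polarity, False -> info
	} {
		switch getPriority(c) {
		case v1beta2conditions.IssueMergePriority:
			fmt.Println(c.Type, c.Status, "-> issue")
		case v1beta2conditions.UnknownMergePriority:
			fmt.Println(c.Type, c.Status, "-> unknown")
		case v1beta2conditions.InfoMergePriority:
			fmt.Println(c.Type, c.Status, "-> info")
		}
	}
}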
-func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, conditionTypes []string, negativeConditionTypes sets.Set[string], stepCounter bool) (status metav1.ConditionStatus, reason, message string, err error) { +func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, conditionTypes []string) (status metav1.ConditionStatus, reason, message string, err error) { if len(conditions) == 0 { return "", "", "", errors.New("can't merge an empty list of conditions") } + if d.getPriority == nil { + return "", "", "", errors.New("can't merge without a getPriority func") + } + // Infer which operation is calling this func, so it is possible to use different strategies for computing the message for the target condition. // - When merge should consider a single condition type, we can assume this func is called within an aggregate operation // (Aggregate should merge the same condition across many objects) @@ -112,7 +157,7 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit // sortConditions the relevance defined by the users (the order of condition types), LastTransition time (older first). sortConditions(conditions, conditionTypes) - issueConditions, unknownConditions, infoConditions := splitConditionsByPriority(conditions, negativeConditionTypes) + issueConditions, unknownConditions, infoConditions := splitConditionsByPriority(conditions, d.getPriority) // Compute the status for the target condition: // Note: This function always returns a condition with positive polarity. @@ -168,8 +213,8 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit if isSummaryOperation { messages := []string{} for _, condition := range append(issueConditions, append(unknownConditions, infoConditions...)...) { - priority := getPriority(condition.Condition, negativeConditionTypes) - if priority == infoMergePriority { + priority := d.getPriority(condition.Condition) + if priority == InfoMergePriority { // Drop info messages when we are surfacing issues or unknown. if status != metav1.ConditionTrue { continue @@ -189,14 +234,6 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit messages = append(messages, m) } - // Prepend the step counter if required. - if stepCounter { - totalSteps := len(conditionTypes) - stepsCompleted := len(infoConditions) - - messages = append([]string{fmt.Sprintf("%d of %d completed", stepsCompleted, totalSteps)}, messages...) - } - message = strings.Join(messages, "; ") } @@ -268,45 +305,20 @@ func sortConditions(conditions []ConditionWithOwnerInfo, orderedConditionTypes [ // - conditions with status unknown. // - conditions representing an info. // NOTE: The order of conditions is preserved in each group. 
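Once each condition has a priority, the merged status follows the issues-first rule described above: any issue forces False, otherwise any unknown forces Unknown, otherwise the result is True. A compact illustrative restatement using only the exported helpers (splitConditionsByPriority itself is unexported):

package example

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
)

// mergedStatus restates the issues-first rule of the default merge strategy for illustration:
// it classifies each condition with the default (positive-polarity) priority function and
// derives a single status from the counts.
func mergedStatus(conditions []metav1.Condition) metav1.ConditionStatus {
	getPriority := v1beta2conditions.GetDefaultMergePriority(nil)

	issues, unknown := 0, 0
	for _, c := range conditions {
		switch getPriority(c) {
		case v1beta2conditions.IssueMergePriority:
			issues++
		case v1beta2conditions.UnknownMergePriority:
			unknown++
		}
	}

	switch {
	case issues > 0:
		return metav1.ConditionFalse
	case unknown > 0:
		return metav1.ConditionUnknown
	default:
		return metav1.ConditionTrue
	}
}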
-func splitConditionsByPriority(conditions []ConditionWithOwnerInfo, negativePolarityConditionTypes sets.Set[string]) (issueConditions, unknownConditions, infoConditions []ConditionWithOwnerInfo) { +func splitConditionsByPriority(conditions []ConditionWithOwnerInfo, getPriority func(condition metav1.Condition) MergePriority) (issueConditions, unknownConditions, infoConditions []ConditionWithOwnerInfo) { for _, condition := range conditions { - switch getPriority(condition.Condition, negativePolarityConditionTypes) { - case issueMergePriority: + switch getPriority(condition.Condition) { + case IssueMergePriority: issueConditions = append(issueConditions, condition) - case unknownMergePriority: + case UnknownMergePriority: unknownConditions = append(unknownConditions, condition) - case infoMergePriority: + case InfoMergePriority: infoConditions = append(infoConditions, condition) } } return issueConditions, unknownConditions, infoConditions } -// getPriority returns the merge priority for each condition. -// It assigns following priority values to conditions: -// - issues: conditions with positive polarity (normal True) and status False or conditions with negative polarity (normal False) and status True. -// - unknown: conditions with status unknown. -// - info: conditions with positive polarity (normal True) and status True or conditions with negative polarity (normal False) and status False. -func getPriority(condition metav1.Condition, negativePolarityConditionTypes sets.Set[string]) mergePriority { - switch condition.Status { - case metav1.ConditionTrue: - if negativePolarityConditionTypes.Has(condition.Type) { - return issueMergePriority - } - return infoMergePriority - case metav1.ConditionFalse: - if negativePolarityConditionTypes.Has(condition.Type) { - return infoMergePriority - } - return issueMergePriority - case metav1.ConditionUnknown: - return unknownMergePriority - } - - // Note: this should never happen. In case, those conditions are considered like conditions with unknown status. - return unknownMergePriority -} - // aggregateMessages returns messages for the aggregate operation. func aggregateMessages(conditions []ConditionWithOwnerInfo, n *int, dropEmpty bool, otherMessage string) (messages []string) { // create a map with all the messages and the list of objects reporting the same message. diff --git a/util/conditions/v1beta2/merge_strategies_test.go b/util/conditions/v1beta2/merge_strategies_test.go index c216016528b0..0ac6dfc0b8fb 100644 --- a/util/conditions/v1beta2/merge_strategies_test.go +++ b/util/conditions/v1beta2/merge_strategies_test.go @@ -108,7 +108,7 @@ func TestSplitConditionsByPriority(t *testing.T) { {OwnerResource: ConditionOwnerInfo{Name: "baz"}, Condition: metav1.Condition{Type: "!C", Status: metav1.ConditionFalse}}, // info } - issueConditions, unknownConditions, infoConditions := splitConditionsByPriority(conditions, sets.New[string]("!C")) + issueConditions, unknownConditions, infoConditions := splitConditionsByPriority(conditions, GetDefaultMergePriority(sets.New[string]("!C"))) // Check condition are grouped as expected and order is preserved. 
@@ -131,60 +131,60 @@ func TestSplitConditionsByPriority(t *testing.T) { })) } -func TestGetPriority(t *testing.T) { +func TestDefaultMergePriority(t *testing.T) { tests := []struct { name string condition metav1.Condition negativePolarity bool - wantPriority mergePriority + wantPriority MergePriority }{ { name: "Issue (PositivePolarity)", condition: metav1.Condition{Type: "foo", Status: metav1.ConditionFalse}, negativePolarity: false, - wantPriority: issueMergePriority, + wantPriority: IssueMergePriority, }, { name: "Unknown (PositivePolarity)", condition: metav1.Condition{Type: "foo", Status: metav1.ConditionUnknown}, negativePolarity: false, - wantPriority: unknownMergePriority, + wantPriority: UnknownMergePriority, }, { name: "Info (PositivePolarity)", condition: metav1.Condition{Type: "foo", Status: metav1.ConditionTrue}, negativePolarity: false, - wantPriority: infoMergePriority, + wantPriority: InfoMergePriority, }, { name: "NoStatus (PositivePolarity)", condition: metav1.Condition{Type: "foo"}, negativePolarity: false, - wantPriority: unknownMergePriority, + wantPriority: UnknownMergePriority, }, { name: "Issue (NegativePolarity)", condition: metav1.Condition{Type: "foo", Status: metav1.ConditionTrue}, negativePolarity: true, - wantPriority: issueMergePriority, + wantPriority: IssueMergePriority, }, { name: "Unknown (NegativePolarity)", condition: metav1.Condition{Type: "foo", Status: metav1.ConditionUnknown}, negativePolarity: true, - wantPriority: unknownMergePriority, + wantPriority: UnknownMergePriority, }, { name: "Info (NegativePolarity)", condition: metav1.Condition{Type: "foo", Status: metav1.ConditionFalse}, negativePolarity: true, - wantPriority: infoMergePriority, + wantPriority: InfoMergePriority, }, { name: "NoStatus (NegativePolarity)", condition: metav1.Condition{Type: "foo"}, negativePolarity: true, - wantPriority: unknownMergePriority, + wantPriority: UnknownMergePriority, }, } @@ -196,7 +196,7 @@ func TestGetPriority(t *testing.T) { if tt.negativePolarity { negativePolarityConditionTypes.Insert(tt.condition.Type) } - gotPriority := getPriority(tt.condition, negativePolarityConditionTypes) + gotPriority := GetDefaultMergePriority(negativePolarityConditionTypes)(tt.condition) g.Expect(gotPriority).To(Equal(tt.wantPriority)) }) diff --git a/util/conditions/v1beta2/mirror.go b/util/conditions/v1beta2/mirror.go index 098e61c9135a..b8435cb6ff4d 100644 --- a/util/conditions/v1beta2/mirror.go +++ b/util/conditions/v1beta2/mirror.go @@ -18,7 +18,6 @@ package v1beta2 import ( "fmt" - "strings" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -72,13 +71,17 @@ func newMirrorCondition(sourceObj any, condition *metav1.Condition, sourceCondit conditionOwner := getConditionOwnerInfo(sourceObj) if condition != nil { + message := "" + if condition.Message != "" { + message = fmt.Sprintf("%s (from %s)", condition.Message, conditionOwner.Kind) + } return &metav1.Condition{ Type: mirrorOpt.targetConditionType, Status: condition.Status, // NOTE: we are preserving the original transition time (when the underlying condition changed) LastTransitionTime: condition.LastTransitionTime, Reason: condition.Reason, - Message: strings.TrimSpace(fmt.Sprintf("%s (from %s)", condition.Message, conditionOwner)), + Message: message, // NOTE: ObservedGeneration will be set when this condition is added to an object by calling Set // (also preserving ObservedGeneration from the source object will be confusing when the mirror conditions shows up in the target object). 
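The message handling in newMirrorCondition boils down to a small guard: append the source Kind, but never emit the suffix for an empty message. As a standalone illustration, not part of the patch:

package example

import "fmt"

// withSourceKind appends the Kind of the source object to a mirrored condition message,
// but leaves an empty message empty so a bare "(from Foo)" suffix is never produced.
func withSourceKind(message, kind string) string {
	if message == "" {
		return ""
	}
	return fmt.Sprintf("%s (from %s)", message, kind)
}

For example, withSourceKind("We are good!", "Phase3Obj") returns "We are good! (from Phase3Obj)", while an empty input stays empty, which is exactly what the updated mirror_test.go expectations below check.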
} @@ -99,7 +102,7 @@ func newMirrorCondition(sourceObj any, condition *metav1.Condition, sourceCondit Type: mirrorOpt.targetConditionType, Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, - Message: fmt.Sprintf("Condition %s not yet reported from %s", sourceConditionType, conditionOwner), + Message: fmt.Sprintf("Condition %s not yet reported from %s", sourceConditionType, conditionOwner.Kind), // NOTE: LastTransitionTime and ObservedGeneration will be set when this condition is added to an object by calling Set. } } diff --git a/util/conditions/v1beta2/mirror_test.go b/util/conditions/v1beta2/mirror_test.go index cc8bba75c873..52af53ea162d 100644 --- a/util/conditions/v1beta2/mirror_test.go +++ b/util/conditions/v1beta2/mirror_test.go @@ -41,7 +41,7 @@ func TestMirrorStatusCondition(t *testing.T) { }, conditionType: "Ready", options: []MirrorOption{}, - want: metav1.Condition{Type: "Ready", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "We are good! (from Phase3Obj SourceObject)", LastTransitionTime: now}, + want: metav1.Condition{Type: "Ready", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "We are good! (from Phase3Obj)", LastTransitionTime: now}, }, { name: "Mirror a condition with target type", @@ -50,7 +50,7 @@ func TestMirrorStatusCondition(t *testing.T) { }, conditionType: "Ready", options: []MirrorOption{TargetConditionType("SomethingReady")}, - want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "We are good! (from Phase3Obj SourceObject)", LastTransitionTime: now}, + want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "We are good! (from Phase3Obj)", LastTransitionTime: now}, }, { name: "Mirror a condition with empty message", @@ -59,24 +59,24 @@ func TestMirrorStatusCondition(t *testing.T) { }, conditionType: "Ready", options: []MirrorOption{}, - want: metav1.Condition{Type: "Ready", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "(from Phase3Obj SourceObject)", LastTransitionTime: now}, + want: metav1.Condition{Type: "Ready", Status: metav1.ConditionTrue, Reason: "AllGood!", Message: "", LastTransitionTime: now}, }, { name: "Mirror a condition not yet reported", conditions: []metav1.Condition{}, conditionType: "Ready", options: []MirrorOption{}, - want: metav1.Condition{Type: "Ready", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported from Phase3Obj SourceObject"}, + want: metav1.Condition{Type: "Ready", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported from Phase3Obj"}, }, { name: "Mirror a condition not yet reported with target type", conditions: []metav1.Condition{}, conditionType: "Ready", options: []MirrorOption{TargetConditionType("SomethingReady")}, - want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported from Phase3Obj SourceObject"}, + want: metav1.Condition{Type: "SomethingReady", Status: metav1.ConditionUnknown, Reason: NotYetReportedReason, Message: "Condition Ready not yet reported from Phase3Obj"}, }, { - name: "Mirror a condition not yet reported with a fallback condtion", + name: "Mirror a condition not yet reported with a fallback condition", conditions: []metav1.Condition{}, conditionType: "Ready", options: []MirrorOption{ diff --git a/util/conditions/v1beta2/options.go 
b/util/conditions/v1beta2/options.go index ed9efaba0c09..bca050192a93 100644 --- a/util/conditions/v1beta2/options.go +++ b/util/conditions/v1beta2/options.go @@ -94,14 +94,6 @@ func (t CustomMergeStrategy) ApplyToAggregate(opts *AggregateOptions) { opts.mergeStrategy = t } -// StepCounter adds a step counter message to new summary conditions. -type StepCounter bool - -// ApplyToSummary applies this configuration to the given summary options. -func (t StepCounter) ApplyToSummary(opts *SummaryOptions) { - opts.stepCounter = bool(t) -} - // OwnedConditionTypes allows to define condition types owned by the controller when performing patch apply. // In case of conflicts for the owned conditions, the patch helper will always use the value provided by the controller. func OwnedConditionTypes(conditionTypes ...string) ApplyOption { diff --git a/util/conditions/v1beta2/summary.go b/util/conditions/v1beta2/summary.go index d7a90c2743e8..ae4fd71464de 100644 --- a/util/conditions/v1beta2/summary.go +++ b/util/conditions/v1beta2/summary.go @@ -36,7 +36,6 @@ type SummaryOptions struct { conditionTypes []string negativePolarityConditionTypes []string ignoreTypesIfMissing []string - stepCounter bool } // ApplyOptions applies the given list options on these options, @@ -61,10 +60,11 @@ func (o *SummaryOptions) ApplyOptions(opts []SummaryOption) *SummaryOptions { // Additionally, it is possible to inject custom merge strategies using the CustomMergeStrategy option or // to add a step counter to the generated message by using the StepCounter option. func NewSummaryCondition(sourceObj Getter, targetConditionType string, opts ...SummaryOption) (*metav1.Condition, error) { - summarizeOpt := &SummaryOptions{ - mergeStrategy: newDefaultMergeStrategy(), - } + summarizeOpt := &SummaryOptions{} summarizeOpt.ApplyOptions(opts) + if summarizeOpt.mergeStrategy == nil { + summarizeOpt.mergeStrategy = newDefaultMergeStrategy(sets.New[string](summarizeOpt.negativePolarityConditionTypes...)) + } if len(summarizeOpt.conditionTypes) == 0 { return nil, errors.New("option ForConditionTypes not provided or empty") @@ -110,12 +110,7 @@ func NewSummaryCondition(sourceObj Getter, targetConditionType string, opts ...S return nil, errors.New("summary can't be performed when the list of conditions to be summarized is empty") } - status, reason, message, err := summarizeOpt.mergeStrategy.Merge( - conditionsInScope, - summarizeOpt.conditionTypes, - sets.New[string](summarizeOpt.negativePolarityConditionTypes...), - summarizeOpt.stepCounter, - ) + status, reason, message, err := summarizeOpt.mergeStrategy.Merge(conditionsInScope, summarizeOpt.conditionTypes) if err != nil { return nil, err } diff --git a/util/conditions/v1beta2/summary_test.go b/util/conditions/v1beta2/summary_test.go index 99c243414005..00f166f75264 100644 --- a/util/conditions/v1beta2/summary_test.go +++ b/util/conditions/v1beta2/summary_test.go @@ -21,6 +21,7 @@ import ( . 
"github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/internal/test/builder" @@ -139,7 +140,7 @@ func TestSummary(t *testing.T) { {Type: "!C", Status: metav1.ConditionFalse, Reason: "Reason-!C", Message: "Message-!C"}, // info }, conditionType: clusterv1.AvailableV1Beta2Condition, - options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}, CustomMergeStrategy{newDefaultMergeStrategy()}}, + options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}, CustomMergeStrategy{newDefaultMergeStrategy(sets.New("!C"))}}, want: &metav1.Condition{ Type: clusterv1.AvailableV1Beta2Condition, Status: metav1.ConditionTrue, // True because there are many info @@ -208,22 +209,6 @@ func TestSummary(t *testing.T) { Message: "B: Message-B", // messages from all the info conditions (empty messages are dropped) }, }, - { - name: "With stepCounter", - conditions: []metav1.Condition{ - {Type: "B", Status: metav1.ConditionTrue, Reason: "Reason-B", Message: "Message-B"}, // info - {Type: "A", Status: metav1.ConditionTrue, Reason: "Reason-A", Message: ""}, // info - {Type: "!C", Status: metav1.ConditionUnknown, Reason: "Reason-!C", Message: "Message-!C"}, // unknown - }, - conditionType: clusterv1.AvailableV1Beta2Condition, - options: []SummaryOption{ForConditionTypes{"A", "B", "!C"}, NegativePolarityConditionTypes{"!C"}, StepCounter(true)}, - want: &metav1.Condition{ - Type: clusterv1.AvailableV1Beta2Condition, - Status: metav1.ConditionUnknown, // Unknown because there is one unknown - Reason: "Reason-!C", // Picking the reason from the only existing unknown - Message: "2 of 3 completed; !C: Message-!C", // step counter + messages from all the issues & unknown conditions (info dropped) - }, - }, } for _, tt := range tests { From 8fd2725c8c1467857b8a8a3e32d9208c662d6082 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Thu, 10 Oct 2024 23:28:37 +0200 Subject: [PATCH 5/8] Address comments --- api/v1beta1/machine_types.go | 43 +++++++++---------- api/v1beta1/v1beta2_condition_consts.go | 10 ++--- .../controllers/machine/machine_controller.go | 10 ++--- .../machine/machine_controller_status.go | 17 ++------ .../machine/machine_controller_status_test.go | 26 +---------- internal/webhooks/machine_test.go | 5 +++ util/conditions/v1beta2/getter.go | 10 +---- 7 files changed, 41 insertions(+), 80 deletions(-) diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index 6b63419e46e2..bfcc8af73436 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -89,27 +89,29 @@ const ( // Machine's Available condition and corresponding reasons that will be used in v1Beta2 API version. const ( // MachineAvailableV1Beta2Condition is true if the machine is Ready for at least MinReadySeconds, as defined by the Machine's MinReadySeconds field. + // Note: MinReadySeconds is assumed 0 until it will be implemented in v1beta2 API. MachineAvailableV1Beta2Condition = AvailableV1Beta2Condition - // MachineNotReadyV1Beta2Reason surfaces when a machine is not yet ready (and thus not yet available). + // MachineNotReadyV1Beta2Reason surfaces when a machine is not ready (and thus not available). MachineNotReadyV1Beta2Reason = "NotReady" - // MachineWaitingForMinReadySecondsV1Beta2Reason surfaces when a machine is ready for less then MinReadySeconds (and thus not yet available). 
+ // MachineWaitingForMinReadySecondsV1Beta2Reason surfaces when a machine is ready for less than MinReadySeconds (and thus not yet available). MachineWaitingForMinReadySecondsV1Beta2Reason = "WaitingForMinReadySeconds" // MachineReadyNotYetReportedV1Beta2Reason surfaces when a machine ready is not reported yet. // Note: this should never happen and it is a signal of some internal error. MachineReadyNotYetReportedV1Beta2Reason = "ReadyNotYetReported" - // MachineAvailableV1Beta2Reason surfaces when a machine ready for at least MinReadySeconds. + // MachineAvailableV1Beta2Reason surfaces when a machine is ready for at least MinReadySeconds. + // Note: MinReadySeconds is assumed 0 until it will be implemented in v1beta2 API. MachineAvailableV1Beta2Reason = "MachineAvailable" ) // Machine's Ready condition and corresponding reasons that will be used in v1Beta2 API version. // Note: when possible, Ready condition will use reasons from the conditions it summarizes. const ( - // MachineReadyV1Beta2Condition is true if the Machine is not deleted, Machine's BootstrapConfigReady, InfrastructureReady, - // NodeHealthy and HealthCheckSucceeded (if present) are true; if other conditions are defined in spec.readinessGates, + // MachineReadyV1Beta2Condition is true if the Machine's deletionTimestamp is not set, Machine's BootstrapConfigReady, InfrastructureReady, + // NodeHealthy and HealthCheckSucceeded (if present) conditions are true; if other conditions are defined in spec.readinessGates, // these conditions must be true as well. MachineReadyV1Beta2Condition = ReadyV1Beta2Condition @@ -132,12 +134,9 @@ const ( // MachineBootstrapConfigReadyV1Beta2Condition condition mirrors the corresponding Ready condition from the Machine's BootstrapConfig resource. MachineBootstrapConfigReadyV1Beta2Condition = BootstrapConfigReadyV1Beta2Condition - // MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason surfaces when a bootstrap data secret is provided by the user (without a ConfigRef). - MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason = "DataSecretUserProvided" - - // MachineBootstrapInvalidConfigV1Beta2Reason surfaces when Machine's spec.bootstrap doesn't have configRef nor a - // dataSecretName set. - MachineBootstrapInvalidConfigV1Beta2Reason = "InvalidConfig" + // MachineBootstrapDataSecretProvidedV1Beta2Reason surfaces when a bootstrap data secret is provided (not originated + // from a BoostrapConfig object referenced from the machine). + MachineBootstrapDataSecretProvidedV1Beta2Reason = "DataSecretProvided" // MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason surfaces a BootstrapConfig Ready condition (read from a bootstrap config object) which is invalid. // (e.g. it is status is missing). @@ -150,13 +149,13 @@ const ( MachineBootstrapConfigInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason // MachineBootstrapConfigDoesNotExistV1Beta2Reason surfaces when a referenced bootstrap config object does not exist. - // Note: this could happen when creating the machine. However, this state should be treated as an error if it last indefinitely. - MachineBootstrapConfigDoesNotExistV1Beta2Reason = RefObjectDoesNotExistV1Beta2Reason + // Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely. + MachineBootstrapConfigDoesNotExistV1Beta2Reason = ObjectDoesNotExistV1Beta2Reason // MachineBootstrapConfigDeletedV1Beta2Reason surfaces when a referenced bootstrap config object has been deleted. 
- // Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g. + // Note: controllers can't identify if the bootstrap config object was deleted by the controller itself, e.g. // during the deletion workflow, or by a users. - MachineBootstrapConfigDeletedV1Beta2Reason = RefObjectDeletedV1Beta2Reason + MachineBootstrapConfigDeletedV1Beta2Reason = ObjectDeletedV1Beta2Reason ) // Machine's InfrastructureReady condition and corresponding reasons that will be used in v1Beta2 API version. @@ -172,17 +171,17 @@ const ( // MachineInfrastructureReadyNoV1Beta2ReasonReported applies to a infrastructure Ready condition (read from an infra machine object) that reports no reason. MachineInfrastructureReadyNoV1Beta2ReasonReported = NoV1Beta2ReasonReported - // MachineInfrastructureInternalErrorV1Beta2Reason surfaces unexpected failures when reading a BootstrapConfig object. + // MachineInfrastructureInternalErrorV1Beta2Reason surfaces unexpected failures when reading an infra machine object. MachineInfrastructureInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason // MachineInfrastructureDoesNotExistV1Beta2Reason surfaces when a referenced infrastructure object does not exist. - // Note: this could happen when creating the machine. However, this state should be treated as an error if it last indefinitely. - MachineInfrastructureDoesNotExistV1Beta2Reason = RefObjectDoesNotExistV1Beta2Reason + // Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely. + MachineInfrastructureDoesNotExistV1Beta2Reason = ObjectDoesNotExistV1Beta2Reason // MachineInfrastructureDeletedV1Beta2Reason surfaces when a referenced infrastructure object has been deleted. - // Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g. + // Note: controllers can't identify if the infrastructure object was deleted by the controller itself, e.g. // during the deletion workflow, or by a users. - MachineInfrastructureDeletedV1Beta2Reason = RefObjectDeletedV1Beta2Reason + MachineInfrastructureDeletedV1Beta2Reason = ObjectDeletedV1Beta2Reason ) // Machine's NodeHealthy and NodeReady conditions and corresponding reasons that will be used in v1Beta2 API version. @@ -198,11 +197,11 @@ const ( MachineNodeConditionNotYetReportedV1Beta2Reason = "NodeConditionNotYetReported" // MachineNodeNotFoundV1Beta2Reason surfaces when the node hosted on the machine cannot be found. - // Note: this could happen when creating the machine. However, this state should be treated as an error if it last indefinitely. + // Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely. MachineNodeNotFoundV1Beta2Reason = "NodeNotFound" // MachineNodeDeletedV1Beta2Reason surfaces when the node hosted on the machine has been deleted. - // Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g. + // Note: controllers can't identify if the Node was deleted by the controller itself, e.g. // during the deletion workflow, or by a users.
MachineNodeDeletedV1Beta2Reason = "NodeDeleted" ) diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go index 2d3841b4d41a..09d625ba812a 100644 --- a/api/v1beta1/v1beta2_condition_consts.go +++ b/api/v1beta1/v1beta2_condition_consts.go @@ -99,13 +99,13 @@ const ( // In most cases, it will be required to look at controllers logs to proper triage those issues. InternalErrorV1Beta2Reason = "InternalError" - // RefObjectDoesNotExistV1Beta2Reason surfaces when a referenced object does not exist. - RefObjectDoesNotExistV1Beta2Reason = "RefObjectDoesNotExist" + // ObjectDoesNotExistV1Beta2Reason surfaces when a referenced object does not exist. + ObjectDoesNotExistV1Beta2Reason = "ObjectDoesNotExist" - // RefObjectDeletedV1Beta2Reason surfaces when a referenced object has been deleted. - // Note: controllers can't identify if the deletion process has been initiated by the controller itself, e.g. + // ObjectDeletedV1Beta2Reason surfaces when a referenced object has been deleted. + // Note: controllers can't identify if the object was deleted by the controller itself, e.g. // during the deletion workflow, or by a users. - RefObjectDeletedV1Beta2Reason = "RefObjectDeleted" + ObjectDeletedV1Beta2Reason = "ObjectDeleted" // NotPausedV1Beta2Reason surfaces when an object is not paused. NotPausedV1Beta2Reason = "NotPaused" diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index ce680f3ebdf4..91076bf42276 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -345,14 +345,14 @@ type scope struct { // Machine. It is set after reconcileInfrastructure is called. infraMachine *unstructured.Unstructured - // infraMachineNotFound is true if getting the infra machine object failed with an IsNotFound err + // infraMachineNotFound is true if getting the infra machine object failed with an NotFound err infraMachineIsNotFound bool // bootstrapConfig is the BootstrapConfig object that is referenced by the // Machine. It is set after reconcileBootstrap is called. bootstrapConfig *unstructured.Unstructured - // bootstrapConfigNotFound is true if getting the BootstrapConfig object failed with an IsNotFound err + // bootstrapConfigNotFound is true if getting the BootstrapConfig object failed with an NotFound err bootstrapConfigIsNotFound bool // node is the Kubernetes node hosted on the machine. @@ -360,7 +360,7 @@ type scope struct { } func (r *Reconciler) reconcileMachineOwnerAndLabels(_ context.Context, s *scope) (ctrl.Result, error) { - // If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly + // If the machine is a stand-alone Machine, then set it as directly // owned by the Cluster (if not already present). 
if r.shouldAdopt(s.machine) { s.machine.SetOwnerReferences(util.EnsureOwnerRef(s.machine.GetOwnerReferences(), metav1.OwnerReference{ @@ -882,7 +882,7 @@ func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, s *scope) (bo if err := r.Client.Delete(ctx, s.bootstrapConfig); err != nil && !apierrors.IsNotFound(err) { return false, errors.Wrapf(err, "failed to delete %v %q for Machine %q in namespace %q", - s.bootstrapConfig.GroupVersionKind(), s.bootstrapConfig.GetName(), s.machine.Name, s.machine.Namespace) + s.bootstrapConfig.GroupVersionKind().Kind, s.bootstrapConfig.GetName(), s.machine.Name, s.machine.Namespace) } } @@ -899,7 +899,7 @@ func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, s *scope if err := r.Client.Delete(ctx, s.infraMachine); err != nil && !apierrors.IsNotFound(err) { return false, errors.Wrapf(err, "failed to delete %v %q for Machine %q in namespace %q", - s.infraMachine.GroupVersionKind(), s.infraMachine.GetName(), s.machine.Name, s.machine.Namespace) + s.infraMachine.GroupVersionKind().Kind, s.infraMachine.GetName(), s.machine.Name, s.machine.Namespace) } } diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go index b848c1d407f8..f8c83266f59c 100644 --- a/internal/controllers/machine/machine_controller_status.go +++ b/internal/controllers/machine/machine_controller_status.go @@ -86,21 +86,10 @@ func (r *Reconciler) reconcileStatus(ctx context.Context, s *scope) { func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, bootstrapConfig *unstructured.Unstructured, bootstrapConfigIsNotFound bool) { if machine.Spec.Bootstrap.ConfigRef == nil { - if ptr.Deref(machine.Spec.Bootstrap.DataSecretName, "") != "" { - v1beta2conditions.Set(machine, metav1.Condition{ - Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: clusterv1.MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason, - }) - return - } - - // Note: validation web hooks should prevent invalid configuration to happen. 
v1beta2conditions.Set(machine, metav1.Condition{ - Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason, - Message: "Either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineBootstrapDataSecretProvidedV1Beta2Reason, }) return } diff --git a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go index e601d2a6401d..035fdc81063b 100644 --- a/internal/controllers/machine/machine_controller_status_test.go +++ b/internal/controllers/machine/machine_controller_status_test.go @@ -80,31 +80,7 @@ func TestSetBootstrapReadyCondition(t *testing.T) { expectCondition: metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, Status: metav1.ConditionTrue, - Reason: clusterv1.MachineBootstrapDataSecretDataSecretUserProvidedV1Beta2Reason, - }, - }, - { - name: "machine without bootstrap config ref and with dataSecretName not set", - machine: func() *clusterv1.Machine { - m := defaultMachine.DeepCopy() - m.Spec.Bootstrap.ConfigRef = nil - return m - }(), - bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ - "kind": "GenericBootstrapConfig", - "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", - "metadata": map[string]interface{}{ - "name": "bootstrap-config1", - "namespace": metav1.NamespaceDefault, - }, - "status": map[string]interface{}{}, - }}, - bootstrapConfigIsNotFound: false, - expectCondition: metav1.Condition{ - Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineBootstrapInvalidConfigV1Beta2Reason, - Message: "Either spec.bootstrap.configRef must be set or spec.bootstrap.dataSecretName must not be empty", + Reason: clusterv1.MachineBootstrapDataSecretProvidedV1Beta2Reason, }, }, { diff --git a/internal/webhooks/machine_test.go b/internal/webhooks/machine_test.go index 0b1732900c94..cf9eb951e739 100644 --- a/internal/webhooks/machine_test.go +++ b/internal/webhooks/machine_test.go @@ -69,6 +69,11 @@ func TestMachineBootstrapValidation(t *testing.T) { bootstrap: clusterv1.Bootstrap{ConfigRef: nil, DataSecretName: ptr.To("test")}, expectErr: false, }, + { + name: "should not return error if dataSecretName is set", + bootstrap: clusterv1.Bootstrap{ConfigRef: nil, DataSecretName: ptr.To("")}, + expectErr: false, + }, { name: "should not return error if config ref is set", bootstrap: clusterv1.Bootstrap{ConfigRef: &corev1.ObjectReference{}, DataSecretName: nil}, diff --git a/util/conditions/v1beta2/getter.go b/util/conditions/v1beta2/getter.go index d414aca5700c..3d8364fde850 100644 --- a/util/conditions/v1beta2/getter.go +++ b/util/conditions/v1beta2/getter.go @@ -177,16 +177,8 @@ func validateAndFixConvertedCondition(c *metav1.Condition) error { default: return errors.Errorf("status for the %s condition must be one of %s, %s, %s", c.Type, metav1.ConditionTrue, metav1.ConditionFalse, metav1.ConditionUnknown) } - if c.Reason == "" { - switch c.Status { - case metav1.ConditionFalse: // When using old Cluster API condition utils, for conditions with Status false, Reason can be empty only when a condition has negative polarity (means "good") - c.Reason = NoReasonReported - case metav1.ConditionTrue: // When using old Cluster API condition utils, for 
conditions with Status true, Reason can be empty only when a condition has positive polarity (means "good"). - c.Reason = NoReasonReported - case metav1.ConditionUnknown: - return errors.Errorf("reason must be set for the %s condition, status unknown", c.Type) - } + c.Reason = NoReasonReported } // NOTE: Empty LastTransitionTime is tolerated because it will be set when assigning the newly generated mirror condition to an object. From d2760b4c69766cb88f431586750e72c05277b2a3 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Fri, 11 Oct 2024 13:00:07 +0200 Subject: [PATCH 6/8] Fix MS tests + make mirror Node Ready more robust --- internal/controllers/machine/machine_controller_noderef.go | 2 +- internal/controllers/machine/machine_controller_status.go | 6 +++++- internal/controllers/machineset/suite_test.go | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go index 72b298df059d..9643b3457608 100644 --- a/internal/controllers/machine/machine_controller_noderef.go +++ b/internal/controllers/machine/machine_controller_noderef.go @@ -153,12 +153,12 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result, r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulSetInterruptibleNodeLabel", s.node.Name) } - // Do the remaining node health checks, then set the node health to true if all checks pass. if s.infraMachine == nil || !s.infraMachine.GetDeletionTimestamp().IsZero() { conditions.MarkFalse(s.machine, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") return ctrl.Result{}, nil } + // Do the remaining node health checks, then set the node health to true if all checks pass. status, message := summarizeNodeConditions(s.node) if status == corev1.ConditionFalse { conditions.MarkFalse(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeConditionsFailedReason, clusterv1.ConditionSeverityWarning, message) diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go index f8c83266f59c..469185af05cc 100644 --- a/internal/controllers/machine/machine_controller_status.go +++ b/internal/controllers/machine/machine_controller_status.go @@ -230,11 +230,15 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma if condition.Message != "" { message = fmt.Sprintf("%s (from Node)", condition.Message) } + reason := condition.Reason + if reason == "" { + reason = clusterv1.NoV1Beta2ReasonReported + } nodeReady = &metav1.Condition{ Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionStatus(condition.Status), LastTransitionTime: condition.LastTransitionTime, - Reason: condition.Reason, + Reason: reason, Message: message, } } diff --git a/internal/controllers/machineset/suite_test.go b/internal/controllers/machineset/suite_test.go index 49cf1a7ce6c8..f42aa1737b58 100644 --- a/internal/controllers/machineset/suite_test.go +++ b/internal/controllers/machineset/suite_test.go @@ -182,7 +182,7 @@ func fakeMachineNodeRef(m *clusterv1.Machine, pid string, g *WithT) { // Patch the node and make it look like ready. 
patchNode := client.MergeFrom(node.DeepCopy()) - node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{Type: corev1.NodeReady, Status: corev1.ConditionTrue}) + node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{Type: corev1.NodeReady, Status: corev1.ConditionTrue, Reason: "SomeReason"}) g.Expect(env.Status().Patch(ctx, node, patchNode)).To(Succeed()) // Patch the Machine. From 82ee71d55057452075b5a069070f112b1917c0e7 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Fri, 11 Oct 2024 17:51:36 +0200 Subject: [PATCH 7/8] More comments --- api/v1beta1/machine_types.go | 32 ++- api/v1beta1/v1beta2_condition_consts.go | 15 +- .../controllers/machine/machine_controller.go | 4 +- .../machine/machine_controller_phases.go | 14 +- .../machine/machine_controller_status.go | 214 ++++++++++-------- .../machine/machine_controller_status_test.go | 153 ++++++++++--- util/conditions/v1beta2/merge_strategies.go | 14 +- util/conditions/v1beta2/mirror.go | 25 +- 8 files changed, 293 insertions(+), 178 deletions(-) diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index bfcc8af73436..d4396c3f0691 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -104,7 +104,7 @@ const ( // MachineAvailableV1Beta2Reason surfaces when a machine is ready for at least MinReadySeconds. // Note: MinReadySeconds is assumed 0 until it will be implemented in v1beta2 API. - MachineAvailableV1Beta2Reason = "MachineAvailable" + MachineAvailableV1Beta2Reason = AvailableV1Beta2Condition ) // Machine's Ready condition and corresponding reasons that will be used in v1Beta2 API version. @@ -124,7 +124,7 @@ const ( // Note: UpToDate condition is set by the controller owning the machine. const ( // MachineUpToDateV1Beta2Condition is true if the Machine spec matches the spec of the Machine's owner resource, e.g. KubeadmControlPlane or MachineDeployment. - // The Machine's owner (e.g MachineDeployment) is authoritative to set their owned Machine's UpToDate conditions based on its current spec. + // The Machine's owner (e.g. MachineDeployment) is authoritative to set their owned Machine's UpToDate conditions based on its current spec. MachineUpToDateV1Beta2Condition = "UpToDate" ) @@ -139,11 +139,11 @@ const ( MachineBootstrapDataSecretProvidedV1Beta2Reason = "DataSecretProvided" // MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason surfaces a BootstrapConfig Ready condition (read from a bootstrap config object) which is invalid. - // (e.g. it is status is missing). - MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason = InvalidConditionReported + // (e.g. its status is missing). + MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason = InvalidConditionReportedV1Beta2Reason - // MachineBootstrapConfigReadyNoV1Beta2ReasonReported applies to a BootstrapConfig Ready condition (read from a bootstrap config object) that reports no reason. - MachineBootstrapConfigReadyNoV1Beta2ReasonReported = NoV1Beta2ReasonReported + // MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason applies to a BootstrapConfig Ready condition (read from a bootstrap config object) that reports no reason. + MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason = NoV1Beta2ReasonReported // MachineBootstrapConfigInternalErrorV1Beta2Reason surfaces unexpected failures when reading a BootstrapConfig object. 
MachineBootstrapConfigInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason @@ -165,11 +165,11 @@ const ( MachineInfrastructureReadyV1Beta2Condition = InfrastructureReadyV1Beta2Condition // MachineInfrastructureInvalidConditionReportedV1Beta2Reason surfaces a infrastructure Ready condition (read from an infra machine object) which is invalid. - // (e.g. it is status is missing). - MachineInfrastructureInvalidConditionReportedV1Beta2Reason = InvalidConditionReported + // (e.g. its status is missing). + MachineInfrastructureInvalidConditionReportedV1Beta2Reason = InvalidConditionReportedV1Beta2Reason - // MachineInfrastructureReadyNoV1Beta2ReasonReported applies to a infrastructure Ready condition (read from an infra machine object) that reports no reason. - MachineInfrastructureReadyNoV1Beta2ReasonReported = NoV1Beta2ReasonReported + // MachineInfrastructureReadyNoReasonReportedV1Beta2Reason applies to a infrastructure Ready condition (read from an infra machine object) that reports no reason. + MachineInfrastructureReadyNoReasonReportedV1Beta2Reason = NoV1Beta2ReasonReported // MachineInfrastructureInternalErrorV1Beta2Reason surfaces unexpected failures when reading a infra machine object. MachineInfrastructureInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason @@ -196,14 +196,14 @@ const ( // MachineNodeConditionNotYetReportedV1Beta2Reason surfaces when a Machine's Node doesn't have a condition reported yet. MachineNodeConditionNotYetReportedV1Beta2Reason = "NodeConditionNotYetReported" - // MachineNodeNotFoundV1Beta2Reason surfaces when the node hosted on the machine cannot be found. + // MachineNodeDoesNotExistV1Beta2Reason surfaces when the node hosted on the machine does not exist. // Note: this could happen when creating the machine. However, this state should be treated as an error if it lasts indefinitely. - MachineNodeNotFoundV1Beta2Reason = "NodeNotFound" + MachineNodeDoesNotExistV1Beta2Reason = ObjectDoesNotExistV1Beta2Reason // MachineNodeDeletedV1Beta2Reason surfaces when the node hosted on the machine has been deleted. // Note: controllers can't identify if the Node was deleted by the controller itself, e.g. // during the deletion workflow, or by a users. - MachineNodeDeletedV1Beta2Reason = "NodeDeleted" + MachineNodeDeletedV1Beta2Reason = ObjectDeletedV1Beta2Reason ) // Machine's HealthCheckSucceeded and OwnerRemediated conditions and corresponding reasons that will be used in v1Beta2 API version. @@ -228,12 +228,6 @@ const ( const ( // MachinePausedV1Beta2Condition is true if the Machine or the Cluster it belongs to are paused. MachinePausedV1Beta2Condition = PausedV1Beta2Condition - - // MachineNotPausedV1Beta2Reason surfaces when a Machine is not paused. - MachineNotPausedV1Beta2Reason = NotPausedV1Beta2Reason - - // MachineObjectPausedV1Beta2Reason surfaces when a Machine is paused. - MachineObjectPausedV1Beta2Reason = ObjectPausedV1Beta2Reason ) // ANCHOR: MachineSpec diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go index 09d625ba812a..bfe873b20f34 100644 --- a/api/v1beta1/v1beta2_condition_consts.go +++ b/api/v1beta1/v1beta2_condition_consts.go @@ -87,16 +87,16 @@ const ( // Reasons that are used across different objects. const ( - // InvalidConditionReported applies to a condition, usually read from an external object, that is invalid - // (e.g. it is status is missing). 
- InvalidConditionReported = "InvalidConditionReported" + // InvalidConditionReportedV1Beta2Reason applies to a condition, usually read from an external object, that is invalid + // (e.g. its status is missing). + InvalidConditionReportedV1Beta2Reason = "InvalidConditionReported" // NoV1Beta2ReasonReported applies to a condition, usually read from an external object, that reports no reason. // Note: this could happen e.g. when an external object still uses Cluster API v1beta1 Conditions. NoV1Beta2ReasonReported = "NoReasonReported" // InternalErrorV1Beta2Reason surfaces unexpected errors reporting by controllers. - // In most cases, it will be required to look at controllers logs to proper triage those issues. + // In most cases, it will be required to look at controllers logs to properly triage those issues. InternalErrorV1Beta2Reason = "InternalError" // ObjectDoesNotExistV1Beta2Reason surfaces when a referenced object does not exist. @@ -110,11 +110,8 @@ const ( // NotPausedV1Beta2Reason surfaces when an object is not paused. NotPausedV1Beta2Reason = "NotPaused" - // ClusterPausedV1Beta2Reason surfaces when an Cluster is paused. - ClusterPausedV1Beta2Reason = "ClusterPaused" - - // ObjectPausedV1Beta2Reason surfaces when an object is paused. - ObjectPausedV1Beta2Reason = "ObjectPaused" + // PausedV1Beta2Reason surfaces when an object is paused. + PausedV1Beta2Reason = "Paused" ) // Conditions that will be used for the MachineSet object in v1Beta2 API version. diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 91076bf42276..7f6dc8f30394 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -878,7 +878,7 @@ func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, s *scope) (bo return true, nil } - if s.bootstrapConfig != nil { + if s.bootstrapConfig != nil && s.bootstrapConfig.GetDeletionTimestamp().IsZero() { if err := r.Client.Delete(ctx, s.bootstrapConfig); err != nil && !apierrors.IsNotFound(err) { return false, errors.Wrapf(err, "failed to delete %v %q for Machine %q in namespace %q", @@ -895,7 +895,7 @@ func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, s *scope return true, nil } - if s.infraMachine != nil { + if s.infraMachine != nil && s.infraMachine.GetDeletionTimestamp().IsZero() { if err := r.Client.Delete(ctx, s.infraMachine); err != nil && !apierrors.IsNotFound(err) { return false, errors.Wrapf(err, "failed to delete %v %q for Machine %q in namespace %q", diff --git a/internal/controllers/machine/machine_controller_phases.go b/internal/controllers/machine/machine_controller_phases.go index 8fa3258094a4..ffe24b8f340b 100644 --- a/internal/controllers/machine/machine_controller_phases.go +++ b/internal/controllers/machine/machine_controller_phases.go @@ -49,7 +49,7 @@ var externalReadyWait = 30 * time.Second func (r *Reconciler) reconcileExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) { if err := utilconversion.UpdateReferenceAPIContract(ctx, r.Client, ref); err != nil { if apierrors.IsNotFound(err) { - // We want to surface IsNotFound only for the referenced object, so we use a generic error in case CRD is not found. + // We want to surface the NotFound error only for the referenced object, so we use a generic error in case CRD is not found. 
return nil, errors.New(err.Error()) } return nil, err @@ -180,6 +180,10 @@ func (r *Reconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Res fallBack, ) + if !s.bootstrapConfig.GetDeletionTimestamp().IsZero() { + return ctrl.Result{}, nil + } + // If the bootstrap provider is not ready, return. if !ready { log.Info("Waiting for bootstrap provider to generate data secret and report status.ready", s.bootstrapConfig.GetKind(), klog.KObj(s.bootstrapConfig)) @@ -233,10 +237,6 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr } s.infraMachine = obj - if !s.infraMachine.GetDeletionTimestamp().IsZero() { - return ctrl.Result{}, nil - } - // Determine if the infrastructure provider is ready. ready, err := external.IsReady(s.infraMachine) if err != nil { @@ -256,6 +256,10 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr fallBack, ) + if !s.infraMachine.GetDeletionTimestamp().IsZero() { + return ctrl.Result{}, nil + } + // If the infrastructure provider is not ready (and it wasn't ready before), return early. if !ready && !m.Status.InfrastructureReady { log.Info("Waiting for infrastructure provider to create machine infrastructure and report status.ready", s.infraMachine.GetKind(), klog.KObj(s.infraMachine)) diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go index 469185af05cc..98fa2b9fcb08 100644 --- a/internal/controllers/machine/machine_controller_status.go +++ b/internal/controllers/machine/machine_controller_status.go @@ -31,6 +31,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/internal/contract" + "sigs.k8s.io/cluster-api/util/annotations" v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" "sigs.k8s.io/cluster-api/util/patch" ) @@ -43,35 +44,27 @@ import ( // Note: v1beta1 conditions are not managed by this func. func (r *Reconciler) reconcileStatus(ctx context.Context, s *scope) { // Update status from the Bootstrap Config external resource. - // Note: the Following Status fields are managed in reconcileBootstrap. - // - status.BootstrapReady - // - status.Addresses - // - status.FailureReason - // - status.FailureMessage + // Note: some of the status fields derived from the Bootstrap Config are managed in reconcileBootstrap, e.g. status.BootstrapReady, etc. + // here we are taking care only of the delta (condition). setBootstrapReadyCondition(ctx, s.machine, s.bootstrapConfig, s.bootstrapConfigIsNotFound) // Update status from the InfraMachine external resource. - // Note: the Following Status field are managed in reconcileInfrastructure. - // - status.InfrastructureReady - // - status.FailureReason - // - status.FailureMessage + // Note: some of the status fields derived from the InfraMachine are managed in reconcileInfrastructure, e.g. status.InfrastructureReady, etc. + // here we are taking care only of the delta (condition). setInfrastructureReadyCondition(ctx, s.machine, s.infraMachine, s.infraMachineIsNotFound) // Update status from the Node external resource. - // Note: the Following Status field are managed in reconcileNode. - // - status.NodeRef - // - status.NodeInfo + // Note: some of the status fields are managed in reconcileNode, e.g. status.NodeRef, etc. + // here we are taking care only of the delta (condition). 
setNodeHealthyAndReadyConditions(ctx, s.machine, s.node) // Updates Machine status not observed from Bootstrap Config, InfraMachine or Node (update Machine's own status). - // Note: - // - status.CertificatesExpiryDate is managed in reconcileCertificateExpiry. - // - status.ObservedGeneration is updated by the defer patch at the end of the main reconcile loop. - // - status.Deletion nested fields are updated in reconcileDelete. - // - UpToDate condition is set by machine's owner controller. // TODO: compute UpToDate for stand alone machines - // - HealthCheckSucceeded is set by the MHC controller. - // - OwnerRemediated conditions is set by the MHC controller, but the it is updated by the controller owning the machine - // while it carries over the remediation process. + // Note: some of the status are set in reconcileCertificateExpiry (e.g.status.CertificatesExpiryDate), + // in reconcileDelete (e.g. status.Deletion nested fields), and also in the defer patch at the end of the main reconcile loop (status.ObservedGeneration) etc. + // Note: also other controllers adds conditions to the machine object (machine's owner controller sets the UpToDate condition, + // MHC controller sets HealthCheckSucceeded and OwnerRemediated conditions, KCP sets conditions about etcd and control plane pods). + + // TODO: Set the uptodate condition for standalone pods setReadyCondition(ctx, s.machine) @@ -100,13 +93,13 @@ func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, b contract.Bootstrap().ReadyConditionType(), v1beta2conditions.TargetConditionType(clusterv1.MachineBootstrapConfigReadyV1Beta2Condition), v1beta2conditions.FallbackCondition{ Status: v1beta2conditions.BoolToStatus(machine.Status.BootstrapReady), - Reason: clusterv1.MachineBootstrapConfigReadyNoV1Beta2ReasonReported, + Reason: clusterv1.MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason, Message: fmt.Sprintf("%s status.ready is %t", machine.Spec.Bootstrap.ConfigRef.Kind, machine.Status.BootstrapReady), }, ); err != nil { v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionUnknown, Reason: clusterv1.MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason, Message: err.Error(), }) @@ -125,10 +118,8 @@ func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, b return } - // Tolerate Bootstrap config missing when the machine is deleting. - // NOTE: this code assumes that Bootstrap config deletion has been initiated by the controller itself, - // and thus this state is reported as Deleted instead of NotFound. - if !machine.DeletionTimestamp.IsZero() { + // Bootstrap config missing when the machine is deleting and we know that the BootstrapConfig actually existed. + if !machine.DeletionTimestamp.IsZero() && machine.Status.BootstrapReady { v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -141,7 +132,7 @@ func setBootstrapReadyCondition(_ context.Context, machine *clusterv1.Machine, b // If the machine is not deleting, and boostrap config object does not exist, // surface this fact. This could happen when: // - when applying the yaml file with the machine and all the objects referenced by it (provisioning yet to start/started, but status.nodeRef not yet set). - // - when the machine has been provisioned (status.nodeRef is set). 
+ // - when the machine has been provisioned v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -157,13 +148,13 @@ func setInfrastructureReadyCondition(_ context.Context, machine *clusterv1.Machi contract.InfrastructureMachine().ReadyConditionType(), v1beta2conditions.TargetConditionType(clusterv1.MachineInfrastructureReadyV1Beta2Condition), v1beta2conditions.FallbackCondition{ Status: v1beta2conditions.BoolToStatus(machine.Status.InfrastructureReady), - Reason: clusterv1.MachineInfrastructureReadyNoV1Beta2ReasonReported, - Message: fmt.Sprintf("%s status.ready is %t", machine.Spec.InfrastructureRef.Kind, machine.Status.BootstrapReady), + Reason: clusterv1.MachineInfrastructureReadyNoReasonReportedV1Beta2Reason, + Message: fmt.Sprintf("%s status.ready is %t", machine.Spec.InfrastructureRef.Kind, machine.Status.InfrastructureReady), }, ); err != nil { v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionUnknown, Reason: clusterv1.MachineInfrastructureInvalidConditionReportedV1Beta2Reason, Message: err.Error(), }) @@ -182,28 +173,36 @@ func setInfrastructureReadyCondition(_ context.Context, machine *clusterv1.Machi return } - // Tolerate infra machine missing when the machine is deleting. - // NOTE: this code assumes that infra machine deletion has been initiated by the controller itself, - // and thus this state is reported as Deleted instead of NotFound. + // Infra machine missing when the machine is deleting. // NOTE: in case an accidental deletion happens before volume detach is completed, the Node hosted on the Machine // will be considered unreachable Machine deletion will complete. if !machine.DeletionTimestamp.IsZero() { + if machine.Status.InfrastructureReady { + v1beta2conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason, + Message: fmt.Sprintf("%s has been deleted", machine.Spec.InfrastructureRef.Kind), + }) + return + } + v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason, - Message: fmt.Sprintf("%s has been deleted", machine.Spec.InfrastructureRef.Kind), + Reason: clusterv1.MachineInfrastructureDoesNotExistV1Beta2Reason, + Message: fmt.Sprintf("%s does not exist", machine.Spec.InfrastructureRef.Kind), }) return } - // Report an issue if infra machine missing after the machine has been initialized. + // Report an issue if infra machine missing after the machine has been initialized (and the machine is still running). if machine.Status.InfrastructureReady { v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionFalse, // setting to false to give more relevance in the ready condition summary. 
Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason, - Message: fmt.Sprintf("%s has been deleted while the machine still exist", machine.Spec.InfrastructureRef.Kind), + Message: fmt.Sprintf("%s has been deleted while the machine still exists", machine.Spec.InfrastructureRef.Kind), }) return } @@ -225,22 +224,24 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma if node != nil { var nodeReady *metav1.Condition for _, condition := range node.Status.Conditions { - if condition.Type == corev1.NodeReady { - message := "" - if condition.Message != "" { - message = fmt.Sprintf("%s (from Node)", condition.Message) - } - reason := condition.Reason - if reason == "" { - reason = clusterv1.NoV1Beta2ReasonReported - } - nodeReady = &metav1.Condition{ - Type: clusterv1.MachineNodeReadyV1Beta2Condition, - Status: metav1.ConditionStatus(condition.Status), - LastTransitionTime: condition.LastTransitionTime, - Reason: reason, - Message: message, - } + if condition.Type != corev1.NodeReady { + continue + } + + message := "" + if condition.Message != "" { + message = fmt.Sprintf("%s (from Node)", condition.Message) + } + reason := condition.Reason + if reason == "" { + reason = clusterv1.NoV1Beta2ReasonReported + } + nodeReady = &metav1.Condition{ + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionStatus(condition.Status), + LastTransitionTime: condition.LastTransitionTime, + Reason: reason, + Message: message, } } @@ -264,24 +265,39 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma return } - // Tolerate node missing when the machine is deleting. - // NOTE: controllers always assume that node deletion has been initiated by the controller itself, - // and thus this state is reported as Deleted instead of NotFound. + // Node missing when the machine is deleting. // NOTE: in case an accidental deletion happens before volume detach is completed, the Node // will be considered unreachable Machine deletion will complete. 
if !machine.DeletionTimestamp.IsZero() { + if machine.Status.NodeRef != nil { + v1beta2conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, + Message: fmt.Sprintf("Node %s has been deleted", machine.Status.NodeRef.Name), + }) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, + Message: fmt.Sprintf("Node %s has been deleted", machine.Status.NodeRef.Name), + }) + return + } + v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, - Message: "Node has been deleted", + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, + Message: "Node does not exist", }) v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, - Message: "Node has been deleted", + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, + Message: "Node does not exist", }) return } @@ -290,16 +306,16 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma if machine.Status.NodeRef != nil { v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeReadyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionFalse, // setting to false to keep it consistent with node below. Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, - Message: fmt.Sprintf("Node %s has been deleted while the machine still exist", machine.Status.NodeRef.Name), + Message: fmt.Sprintf("Node %s has been deleted while the machine still exists", machine.Status.NodeRef.Name), }) v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeHealthyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionFalse, // setting to false to give more relevance in the ready condition summary. 
Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, - Message: fmt.Sprintf("Node %s has been deleted while the machine still exist", machine.Status.NodeRef.Name), + Message: fmt.Sprintf("Node %s has been deleted while the machine still exists", machine.Status.NodeRef.Name), }) return } @@ -310,15 +326,15 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, - Message: fmt.Sprintf("Waiting for a node with Provider ID %s to exist", *machine.Spec.ProviderID), + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, + Message: fmt.Sprintf("Waiting for a Node with spec.providerID %s to exist", *machine.Spec.ProviderID), }) v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, - Message: fmt.Sprintf("Waiting for a node with Provider ID %s to exist", *machine.Spec.ProviderID), + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, + Message: fmt.Sprintf("Waiting for a Node with spec.providerID %s to exist", *machine.Spec.ProviderID), }) return } @@ -327,18 +343,21 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, Message: fmt.Sprintf("Waiting for %s to report spec.providerID", machine.Spec.InfrastructureRef.Kind), }) v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, Message: fmt.Sprintf("Waiting for %s to report spec.providerID", machine.Spec.InfrastructureRef.Kind), }) } +// summarizeNodeV1Beta2Conditions summarizes a Node's conditions (NodeReady, NodeMemoryPressure, NodeDiskPressure, NodePIDPressure). +// the summary is computed in way that is similar to how v1beta2conditions.NewSummaryCondition works, but in this case the +// implementation is simpler/less flexible and it surfaces only issues & unknown conditions. 
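// For example, a Node reporting Ready=False presumably yields a False summary carrying that condition's
// reason, a Node whose only anomaly is an Unknown condition yields an Unknown summary, and a Node where
// every condition reports its normal state yields a True summary with MultipleInfoReportedReason.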
func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav1.ConditionStatus, string, string) { semanticallyFalseStatus := 0 unknownStatus := 0 @@ -414,7 +433,7 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav } return metav1.ConditionFalse, issueReason, message } - if semanticallyFalseStatus+unknownStatus > 0 { + if unknownStatus > 0 { if unknownReason == "" { unknownReason = v1beta2conditions.NoReasonReported } @@ -423,23 +442,24 @@ func summarizeNodeV1Beta2Conditions(_ context.Context, node *corev1.Node) (metav return metav1.ConditionTrue, v1beta2conditions.MultipleInfoReportedReason, "" } -type machineConditionCostomMergeStrategy struct { +type machineConditionCustomMergeStrategy struct { machine *clusterv1.Machine } -func (c machineConditionCostomMergeStrategy) Merge(conditions []v1beta2conditions.ConditionWithOwnerInfo, conditionTypes []string) (status metav1.ConditionStatus, reason, message string, err error) { +func (c machineConditionCustomMergeStrategy) Merge(conditions []v1beta2conditions.ConditionWithOwnerInfo, conditionTypes []string) (status metav1.ConditionStatus, reason, message string, err error) { return v1beta2conditions.DefaultMergeStrategyWithCustomPriority(func(condition metav1.Condition) v1beta2conditions.MergePriority { // While machine is deleting, treat unknown conditions from external objects as info (it is ok that those objects have been deleted at this stage). if !c.machine.DeletionTimestamp.IsZero() { - if condition.Type == clusterv1.MachineBootstrapConfigReadyV1Beta2Condition && condition.Reason == clusterv1.MachineBootstrapConfigDeletedV1Beta2Reason && condition.Status == metav1.ConditionUnknown { + if condition.Type == clusterv1.MachineBootstrapConfigReadyV1Beta2Condition && condition.Status == metav1.ConditionUnknown && (condition.Reason == clusterv1.MachineBootstrapConfigDeletedV1Beta2Reason || condition.Reason == clusterv1.MachineBootstrapConfigDoesNotExistV1Beta2Reason) { return v1beta2conditions.InfoMergePriority } - if condition.Type == clusterv1.MachineInfrastructureReadyV1Beta2Condition && condition.Reason == clusterv1.MachineInfrastructureDeletedV1Beta2Reason && condition.Status == metav1.ConditionUnknown { + if condition.Type == clusterv1.MachineInfrastructureReadyV1Beta2Condition && condition.Status == metav1.ConditionUnknown && (condition.Reason == clusterv1.MachineInfrastructureDeletedV1Beta2Reason || condition.Reason == clusterv1.MachineInfrastructureDoesNotExistV1Beta2Reason) { return v1beta2conditions.InfoMergePriority } - if condition.Type == clusterv1.MachineNodeHealthyV1Beta2Condition && condition.Reason == clusterv1.MachineNodeDeletedV1Beta2Reason && condition.Status == metav1.ConditionUnknown { + if condition.Type == clusterv1.MachineNodeHealthyV1Beta2Condition && condition.Status == metav1.ConditionUnknown && (condition.Reason == clusterv1.MachineNodeDeletedV1Beta2Reason || condition.Reason == clusterv1.MachineNodeDoesNotExistV1Beta2Reason) { return v1beta2conditions.InfoMergePriority } + // Note: MachineNodeReadyV1Beta2Condition is not relevant for the summary. } return v1beta2conditions.GetDefaultMergePriority(nil)(condition) }).Merge(conditions, conditionTypes) @@ -449,7 +469,7 @@ func setReadyCondition(ctx context.Context, machine *clusterv1.Machine) { log := ctrl.LoggerFrom(ctx) forConditionTypes := v1beta2conditions.ForConditionTypes{ - // TODO: add machine deleting once implemented. + // TODO: add machine deleting condition once implemented. 
clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, clusterv1.MachineInfrastructureReadyV1Beta2Condition, clusterv1.MachineNodeHealthyV1Beta2Condition, @@ -461,13 +481,13 @@ func setReadyCondition(ctx context.Context, machine *clusterv1.Machine) { readyCondition, err := v1beta2conditions.NewSummaryCondition(machine, clusterv1.MachineReadyV1Beta2Condition, forConditionTypes, v1beta2conditions.IgnoreTypesIfMissing{clusterv1.MachineHealthCheckSucceededV1Beta2Condition}, v1beta2conditions.CustomMergeStrategy{ - MergeStrategy: machineConditionCostomMergeStrategy{machine: machine}, + MergeStrategy: machineConditionCustomMergeStrategy{machine: machine}, }, ) if err != nil || readyCondition == nil { // Note, this could only happen if we hit edge cases in computing the summary, which should not happen due to the fact // that we are passing a non empty list of ForConditionTypes. - log.Error(err, "failed to set ready condition") + log.Error(err, "Failed to set ready condition") readyCondition = &metav1.Condition{ Type: clusterv1.MachineReadyV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -503,19 +523,19 @@ func setAvailableCondition(_ context.Context, machine *clusterv1.Machine) { return } - if !time.Now().After(readyCondition.LastTransitionTime.Time.Add(0)) { // TODO: use MinReadySeconds as soon as it is available (and fix corresponding unit test) + if time.Since(readyCondition.LastTransitionTime.Time) >= 0*time.Second { // TODO: use MinReadySeconds as soon as it is available (and fix corresponding unit test) v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineAvailableV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineWaitingForMinReadySecondsV1Beta2Reason, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineAvailableV1Beta2Reason, }) return } v1beta2conditions.Set(machine, metav1.Condition{ Type: clusterv1.MachineAvailableV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: clusterv1.MachineAvailableV1Beta2Reason, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineWaitingForMinReadySecondsV1Beta2Reason, }) } @@ -525,19 +545,19 @@ func setPausedCondition(ctx context.Context, c client.Client, s *scope) error { return err } + var messages []string if s.cluster.Spec.Paused { - v1beta2conditions.Set(s.machine, metav1.Condition{ - Type: clusterv1.MachinePausedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.ClusterPausedV1Beta2Reason, - }) - } else { - v1beta2conditions.Set(s.machine, metav1.Condition{ - Type: clusterv1.MachinePausedV1Beta2Condition, - Status: metav1.ConditionFalse, - Reason: clusterv1.MachineObjectPausedV1Beta2Reason, - }) + messages = append(messages, "Cluster spec.paused is set to true") + } + if annotations.HasPaused(s.machine) { + messages = append(messages, "Machine has the cluster.x-k8s.io/paused annotation") } + v1beta2conditions.Set(s.machine, metav1.Condition{ + Type: clusterv1.MachinePausedV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.PausedV1Beta2Reason, + Message: strings.Join(messages, ", "), + }) return patchHelper.Patch(ctx, s.machine, patch.WithOwnedV1Beta2Conditions{Conditions: []string{ clusterv1.MachinePausedV1Beta2Condition, @@ -549,7 +569,7 @@ func unsetPausedCondition(s *scope) { v1beta2conditions.Set(s.machine, metav1.Condition{ Type: clusterv1.MachinePausedV1Beta2Condition, Status: metav1.ConditionFalse, - Reason: clusterv1.MachineNotPausedV1Beta2Reason, + Reason: clusterv1.NotPausedV1Beta2Reason, }) } diff --git 
a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go index 035fdc81063b..3266be75f87e 100644 --- a/internal/controllers/machine/machine_controller_status_test.go +++ b/internal/controllers/machine/machine_controller_status_test.go @@ -107,13 +107,33 @@ func TestSetBootstrapReadyCondition(t *testing.T) { expectCondition: metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, Status: metav1.ConditionFalse, - Reason: clusterv1.MachineBootstrapConfigReadyNoV1Beta2ReasonReported, + Reason: clusterv1.MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason, Message: "some message (from GenericBootstrapConfig)", }, }, { name: "Use status.BoostrapReady flag as a fallback Ready condition from bootstrap config is missing", machine: defaultMachine.DeepCopy(), + bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericBootstrapConfig", + "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "bootstrap-config1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{}, + }}, + bootstrapConfigIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason, + Message: "GenericBootstrapConfig status.ready is false", + }, + }, + { + name: "Use status.BoostrapReady flag as a fallback Ready condition from bootstrap config is missing (ready true)", + machine: defaultMachine.DeepCopy(), bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ "kind": "GenericBootstrapConfig", "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", @@ -122,14 +142,14 @@ func TestSetBootstrapReadyCondition(t *testing.T) { "namespace": metav1.NamespaceDefault, }, "status": map[string]interface{}{ - "conditions": []interface{}{}, + "ready": true, }, }}, bootstrapConfigIsNotFound: false, expectCondition: metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, Status: metav1.ConditionFalse, - Reason: clusterv1.MachineBootstrapConfigReadyNoV1Beta2ReasonReported, + Reason: clusterv1.MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason, Message: "GenericBootstrapConfig status.ready is false", }, }, @@ -154,7 +174,7 @@ func TestSetBootstrapReadyCondition(t *testing.T) { bootstrapConfigIsNotFound: false, expectCondition: metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionUnknown, Reason: clusterv1.MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason, Message: "failed to convert status.conditions from GenericBootstrapConfig to []metav1.Condition: status must be set for the Ready condition", }, @@ -172,9 +192,10 @@ func TestSetBootstrapReadyCondition(t *testing.T) { }, }, { - name: "bootstrap config not found while machine is deleting", + name: "bootstrap config that was ready not found while machine is deleting", machine: func() *clusterv1.Machine { m := defaultMachine.DeepCopy() + m.Status.BootstrapReady = true m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) return m }(), @@ -187,6 +208,22 @@ func TestSetBootstrapReadyCondition(t *testing.T) { Message: "GenericBootstrapConfig has been deleted", }, }, + { + name: "bootstrap config not found while machine is deleting", + machine: func() *clusterv1.Machine { + m := 
defaultMachine.DeepCopy() + m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + return m + }(), + bootstrapConfig: nil, + bootstrapConfigIsNotFound: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineBootstrapConfigDoesNotExistV1Beta2Reason, + Message: "GenericBootstrapConfig does not exist", + }, + }, { name: "bootstrap config not found", machine: defaultMachine.DeepCopy(), @@ -260,13 +297,33 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { expectCondition: metav1.Condition{ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, Status: metav1.ConditionFalse, - Reason: clusterv1.MachineInfrastructureReadyNoV1Beta2ReasonReported, + Reason: clusterv1.MachineInfrastructureReadyNoReasonReportedV1Beta2Reason, Message: "some message (from GenericInfrastructureMachine)", }, }, { name: "Use status.InfrastructureReady flag as a fallback Ready condition from infra machine is missing", machine: defaultMachine.DeepCopy(), + infraMachine: &unstructured.Unstructured{Object: map[string]interface{}{ + "kind": "GenericInfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "infra-machine1", + "namespace": metav1.NamespaceDefault, + }, + "status": map[string]interface{}{}, + }}, + infraMachineIsNotFound: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineInfrastructureReadyNoReasonReportedV1Beta2Reason, + Message: "GenericInfrastructureMachine status.ready is false", + }, + }, + { + name: "Use status.InfrastructureReady flag as a fallback Ready condition from infra machine is missing (ready true)", + machine: defaultMachine.DeepCopy(), infraMachine: &unstructured.Unstructured{Object: map[string]interface{}{ "kind": "GenericInfrastructureMachine", "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", @@ -275,14 +332,14 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { "namespace": metav1.NamespaceDefault, }, "status": map[string]interface{}{ - "conditions": []interface{}{}, + "ready": true, }, }}, infraMachineIsNotFound: false, expectCondition: metav1.Condition{ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, Status: metav1.ConditionFalse, - Reason: clusterv1.MachineInfrastructureReadyNoV1Beta2ReasonReported, + Reason: clusterv1.MachineInfrastructureReadyNoReasonReportedV1Beta2Reason, Message: "GenericInfrastructureMachine status.ready is false", }, }, @@ -307,7 +364,7 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { infraMachineIsNotFound: false, expectCondition: metav1.Condition{ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionUnknown, Reason: clusterv1.MachineInfrastructureInvalidConditionReportedV1Beta2Reason, Message: "failed to convert status.conditions from GenericInfrastructureMachine to []metav1.Condition: status must be set for the Ready condition", }, @@ -329,10 +386,11 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { }, }, { - name: "infra machine not found while machine is deleting", + name: "infra machine that was ready not found while machine is deleting", machine: func() *clusterv1.Machine { m := defaultMachine.DeepCopy() m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + m.Status.InfrastructureReady = true return m }(), infraMachine: nil, @@ 
-344,6 +402,22 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { Message: "GenericInfrastructureMachine has been deleted", }, }, + { + name: "infra machine not found while machine is deleting", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + return m + }(), + infraMachine: nil, + infraMachineIsNotFound: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineInfrastructureDoesNotExistV1Beta2Reason, + Message: "GenericInfrastructureMachine does not exist", + }, + }, { name: "infra machine not found after the machine has been initialized", machine: func() *clusterv1.Machine { @@ -357,7 +431,7 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineInfrastructureDeletedV1Beta2Reason, - Message: "GenericInfrastructureMachine has been deleted while the machine still exist", + Message: "GenericInfrastructureMachine has been deleted while the machine still exists", }, }, { @@ -535,7 +609,6 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { }, }, { - // TODO: handle missing conditions in summarize node conditions. name: "NodeReady missing from node", machine: defaultMachine.DeepCopy(), node: &corev1.Node{ @@ -562,10 +635,13 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { }, }, { - name: "node not found while machine is deleting", + name: "node that existed not found while machine is deleting", machine: func() *clusterv1.Machine { m := defaultMachine.DeepCopy() m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + m.Status.NodeRef = &corev1.ObjectReference{ + Name: "test-node-1", + } return m }(), node: nil, @@ -574,13 +650,36 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, - Message: "Node has been deleted", + Message: "Node test-node-1 has been deleted", }, { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, - Message: "Node has been deleted", + Message: "Node test-node-1 has been deleted", + }, + }, + }, + { + name: "node not found while machine is deleting", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) + return m + }(), + node: nil, + expectConditions: []metav1.Condition{ + { + Type: clusterv1.MachineNodeHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, + Message: "Node does not exist", + }, + { + Type: clusterv1.MachineNodeReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, + Message: "Node does not exist", }, }, }, @@ -599,13 +698,13 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, - Message: "Node test-node-1 has been deleted while the machine still exist", + Message: "Node test-node-1 has been deleted while the machine still exists", }, { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: 
clusterv1.MachineNodeDeletedV1Beta2Reason, - Message: "Node test-node-1 has been deleted while the machine still exist", + Message: "Node test-node-1 has been deleted while the machine still exists", }, }, }, @@ -621,14 +720,14 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { { Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, - Message: "Waiting for a node with Provider ID foo://test-node-1 to exist", + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, + Message: "Waiting for a Node with spec.providerID foo://test-node-1 to exist", }, { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, - Message: "Waiting for a node with Provider ID foo://test-node-1 to exist", + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, + Message: "Waiting for a Node with spec.providerID foo://test-node-1 to exist", }, }, }, @@ -640,13 +739,13 @@ func TestSetNodeHealthyAndReadyConditions(t *testing.T) { { Type: clusterv1.MachineNodeHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, Message: "Waiting for GenericInfrastructureMachine to report spec.providerID", }, { Type: clusterv1.MachineNodeReadyV1Beta2Condition, Status: metav1.ConditionUnknown, - Reason: clusterv1.MachineNodeNotFoundV1Beta2Reason, + Reason: clusterv1.MachineNodeDoesNotExistV1Beta2Reason, Message: "Waiting for GenericInfrastructureMachine to report spec.providerID", }, }, @@ -717,8 +816,8 @@ func TestSetReadyCondition(t *testing.T) { Conditions: []metav1.Condition{ { Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, - Status: metav1.ConditionTrue, - Reason: "Foo", + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineBootstrapConfigDoesNotExistV1Beta2Reason, }, { Type: clusterv1.InfrastructureReadyV1Beta2Condition, @@ -727,7 +826,7 @@ func TestSetReadyCondition(t *testing.T) { }, { Type: clusterv1.MachineNodeHealthyV1Beta2Condition, - Status: metav1.ConditionTrue, + Status: metav1.ConditionUnknown, Reason: clusterv1.MachineNodeDeletedV1Beta2Reason, }, }, @@ -1528,6 +1627,6 @@ func TestReconcileMachinePhases(t *testing.T) { g.Expect(machine.Status.LastUpdated).NotTo(BeNil()) g.Expect(machine.Status.LastUpdated.After(preUpdate)).To(BeTrue()) return true - }, 100*time.Second).Should(BeTrue()) + }, 10*time.Second).Should(BeTrue()) }) } diff --git a/util/conditions/v1beta2/merge_strategies.go b/util/conditions/v1beta2/merge_strategies.go index f89658924261..9c1e43626716 100644 --- a/util/conditions/v1beta2/merge_strategies.go +++ b/util/conditions/v1beta2/merge_strategies.go @@ -74,15 +74,15 @@ type MergeStrategy interface { } // DefaultMergeStrategyWithCustomPriority is the default merge strategy with a customized getPriority function. 
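Usage sketch only, assuming the package is importable as sigs.k8s.io/cluster-api/util/conditions/v1beta2 and using the helper names as they read after this series (GetDefaultMergePriorityFunc, InfoMergePriority); the "NodeHealthy" condition type is a placeholder. It shows how a custom priority function can treat selected Unknown conditions as plain info before falling back to the default priority, the same pattern the Machine controller applies while a Machine is deleting.

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
)

// deletingAwarePriority downgrades an Unknown condition of a chosen type to info
// and defers to the default priority for everything else. "NodeHealthy" is a
// placeholder condition type used only for this example.
func deletingAwarePriority(c metav1.Condition) v1beta2conditions.MergePriority {
	if c.Type == "NodeHealthy" && c.Status == metav1.ConditionUnknown {
		return v1beta2conditions.InfoMergePriority
	}
	return v1beta2conditions.GetDefaultMergePriorityFunc(nil)(c)
}

func main() {
	// The resulting strategy can be plugged into a summary computation via
	// v1beta2conditions.CustomMergeStrategy{MergeStrategy: ...}.
	_ = v1beta2conditions.DefaultMergeStrategyWithCustomPriority(deletingAwarePriority)

	c := metav1.Condition{Type: "NodeHealthy", Status: metav1.ConditionUnknown, Reason: "NodeDeleted"}
	fmt.Println(deletingAwarePriority(c) == v1beta2conditions.InfoMergePriority) // true
}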
-func DefaultMergeStrategyWithCustomPriority(getPriority func(condition metav1.Condition) MergePriority) MergeStrategy { +func DefaultMergeStrategyWithCustomPriority(getPriorityFunc func(condition metav1.Condition) MergePriority) MergeStrategy { return &defaultMergeStrategy{ - getPriority: getPriority, + getPriorityFunc: getPriorityFunc, } } func newDefaultMergeStrategy(negativePolarityConditionTypes sets.Set[string]) MergeStrategy { return &defaultMergeStrategy{ - getPriority: GetDefaultMergePriority(negativePolarityConditionTypes), + getPriorityFunc: GetDefaultMergePriority(negativePolarityConditionTypes), } } @@ -129,7 +129,7 @@ const ( // defaultMergeStrategy defines the default merge strategy for Cluster API conditions. type defaultMergeStrategy struct { - getPriority func(condition metav1.Condition) MergePriority + getPriorityFunc func(condition metav1.Condition) MergePriority } // Merge all conditions in input based on a strategy that surfaces issues first, then unknown conditions, then info (if none of issues and unknown condition exists). @@ -141,7 +141,7 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit return "", "", "", errors.New("can't merge an empty list of conditions") } - if d.getPriority == nil { + if d.getPriorityFunc == nil { return "", "", "", errors.New("can't merge without a getPriority func") } @@ -157,7 +157,7 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit // sortConditions the relevance defined by the users (the order of condition types), LastTransition time (older first). sortConditions(conditions, conditionTypes) - issueConditions, unknownConditions, infoConditions := splitConditionsByPriority(conditions, d.getPriority) + issueConditions, unknownConditions, infoConditions := splitConditionsByPriority(conditions, d.getPriorityFunc) // Compute the status for the target condition: // Note: This function always returns a condition with positive polarity. @@ -213,7 +213,7 @@ func (d *defaultMergeStrategy) Merge(conditions []ConditionWithOwnerInfo, condit if isSummaryOperation { messages := []string{} for _, condition := range append(issueConditions, append(unknownConditions, infoConditions...)...) { - priority := d.getPriority(condition.Condition) + priority := d.getPriorityFunc(condition.Condition) if priority == InfoMergePriority { // Drop info messages when we are surfacing issues or unknown. if status != metav1.ConditionTrue { diff --git a/util/conditions/v1beta2/mirror.go b/util/conditions/v1beta2/mirror.go index b8435cb6ff4d..2ef8b939dd8f 100644 --- a/util/conditions/v1beta2/mirror.go +++ b/util/conditions/v1beta2/mirror.go @@ -52,7 +52,8 @@ func (o *MirrorOptions) ApplyOptions(opts []MirrorOption) *MirrorOptions { } // NewMirrorCondition create a mirror of the given condition from obj; if the given condition does not exist in the source obj, -// a new condition with status Unknown, reason NotYetReported is created. +// the condition specified in the FallbackCondition is used; if this option is not set, a new condition with status Unknown +// and reason NotYetReported is created. // // By default, the Mirror condition has the same type as the source condition, but this can be changed by using // the TargetConditionType option. 
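A minimal sketch of the mirroring behaviour documented above, not the package's actual code: the mirror keeps status, reason and the original LastTransitionTime, qualifies a non-empty message with the source Kind, and falls back to status Unknown with reason NotYetReported when the source condition is missing. The helper name and the fallback message are illustrative.

package main

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// mirror builds a copy of a condition read from a source object, preserving status,
// reason and the original LastTransitionTime, and qualifying the message with the
// source Kind. When the source condition is missing, it falls back to Unknown/NotYetReported.
func mirror(source *metav1.Condition, sourceKind, targetType string) metav1.Condition {
	if source == nil {
		return metav1.Condition{
			Type:    targetType,
			Status:  metav1.ConditionUnknown,
			Reason:  "NotYetReported",
			Message: fmt.Sprintf("Condition %s not yet reported from %s", targetType, sourceKind),
		}
	}
	message := source.Message
	if message != "" {
		message = fmt.Sprintf("%s (from %s)", source.Message, sourceKind)
	}
	return metav1.Condition{
		Type:               targetType,
		Status:             source.Status,
		LastTransitionTime: source.LastTransitionTime, // keep the time the underlying condition actually changed
		Reason:             source.Reason,
		Message:            message,
	}
}

func main() {
	src := &metav1.Condition{
		Type:               "Ready",
		Status:             metav1.ConditionFalse,
		Reason:             "WaitingForBootstrap",
		Message:            "some message",
		LastTransitionTime: metav1.NewTime(time.Now().Add(-time.Minute)),
	}
	fmt.Println(mirror(src, "GenericInfrastructureMachine", "InfrastructureReady").Message)
	// some message (from GenericInfrastructureMachine)
}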
@@ -62,7 +63,7 @@ func NewMirrorCondition(sourceObj Getter, sourceConditionType string, opts ...Mi
 	return newMirrorCondition(sourceObj, condition, sourceConditionType, opts)
 }
 
-func newMirrorCondition(sourceObj any, condition *metav1.Condition, sourceConditionType string, opts []MirrorOption) *metav1.Condition {
+func newMirrorCondition(sourceObj any, sourceCondition *metav1.Condition, sourceConditionType string, opts []MirrorOption) *metav1.Condition {
 	mirrorOpt := &MirrorOptions{
 		targetConditionType: sourceConditionType,
 	}
@@ -70,17 +71,17 @@ func newMirrorCondition(sourceObj any, condition *metav1.Condit
 
 	conditionOwner := getConditionOwnerInfo(sourceObj)
 
-	if condition != nil {
+	if sourceCondition != nil {
 		message := ""
-		if condition.Message != "" {
-			message = fmt.Sprintf("%s (from %s)", condition.Message, conditionOwner.Kind)
+		if sourceCondition.Message != "" {
+			message = fmt.Sprintf("%s (from %s)", sourceCondition.Message, conditionOwner.Kind)
 		}
 
 		return &metav1.Condition{
 			Type:   mirrorOpt.targetConditionType,
-			Status: condition.Status,
+			Status: sourceCondition.Status,
 			// NOTE: we are preserving the original transition time (when the underlying condition changed)
-			LastTransitionTime: condition.LastTransitionTime,
-			Reason:             condition.Reason,
+			LastTransitionTime: sourceCondition.LastTransitionTime,
+			Reason:             sourceCondition.Reason,
 			Message:            message,
 			// NOTE: ObservedGeneration will be set when this condition is added to an object by calling Set
 			// (also preserving ObservedGeneration from the source object will be confusing when the mirror conditions shows up in the target object).
@@ -93,8 +94,7 @@ func newMirrorCondition(sourceObj any, condition *metav1.Condit
 		Status:  mirrorOpt.fallbackStatus,
 		Reason:  mirrorOpt.fallbackReason,
 		Message: mirrorOpt.fallbackMessage,
-		// NOTE: ObservedGeneration will be set when this condition is added to an object by calling Set.
-		// LastTransitionTime will be set to now.
+		// NOTE: LastTransitionTime and ObservedGeneration will be set when this condition is added to an object by calling Set.
 	}
 }
 
@@ -114,8 +114,9 @@ func SetMirrorCondition(sourceObj Getter, targetObj Setter, sourceConditionType
 	Set(targetObj, *mirrorCondition)
 }
 
-// SetMirrorConditionFromUnstructured is a convenience method that calls NewMirrorCondition to create a mirror condition from the source object,
-// and then calls Set to add the new condition to the target object.
+// SetMirrorConditionFromUnstructured is a convenience method that mirrors the given condition from the unstructured source obj
+// into the target object. It combines UnstructuredGet, NewMirrorCondition (more specifically, it uses only the logic to
+// create a mirror condition), and Set.
func SetMirrorConditionFromUnstructured(sourceObj runtime.Unstructured, targetObj Setter, sourceConditionType string, opts ...MirrorOption) error { condition, err := UnstructuredGet(sourceObj, sourceConditionType) if err != nil { From 7278f4a85bd1e5e6adf68ac87eac85551da91fa0 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Fri, 11 Oct 2024 19:54:31 +0200 Subject: [PATCH 8/8] More comments --- api/v1beta1/machine_types.go | 4 ++-- api/v1beta1/v1beta2_condition_consts.go | 4 ++-- .../machine/machine_controller_status.go | 4 ++-- .../machine/machine_controller_status_test.go | 24 ++++++++++++------- util/conditions/v1beta2/merge_strategies.go | 6 ++--- .../v1beta2/merge_strategies_test.go | 4 ++-- 6 files changed, 27 insertions(+), 19 deletions(-) diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index d4396c3f0691..858be9dec0c2 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -143,7 +143,7 @@ const ( MachineBootstrapConfigInvalidConditionReportedV1Beta2Reason = InvalidConditionReportedV1Beta2Reason // MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason applies to a BootstrapConfig Ready condition (read from a bootstrap config object) that reports no reason. - MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason = NoV1Beta2ReasonReported + MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason = NoReasonReportedV1Beta2Reason // MachineBootstrapConfigInternalErrorV1Beta2Reason surfaces unexpected failures when reading a BootstrapConfig object. MachineBootstrapConfigInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason @@ -169,7 +169,7 @@ const ( MachineInfrastructureInvalidConditionReportedV1Beta2Reason = InvalidConditionReportedV1Beta2Reason // MachineInfrastructureReadyNoReasonReportedV1Beta2Reason applies to a infrastructure Ready condition (read from an infra machine object) that reports no reason. - MachineInfrastructureReadyNoReasonReportedV1Beta2Reason = NoV1Beta2ReasonReported + MachineInfrastructureReadyNoReasonReportedV1Beta2Reason = NoReasonReportedV1Beta2Reason // MachineInfrastructureInternalErrorV1Beta2Reason surfaces unexpected failures when reading a infra machine object. MachineInfrastructureInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go index bfe873b20f34..d97bc735f6cc 100644 --- a/api/v1beta1/v1beta2_condition_consts.go +++ b/api/v1beta1/v1beta2_condition_consts.go @@ -91,9 +91,9 @@ const ( // (e.g. its status is missing). InvalidConditionReportedV1Beta2Reason = "InvalidConditionReported" - // NoV1Beta2ReasonReported applies to a condition, usually read from an external object, that reports no reason. + // NoReasonReportedV1Beta2Reason applies to a condition, usually read from an external object, that reports no reason. // Note: this could happen e.g. when an external object still uses Cluster API v1beta1 Conditions. - NoV1Beta2ReasonReported = "NoReasonReported" + NoReasonReportedV1Beta2Reason = "NoReasonReported" // InternalErrorV1Beta2Reason surfaces unexpected errors reporting by controllers. // In most cases, it will be required to look at controllers logs to properly triage those issues. 
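The renamed NoReasonReportedV1Beta2Reason constant is used as a fallback when a condition read from an external object reports no reason, for example when that object still exposes Cluster API v1beta1 conditions. A small, self-contained sketch of that fallback follows, with an illustrative helper name; metav1.Condition requires Reason to be set, so the substitution keeps mirrored conditions valid.

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// NoReasonReported is the fallback reason used when a condition read from an external
// object (possibly still using v1beta1 conditions, where Reason may be empty) reports none.
const NoReasonReported = "NoReasonReported"

// reasonOrFallback returns the condition's reason, substituting the fallback when it is empty.
func reasonOrFallback(c metav1.Condition) string {
	if c.Reason == "" {
		return NoReasonReported
	}
	return c.Reason
}

func main() {
	fromV1Beta1 := metav1.Condition{Type: "Ready", Status: metav1.ConditionFalse}
	fmt.Println(reasonOrFallback(fromV1Beta1)) // NoReasonReported
}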
diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go index 98fa2b9fcb08..bdc291294428 100644 --- a/internal/controllers/machine/machine_controller_status.go +++ b/internal/controllers/machine/machine_controller_status.go @@ -234,7 +234,7 @@ func setNodeHealthyAndReadyConditions(ctx context.Context, machine *clusterv1.Ma } reason := condition.Reason if reason == "" { - reason = clusterv1.NoV1Beta2ReasonReported + reason = clusterv1.NoReasonReportedV1Beta2Reason } nodeReady = &metav1.Condition{ Type: clusterv1.MachineNodeReadyV1Beta2Condition, @@ -461,7 +461,7 @@ func (c machineConditionCustomMergeStrategy) Merge(conditions []v1beta2condition } // Note: MachineNodeReadyV1Beta2Condition is not relevant for the summary. } - return v1beta2conditions.GetDefaultMergePriority(nil)(condition) + return v1beta2conditions.GetDefaultMergePriorityFunc(nil)(condition) }).Merge(conditions, conditionTypes) } diff --git a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go index 3266be75f87e..80a3204c2a37 100644 --- a/internal/controllers/machine/machine_controller_status_test.go +++ b/internal/controllers/machine/machine_controller_status_test.go @@ -132,8 +132,12 @@ func TestSetBootstrapReadyCondition(t *testing.T) { }, }, { - name: "Use status.BoostrapReady flag as a fallback Ready condition from bootstrap config is missing (ready true)", - machine: defaultMachine.DeepCopy(), + name: "Use status.BoostrapReady flag as a fallback Ready condition from bootstrap config is missing (ready true)", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.Status.BootstrapReady = true + return m + }(), bootstrapConfig: &unstructured.Unstructured{Object: map[string]interface{}{ "kind": "GenericBootstrapConfig", "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1", @@ -148,9 +152,9 @@ func TestSetBootstrapReadyCondition(t *testing.T) { bootstrapConfigIsNotFound: false, expectCondition: metav1.Condition{ Type: clusterv1.MachineBootstrapConfigReadyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionTrue, Reason: clusterv1.MachineBootstrapConfigReadyNoReasonReportedV1Beta2Reason, - Message: "GenericBootstrapConfig status.ready is false", + Message: "GenericBootstrapConfig status.ready is true", }, }, { @@ -322,8 +326,12 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { }, }, { - name: "Use status.InfrastructureReady flag as a fallback Ready condition from infra machine is missing (ready true)", - machine: defaultMachine.DeepCopy(), + name: "Use status.InfrastructureReady flag as a fallback Ready condition from infra machine is missing (ready true)", + machine: func() *clusterv1.Machine { + m := defaultMachine.DeepCopy() + m.Status.InfrastructureReady = true + return m + }(), infraMachine: &unstructured.Unstructured{Object: map[string]interface{}{ "kind": "GenericInfrastructureMachine", "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", @@ -338,9 +346,9 @@ func TestSetInfrastructureReadyCondition(t *testing.T) { infraMachineIsNotFound: false, expectCondition: metav1.Condition{ Type: clusterv1.MachineInfrastructureReadyV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionTrue, Reason: clusterv1.MachineInfrastructureReadyNoReasonReportedV1Beta2Reason, - Message: "GenericInfrastructureMachine status.ready is false", + Message: "GenericInfrastructureMachine status.ready is true", }, }, { diff 
--git a/util/conditions/v1beta2/merge_strategies.go b/util/conditions/v1beta2/merge_strategies.go index 9c1e43626716..e434931c6879 100644 --- a/util/conditions/v1beta2/merge_strategies.go +++ b/util/conditions/v1beta2/merge_strategies.go @@ -82,16 +82,16 @@ func DefaultMergeStrategyWithCustomPriority(getPriorityFunc func(condition metav func newDefaultMergeStrategy(negativePolarityConditionTypes sets.Set[string]) MergeStrategy { return &defaultMergeStrategy{ - getPriorityFunc: GetDefaultMergePriority(negativePolarityConditionTypes), + getPriorityFunc: GetDefaultMergePriorityFunc(negativePolarityConditionTypes), } } -// GetDefaultMergePriority returns the merge priority for each condition. +// GetDefaultMergePriorityFunc returns the merge priority for each condition. // It assigns following priority values to conditions: // - issues: conditions with positive polarity (normal True) and status False or conditions with negative polarity (normal False) and status True. // - unknown: conditions with status unknown. // - info: conditions with positive polarity (normal True) and status True or conditions with negative polarity (normal False) and status False. -func GetDefaultMergePriority(negativePolarityConditionTypes sets.Set[string]) func(condition metav1.Condition) MergePriority { +func GetDefaultMergePriorityFunc(negativePolarityConditionTypes sets.Set[string]) func(condition metav1.Condition) MergePriority { return func(condition metav1.Condition) MergePriority { switch condition.Status { case metav1.ConditionTrue: diff --git a/util/conditions/v1beta2/merge_strategies_test.go b/util/conditions/v1beta2/merge_strategies_test.go index 0ac6dfc0b8fb..19e7ce09dfa8 100644 --- a/util/conditions/v1beta2/merge_strategies_test.go +++ b/util/conditions/v1beta2/merge_strategies_test.go @@ -108,7 +108,7 @@ func TestSplitConditionsByPriority(t *testing.T) { {OwnerResource: ConditionOwnerInfo{Name: "baz"}, Condition: metav1.Condition{Type: "!C", Status: metav1.ConditionFalse}}, // info } - issueConditions, unknownConditions, infoConditions := splitConditionsByPriority(conditions, GetDefaultMergePriority(sets.New[string]("!C"))) + issueConditions, unknownConditions, infoConditions := splitConditionsByPriority(conditions, GetDefaultMergePriorityFunc(sets.New[string]("!C"))) // Check condition are grouped as expected and order is preserved. @@ -196,7 +196,7 @@ func TestDefaultMergePriority(t *testing.T) { if tt.negativePolarity { negativePolarityConditionTypes.Insert(tt.condition.Type) } - gotPriority := GetDefaultMergePriority(negativePolarityConditionTypes)(tt.condition) + gotPriority := GetDefaultMergePriorityFunc(negativePolarityConditionTypes)(tt.condition) g.Expect(gotPriority).To(Equal(tt.wantPriority)) })
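A self-contained sketch of the issue/unknown/info grouping that GetDefaultMergePriorityFunc documents above: a condition is an issue when it is abnormal for its polarity, unknown when its status is Unknown, and info otherwise. The priority names below are local stand-ins for the package's MergePriority values, and "ScalingUp" is a placeholder negative-polarity condition type.

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/sets"
)

type priority int

const (
	issue priority = iota
	unknown
	info
)

// classify mimics the grouping described above: an issue when the condition is abnormal
// for its polarity (False for normal-True conditions, True for normal-False ones),
// unknown when its status is Unknown, and info otherwise.
func classify(c metav1.Condition, negativePolarity sets.Set[string]) priority {
	switch c.Status {
	case metav1.ConditionUnknown:
		return unknown
	case metav1.ConditionTrue:
		if negativePolarity.Has(c.Type) {
			return issue
		}
		return info
	case metav1.ConditionFalse:
		if negativePolarity.Has(c.Type) {
			return info
		}
		return issue
	}
	return unknown
}

func main() {
	neg := sets.New[string]("ScalingUp") // condition types whose "normal" state is False
	fmt.Println(classify(metav1.Condition{Type: "Ready", Status: metav1.ConditionFalse}, neg))    // 0 (issue)
	fmt.Println(classify(metav1.Condition{Type: "ScalingUp", Status: metav1.ConditionTrue}, neg)) // 0 (issue)
	fmt.Println(classify(metav1.Condition{Type: "Ready", Status: metav1.ConditionTrue}, neg))     // 2 (info)
}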