Skip to content

Commit

Permalink
machine: Introduce Deletion status field and add timestamps for drain…
Browse files Browse the repository at this point in the history
… and volumeDetach instead of using the condition
  • Loading branch information
chrischdi committed Sep 10, 2024
1 parent 8df788d commit 42eac4c
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 10 deletions.
12 changes: 12 additions & 0 deletions api/v1beta1/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,22 @@ type MachineStatus struct {
// Conditions defines current service state of the Machine.
// +optional
Conditions Conditions `json:"conditions,omitempty"`

// Deletion is the deletion state of the Machine.
// +optional
Deletion MachineStatusDeletion `json:"deletion,omitempty"`
}

// ANCHOR_END: MachineStatus

// MachineStatusDeletion is the deletion state of the Machine.
type MachineStatusDeletion struct {
// NodeDrainStartTime is the time when the drain of the node started.
NodeDrainStartTime *metav1.Time `json:"nodeDrainStartTime,omitempty"`
// WaitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started.
WaitForNodeVolumeDetachStartTime *metav1.Time `json:"nodeVolumeDetachStartTime,omitempty"`
}

// SetTypedPhase sets the Phase field to the string representation of MachinePhase.
func (m *MachineStatus) SetTypedPhase(p MachinePhase) {
m.Phase = string(p)
Expand Down
1 change: 1 addition & 0 deletions internal/apis/core/v1alpha3/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
dst.Status.NodeInfo = restored.Status.NodeInfo
dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
dst.Status.Deletion = restored.Status.Deletion
return nil
}

Expand Down
1 change: 1 addition & 0 deletions internal/apis/core/v1alpha4/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.NodeDeletionTimeout = restored.Spec.NodeDeletionTimeout
dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
dst.Status.Deletion = restored.Status.Deletion
return nil
}

Expand Down
27 changes: 17 additions & 10 deletions internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -415,6 +416,10 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu
conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
}

if m.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
m.Status.Deletion.WaitForNodeVolumeDetachStartTime = ptr.To(metav1.Now())
}

if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
if err != nil {
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
Expand Down Expand Up @@ -527,34 +532,32 @@ func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
return false
}

// if the draining succeeded condition does not exist
if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
// if the NodeDrainStartTime does not exist
if machine.Status.Deletion.NodeDrainStartTime == nil {
return false
}

now := time.Now()
firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
diff := now.Sub(firstTimeDrain.Time)
diff := now.Sub(machine.Status.Deletion.NodeDrainStartTime.Time)
return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
}

// nodeVolumeDetachTimeoutExceeded returns False if either NodeVolumeDetachTimeout is set to nil or <=0 OR
// VolumeDetachSucceededCondition is not set on the Machine. Otherwise returns true if the timeout is expired
// since the last transition time of VolumeDetachSucceededCondition.
// WaitForNodeVolumeDetachStartTime is not set on the Machine. Otherwise returns true if the timeout is expired
// since the WaitForNodeVolumeDetachStartTime.
func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
// if the NodeVolumeDetachTimeout type is not set by user
if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
return false
}

// if the volume detaching succeeded condition does not exist
if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
// if the NodeVolumeDetachStartTime does not exist
if machine.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
return false
}

now := time.Now()
firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
diff := now.Sub(firstTimeDetach.Time)
diff := now.Sub(machine.Status.Deletion.WaitForNodeVolumeDetachStartTime.Time)
return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
}

Expand Down Expand Up @@ -697,6 +700,10 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
log.V(3).Info("Node is unreachable, draining will use 1s GracePeriodSeconds and will ignore all Pods that have a deletionTimestamp > 1s old")
}

if machine.Status.Deletion.NodeDrainStartTime == nil {
machine.Status.Deletion.NodeDrainStartTime = ptr.To(metav1.Now())
}

if err := drainer.CordonNode(ctx, node); err != nil {
// Machine will be re-reconciled after a cordon failure.
return ctrl.Result{}, errors.Wrapf(err, "failed to cordon Node %s", node.Name)
Expand Down

0 comments on commit 42eac4c

Please sign in to comment.