diff --git a/api/v1beta1/machine_types.go b/api/v1beta1/machine_types.go index e6e0fa8fe0cf..01d681804c08 100644 --- a/api/v1beta1/machine_types.go +++ b/api/v1beta1/machine_types.go @@ -132,6 +132,10 @@ type MachineSpec struct { // Defaults to 10 seconds. // +optional NodeDeletionTimeout *metav1.Duration `json:"nodeDeletionTimeout,omitempty"` + + // NodeDrainPodFilters allows to specify filters for pods to be excluded during node drain + // +optional + NodeDrainPodFilters *metav1.LabelSelector `json:"nodeDrainPodFilters,omitempty"` } // ANCHOR_END: MachineSpec diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index bb0ecf062264..2a4e1a453fa3 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -1857,6 +1857,11 @@ func (in *MachineSpec) DeepCopyInto(out *MachineSpec) { *out = new(metav1.Duration) **out = **in } + if in.NodeDrainPodFilters != nil { + in, out := &in.NodeDrainPodFilters, &out.NodeDrainPodFilters + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineSpec. diff --git a/api/v1beta1/zz_generated.openapi.go b/api/v1beta1/zz_generated.openapi.go index 6e5427eef548..85437b23d560 100644 --- a/api/v1beta1/zz_generated.openapi.go +++ b/api/v1beta1/zz_generated.openapi.go @@ -3241,12 +3241,18 @@ func schema_sigsk8sio_cluster_api_api_v1beta1_MachineSpec(ref common.ReferenceCa Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), }, }, + "nodeDrainPodFilters": { + SchemaProps: spec.SchemaProps{ + Description: "NodeDrainPodFilters allows to specify filters for pods to be excluded during node drain", + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector"), + }, + }, }, Required: []string{"clusterName", "bootstrap", "infrastructureRef"}, }, }, Dependencies: []string{ - "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "sigs.k8s.io/cluster-api/api/v1beta1.Bootstrap"}, + "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "sigs.k8s.io/cluster-api/api/v1beta1.Bootstrap"}, } } diff --git a/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml b/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml index c4044c287b86..12cd50ec4e95 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinedeployments.yaml @@ -1493,6 +1493,53 @@ spec: hosts after the Machine is marked for deletion. A duration of 0 will retry deletion indefinitely. Defaults to 10 seconds. type: string + nodeDrainPodFilters: + description: NodeDrainPodFilters allows to specify filters + for pods to be excluded during node drain + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic nodeDrainTimeout: description: |- NodeDrainTimeout is the total amount of time that the controller will spend on draining a node. diff --git a/config/crd/bases/cluster.x-k8s.io_machinepools.yaml b/config/crd/bases/cluster.x-k8s.io_machinepools.yaml index ba81835620a1..79307ebf75a3 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinepools.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinepools.yaml @@ -1286,6 +1286,53 @@ spec: hosts after the Machine is marked for deletion. A duration of 0 will retry deletion indefinitely. Defaults to 10 seconds. type: string + nodeDrainPodFilters: + description: NodeDrainPodFilters allows to specify filters + for pods to be excluded during node drain + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic nodeDrainTimeout: description: |- NodeDrainTimeout is the total amount of time that the controller will spend on draining a node. diff --git a/config/crd/bases/cluster.x-k8s.io_machines.yaml b/config/crd/bases/cluster.x-k8s.io_machines.yaml index 1b4b5ced3252..ce97778de810 100644 --- a/config/crd/bases/cluster.x-k8s.io_machines.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machines.yaml @@ -1021,6 +1021,53 @@ spec: hosts after the Machine is marked for deletion. A duration of 0 will retry deletion indefinitely. Defaults to 10 seconds. type: string + nodeDrainPodFilters: + description: NodeDrainPodFilters allows to specify filters for pods + to be excluded during node drain + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic nodeDrainTimeout: description: |- NodeDrainTimeout is the total amount of time that the controller will spend on draining a node. diff --git a/config/crd/bases/cluster.x-k8s.io_machinesets.yaml b/config/crd/bases/cluster.x-k8s.io_machinesets.yaml index 68f0bfbc2c7a..deb733d70fdd 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinesets.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinesets.yaml @@ -1232,6 +1232,53 @@ spec: hosts after the Machine is marked for deletion. A duration of 0 will retry deletion indefinitely. Defaults to 10 seconds. type: string + nodeDrainPodFilters: + description: NodeDrainPodFilters allows to specify filters + for pods to be excluded during node drain + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic nodeDrainTimeout: description: |- NodeDrainTimeout is the total amount of time that the controller will spend on draining a node. diff --git a/internal/apis/core/v1alpha3/zz_generated.conversion.go b/internal/apis/core/v1alpha3/zz_generated.conversion.go index 1d8c7451e31c..04e630421fc3 100644 --- a/internal/apis/core/v1alpha3/zz_generated.conversion.go +++ b/internal/apis/core/v1alpha3/zz_generated.conversion.go @@ -1210,6 +1210,7 @@ func autoConvert_v1beta1_MachineSpec_To_v1alpha3_MachineSpec(in *v1beta1.Machine out.NodeDrainTimeout = (*metav1.Duration)(unsafe.Pointer(in.NodeDrainTimeout)) // WARNING: in.NodeVolumeDetachTimeout requires manual conversion: does not exist in peer-type // WARNING: in.NodeDeletionTimeout requires manual conversion: does not exist in peer-type + // WARNING: in.NodeDrainPodFilters requires manual conversion: does not exist in peer-type return nil } diff --git a/internal/apis/core/v1alpha4/zz_generated.conversion.go b/internal/apis/core/v1alpha4/zz_generated.conversion.go index e8f640274d79..814fc1b9f1ab 100644 --- a/internal/apis/core/v1alpha4/zz_generated.conversion.go +++ b/internal/apis/core/v1alpha4/zz_generated.conversion.go @@ -1621,6 +1621,7 @@ func autoConvert_v1beta1_MachineSpec_To_v1alpha4_MachineSpec(in *v1beta1.Machine out.NodeDrainTimeout = (*metav1.Duration)(unsafe.Pointer(in.NodeDrainTimeout)) // WARNING: in.NodeVolumeDetachTimeout requires manual conversion: does not exist in peer-type // WARNING: in.NodeDeletionTimeout requires manual conversion: does not exist in peer-type + // WARNING: in.NodeDrainPodFilters requires manual conversion: does not exist in peer-type return nil } diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 2dbbd916713b..4d7497c07d5d 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -26,6 +26,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/wait" @@ -386,7 +387,7 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine") } - if result, err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); !result.IsZero() || err != nil { + if result, err := r.drainNode(ctx, cluster, m); !result.IsZero() || err != nil { if err != nil { conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err) @@ -615,7 +616,8 @@ func (r *Reconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1 return nil } -func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (ctrl.Result, error) { +func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { + nodeName := m.Status.NodeRef.Name log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName)) restConfig, err := r.Tracker.GetRESTConfig(ctx, util.ObjectKey(cluster)) @@ -667,6 +669,9 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, ErrOut: writer{func(msg string, keysAndValues ...interface{}) { log.Error(nil, msg, keysAndValues...) }}, + AdditionalFilters: []kubedrain.PodFilter{ + SkipFuncGenerator(m.Spec.NodeDrainPodFilters), + }, } if noderefutil.IsNodeUnreachable(node) { @@ -700,6 +705,18 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, return ctrl.Result{}, nil } +func SkipFuncGenerator(labelSelector *metav1.LabelSelector) func(pod corev1.Pod) kubedrain.PodDeleteStatus { + return func(pod corev1.Pod) kubedrain.PodDeleteStatus { + if pod.Labels == nil { + return kubedrain.MakePodDeleteStatusOkay() + } + if labels.Equals(labelSelector.MatchLabels, pod.ObjectMeta.Labels) { + return kubedrain.MakePodDeleteStatusSkip() + } + return kubedrain.MakePodDeleteStatusOkay() + } +} + // shouldWaitForNodeVolumes returns true if node status still have volumes attached and the node is reachable // pod deletion and volume detach happen asynchronously, so pod could be deleted before volume detached from the node // this could cause issue for some storage provisioner, for example, vsphere-volume this is problematic