From 028f6ba452fa044118a64fe97447fdcb0c00d027 Mon Sep 17 00:00:00 2001 From: "wangjianyu.wjy" Date: Tue, 10 Sep 2024 18:26:09 +0800 Subject: [PATCH 1/2] scheduler: support pod preemption from numa awareless reservation Signed-off-by: wangjianyu.wjy --- .../plugins/nodenumaresource/preempt.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pkg/scheduler/plugins/nodenumaresource/preempt.go b/pkg/scheduler/plugins/nodenumaresource/preempt.go index 64c46a172..f92d4009b 100644 --- a/pkg/scheduler/plugins/nodenumaresource/preempt.go +++ b/pkg/scheduler/plugins/nodenumaresource/preempt.go @@ -24,6 +24,7 @@ import ( "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" + "github.com/koordinator-sh/koordinator/pkg/scheduler/frameworkext" "github.com/koordinator-sh/koordinator/pkg/util/cpuset" ) @@ -141,7 +142,7 @@ func (p *Plugin) AddPod(_ context.Context, cycleState *framework.CycleState, pre state.schedulingStateData.lock.Unlock() rInfo := p.getPodNominatedReservationInfo(pod, nodeName) - if rInfo == nil { // preempt node unallocated resources + if p.isReservationUnallocatedNUMAResources(rInfo) { // preempt node unallocated resources if nodeState.nodeAlloc == nil { nodeState.nodeAlloc = newPreemptibleAlloc() } @@ -197,7 +198,7 @@ func (p *Plugin) RemovePod(_ context.Context, cycleState *framework.CycleState, state.schedulingStateData.lock.Unlock() rInfo := p.getPodNominatedReservationInfo(pod, nodeName) - if rInfo == nil { // preempt node unallocated resources + if p.isReservationUnallocatedNUMAResources(rInfo) { // preempt node unallocated resources if nodeState.nodeAlloc == nil { nodeState.nodeAlloc = newPreemptibleAlloc() } @@ -219,6 +220,17 @@ func (p *Plugin) RemovePod(_ context.Context, cycleState *framework.CycleState, return nil } +func (p *Plugin) isReservationUnallocatedNUMAResources(rInfo *frameworkext.ReservationInfo) bool { + if rInfo == nil { + return true + } + podAllocatedCPUs, podAllocatedNUMAResources := p.getPodAllocated(rInfo.Pod, rInfo.GetNodeName()) + if podAllocatedCPUs.IsEmpty() && len(podAllocatedNUMAResources) == 0 { + return true + } + return false +} + func (p *Plugin) getPodAllocated(pod *corev1.Pod, nodeName string) (cpus cpuset.CPUSet, numaResources map[int]corev1.ResourceList) { podAllocatedCPUs, ok := p.resourceManager.GetAllocatedCPUSet(nodeName, pod.UID) if ok && !podAllocatedCPUs.IsEmpty() { From 0395fb9957e6047c3b09cc696e8fff248c6085c5 Mon Sep 17 00:00:00 2001 From: "wangjianyu.wjy" Date: Tue, 10 Sep 2024 21:29:23 +0800 Subject: [PATCH 2/2] scheduler: add log for nil allocatedNUMANodeResource and empty allocatedCPUSet Signed-off-by: wangjianyu.wjy --- pkg/scheduler/plugins/nodenumaresource/preempt.go | 9 +++------ .../plugins/nodenumaresource/resource_manager.go | 8 ++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pkg/scheduler/plugins/nodenumaresource/preempt.go b/pkg/scheduler/plugins/nodenumaresource/preempt.go index f92d4009b..1ec2cb8fc 100644 --- a/pkg/scheduler/plugins/nodenumaresource/preempt.go +++ b/pkg/scheduler/plugins/nodenumaresource/preempt.go @@ -142,7 +142,7 @@ func (p *Plugin) AddPod(_ context.Context, cycleState *framework.CycleState, pre state.schedulingStateData.lock.Unlock() rInfo := p.getPodNominatedReservationInfo(pod, nodeName) - if p.isReservationUnallocatedNUMAResources(rInfo) { // preempt node unallocated resources + if rInfo == nil || p.notNUMAAwareReservation(rInfo) { // preempt node unallocated resources if nodeState.nodeAlloc == nil { nodeState.nodeAlloc = newPreemptibleAlloc() } @@ -198,7 +198,7 @@ func (p *Plugin) RemovePod(_ context.Context, cycleState *framework.CycleState, state.schedulingStateData.lock.Unlock() rInfo := p.getPodNominatedReservationInfo(pod, nodeName) - if p.isReservationUnallocatedNUMAResources(rInfo) { // preempt node unallocated resources + if rInfo == nil || p.notNUMAAwareReservation(rInfo) { // preempt node unallocated resources if nodeState.nodeAlloc == nil { nodeState.nodeAlloc = newPreemptibleAlloc() } @@ -220,10 +220,7 @@ func (p *Plugin) RemovePod(_ context.Context, cycleState *framework.CycleState, return nil } -func (p *Plugin) isReservationUnallocatedNUMAResources(rInfo *frameworkext.ReservationInfo) bool { - if rInfo == nil { - return true - } +func (p *Plugin) notNUMAAwareReservation(rInfo *frameworkext.ReservationInfo) bool { podAllocatedCPUs, podAllocatedNUMAResources := p.getPodAllocated(rInfo.Pod, rInfo.GetNodeName()) if podAllocatedCPUs.IsEmpty() && len(podAllocatedNUMAResources) == 0 { return true diff --git a/pkg/scheduler/plugins/nodenumaresource/resource_manager.go b/pkg/scheduler/plugins/nodenumaresource/resource_manager.go index d9887def3..4427bee95 100644 --- a/pkg/scheduler/plugins/nodenumaresource/resource_manager.go +++ b/pkg/scheduler/plugins/nodenumaresource/resource_manager.go @@ -197,18 +197,26 @@ func (c *resourceManager) Allocate(node *corev1.Node, pod *corev1.Pod, options * Name: pod.Name, CPUExclusivePolicy: options.cpuExclusivePolicy, } + klog.V(5).Infof("Allocate pod %s/%s on node %s, numaNodeAffinity: %+v, requestCPUBind %v", pod.Namespace, pod.Name, node.Name, options.hint, options.requestCPUBind) if options.hint.NUMANodeAffinity != nil { resources, err := c.allocateResourcesByHint(node, pod, options) if err != nil { return nil, err } + if len(resources) == 0 { + klog.Warningf("succeed allocateResourcesByHint but allocatedNUMAResources nil, options: %+v", options) + } allocation.NUMANodeResources = resources + } if options.requestCPUBind { cpus, err := c.allocateCPUSet(node, pod, allocation.NUMANodeResources, options) if err != nil { return nil, framework.NewStatus(framework.Unschedulable, err.Error()) } + if cpus.IsEmpty() { + klog.Warningf("succeed allocateCPUSet but allocatedCPUs empty, options: %+v, allocation.NUMANodeResources: %+v", options, allocation.NUMANodeResources) + } allocation.CPUSet = cpus } return allocation, nil