From ceb0531af442cf18f919d08084c0b7468f17cc4f Mon Sep 17 00:00:00 2001
From: HR Wu <5631010+heiruwu@users.noreply.github.com>
Date: Thu, 11 Apr 2024 04:10:54 +0800
Subject: [PATCH] chore(helm): mark GPU node as no CPU resource available
 (#474)

Because

- In the current setup, GPU nodes have CPU resources available, so pure
  CPU models are sometimes scheduled onto GPU nodes. This is not
  desirable; we want CPU models to be scheduled only on CPU nodes.

This commit

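- marks GPU nodes as having no CPU resources available for scheduling, by
  setting `num-cpus: "0"` in `rayStartParams` for GPU worker groups
- changes the head group's CPU requests and limits from "0" to "2"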
---
 charts/core/templates/ray-service/ray-service.yaml | 3 +++
 charts/core/values.yaml                            | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/charts/core/templates/ray-service/ray-service.yaml b/charts/core/templates/ray-service/ray-service.yaml
index 5846acd6..b2e8f38d 100644
--- a/charts/core/templates/ray-service/ray-service.yaml
+++ b/charts/core/templates/ray-service/ray-service.yaml
@@ -114,6 +114,9 @@ spec:
       maxReplicas: {{ $workerGroupSpecs.maxReplicas }}
       groupName: {{ $workerGroupSpecs.groupName }}
       rayStartParams:
+        {{- if $workerGroupSpecs.gpuWorkerGroup.enabled }}
+        num-cpus: "0"
+        {{- end }}
         {{- if $workerGroupSpecs.gpuWorkerGroup.customResource }}
         resources: {{ $workerGroupSpecs.gpuWorkerGroup.customResource }}
         {{- end }}
diff --git a/charts/core/values.yaml b/charts/core/values.yaml
index 7901cf11..2a4a21ae 100644
--- a/charts/core/values.yaml
+++ b/charts/core/values.yaml
@@ -687,11 +687,11 @@ rayService:
   headGroupSpec:
     resources:
       limits:
-        cpu: "0"
+        cpu: "2"
         memory: "4Gi"
         nvidia.com/gpu: 0
       requests:
-        cpu: "0"
+        cpu: "2"
         memory: "4Gi"
         nvidia.com/gpu: 0
     affinity: {}