diff --git a/.github/workflows/helm-lint-and-install.yaml b/.github/workflows/helm-lint-and-install.yaml
index cbdd6bab..74e055a6 100644
--- a/.github/workflows/helm-lint-and-install.yaml
+++ b/.github/workflows/helm-lint-and-install.yaml
@@ -37,6 +37,7 @@ jobs:
       - name: Add external repositories
         run: |
           helm repo add bitnami https://charts.bitnami.com/bitnami
+          helm repo add kuberay https://ray-project.github.io/kuberay-helm
 
       - name: Run chart-testing (list-changed)
         id: list-changed
diff --git a/charts/studio/Chart.lock b/charts/studio/Chart.lock
index 56682ffe..e00c1538 100644
--- a/charts/studio/Chart.lock
+++ b/charts/studio/Chart.lock
@@ -8,5 +8,8 @@ dependencies:
 - name: postgresql
   repository: https://charts.bitnami.com/bitnami
   version: 11.9.13
-digest: sha256:17b45dd98206bc3856d787b7399053996e6039ec8b52c9e00a501e7fcd896765
-generated: "2023-07-28T20:11:07.578307878Z"
+- name: kuberay-operator
+  repository: https://ray-project.github.io/kuberay-helm
+  version: 0.6.0
+digest: sha256:3b2dc8c1ca3e1841605034aca0e0dd7119c1fbcf17eecc47c997bf4510bce4cf
+generated: "2023-08-16T00:49:28.269211675Z"
diff --git a/charts/studio/Chart.yaml b/charts/studio/Chart.yaml
index 56a4b896..c8a6744e 100644
--- a/charts/studio/Chart.yaml
+++ b/charts/studio/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: studio
 description: A Helm chart for Kubernetes
 type: application
-version: 0.3.1
+version: 0.4.0
 appVersion: "v2.24.0"
 maintainers:
   - name: iterative
@@ -20,3 +20,7 @@ dependencies:
     condition: postgresql.enabled
     version: "11.9.13"
     repository: "https://charts.bitnami.com/bitnami"
+  - name: kuberay-operator
+    condition: ray.enabled
+    version: "0.6.0"
+    repository: "https://ray-project.github.io/kuberay-helm"
diff --git a/charts/studio/README.md b/charts/studio/README.md
index 1ea5b44a..1acd03ce 100644
--- a/charts/studio/README.md
+++ b/charts/studio/README.md
@@ -1,6 +1,6 @@
 # studio
 
-![Version: 0.3.1](https://img.shields.io/badge/Version-0.3.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v2.24.0](https://img.shields.io/badge/AppVersion-v2.24.0-informational?style=flat-square)
+![Version: 0.4.0](https://img.shields.io/badge/Version-0.4.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v2.24.0](https://img.shields.io/badge/AppVersion-v2.24.0-informational?style=flat-square)
 
 A Helm chart for Kubernetes
 
@@ -17,6 +17,7 @@ A Helm chart for Kubernetes
 | https://charts.bitnami.com/bitnami | nginx | 13.2.30 |
 | https://charts.bitnami.com/bitnami | postgresql | 11.9.13 |
 | https://charts.bitnami.com/bitnami | redis | 17.14.3 |
+| https://ray-project.github.io/kuberay-helm | kuberay-operator | 0.6.0 |
 
 ## Values
 
@@ -94,6 +95,10 @@ A Helm chart for Kubernetes
 | postgresql.fullnameOverride | string | `"studio-postgresql"` | Postgres name override |
 | postgresql.global.postgresql.auth.database | string | `"iterativeai"` | Postgres database |
 | postgresql.global.postgresql.auth.postgresPassword | string | `"postgres"` | Postgres password |
+| ray | object | `{"enabled":false,"head":{"resources":{"cpu":"500m","memory":"512M"}},"image":"rayproject/ray:2.4.0-py310-cpu","version":"2.4.0","worker":{"replicas":{"maximum":4,"minimum":1},"resources":{"cpu":"500m","memory":"512M"}}}` | Ray cluster settings group used by DVCx |
+| ray.enabled | bool | `false` | Ray enabled |
+| ray.image | string | `"rayproject/ray:2.4.0-py310-cpu"` | Container image used for Ray nodes |
+| ray.version | string | `"2.4.0"` | Ray version |
 | redis.auth | object | `{"enabled":false}` | Redis authentication settings |
 | redis.auth.enabled | bool | `false` | Redis authentication disabled |
 | redis.commonConfiguration | string | `"timeout 20"` | Redis common configuration to be added into the ConfigMap |
diff --git a/charts/studio/templates/configmap-studio.yaml b/charts/studio/templates/configmap-studio.yaml
index d733c7ab..c3a25b93 100644
--- a/charts/studio/templates/configmap-studio.yaml
+++ b/charts/studio/templates/configmap-studio.yaml
@@ -121,6 +121,9 @@ data:
   DQL_UDF_ENABLED: {{ $dvcx.udfEnabled | default "False" | quote }}
   DVCX_ENABLED: {{ $dvcx.enabled | default "False" | quote }}
   DVCX_UDF_ENABLED: {{ $dvcx.udfEnabled | default "False" | quote }}
+  {{- if ((.Values.ray).enabled) }}
+  DVCX_RAY_URL: "http://{{ .Release.Name }}-ray-head-svc.{{ .Release.Namespace }}.svc.cluster.local:8265"
+  {{- end }}
   DQL_ROOT_DIR: {{ $dvcx.rootDir | default "/tmp" | quote }}
 
   DQL_CH_HOST: {{ $dvcxClickhouse.host | default "" | quote }}
diff --git a/charts/studio/templates/raycluster-studio.yaml b/charts/studio/templates/raycluster-studio.yaml
new file mode 100644
index 00000000..83471092
--- /dev/null
+++ b/charts/studio/templates/raycluster-studio.yaml
@@ -0,0 +1,76 @@
+{{- if ((.Values.ray).enabled) -}}
+# RayCluster for DVCx; rendered only when ray.enabled is true.
+apiVersion: ray.io/v1alpha1
+kind: RayCluster
+metadata:
+  labels:
+    controller-tools.k8s.io: "1.0"
+  name: {{ .Release.Name }}-ray
+spec:
+  # Quoted so the version always renders as a YAML string.
+  rayVersion: {{ .Values.ray.version | quote }}
+  enableInTreeAutoscaling: true
+  autoscalerOptions:
+    upscalingMode: Default
+    idleTimeoutSeconds: 60
+    imagePullPolicy: IfNotPresent
+    # Autoscaler resources: limits and requests are kept identical.
+    resources:
+      limits:
+        cpu: 500m
+        memory: 500M
+      requests:
+        cpu: 500m
+        memory: 500M
+  headGroupSpec:
+    rayStartParams:
+      dashboard-host: "0.0.0.0"
+    template:
+      spec:
+        containers:
+          - name: head
+            image: {{ .Values.ray.image | quote }}
+            resources:
+              requests:
+                cpu: {{ .Values.ray.head.resources.cpu }}
+                memory: {{ .Values.ray.head.resources.memory }}
+              limits:
+                cpu: {{ .Values.ray.head.resources.cpu }}
+                memory: {{ .Values.ray.head.resources.memory }}
+            ports:
+              - containerPort: 6379
+                name: gcs
+              - containerPort: 8265
+                name: dashboard
+              - containerPort: 10001
+                name: client
+            lifecycle:
+              # Run "ray stop" before the container is terminated.
+              preStop:
+                exec:
+                  command: ["/bin/sh", "-c", "ray stop"]
+  workerGroupSpecs:
+    # The group starts at the minimum replica count.
+    - replicas: {{ .Values.ray.worker.replicas.minimum }}
+      minReplicas: {{ .Values.ray.worker.replicas.minimum }}
+      maxReplicas: {{ .Values.ray.worker.replicas.maximum }}
+      groupName: workers
+      rayStartParams: {}
+      template:
+        spec:
+          containers:
+            - name: worker
+              image: {{ .Values.ray.image | quote }}
+              resources:
+                requests:
+                  cpu: {{ .Values.ray.worker.resources.cpu }}
+                  memory: {{ .Values.ray.worker.resources.memory }}
+                limits:
+                  cpu: {{ .Values.ray.worker.resources.cpu }}
+                  memory: {{ .Values.ray.worker.resources.memory }}
+              lifecycle:
+                # Run "ray stop" before the container is terminated.
+                preStop:
+                  exec:
+                    command: ["/bin/sh", "-c", "ray stop"]
+{{- end }}
diff --git a/charts/studio/values.yaml b/charts/studio/values.yaml
index a22b7f81..8d0f7f04 100644
--- a/charts/studio/values.yaml
+++ b/charts/studio/values.yaml
@@ -335,6 +335,34 @@ postgresql:
         # -- Postgres database
         database: iterativeai
 
+# -- Ray cluster settings group used by DVCx
+ray:
+  # -- Ray enabled
+  enabled: false
+
+  # -- Ray version
+  version: "2.4.0"
+
+  # -- Container image used for Ray nodes
+  image: rayproject/ray:2.4.0-py310-cpu
+
+  # Recommendation: adjust resources (i.e. combined requests & limits) so a
+  # single Ray node (e.g. head or worker) is scheduled on each Kubernetes node,
+  # using all the Kubernetes node resources.
+
+  head:
+    resources:
+      cpu: 500m
+      memory: 512M
+
+  worker:
+    resources:
+      cpu: 500m
+      memory: 512M
+    replicas:
+      minimum: 1
+      maximum: 4
+
 # -- Studio UI settings group
 studioUi:
   # -- Additional environment variables for ui pods