From bb0524dc9c37d5ad05e43ad4c9d302352c748e3b Mon Sep 17 00:00:00 2001 From: Andrew Sy Kim Date: Thu, 21 Mar 2024 15:32:00 -0400 Subject: [PATCH] update default RayCluster name to 'ray-cluster' (#417) Signed-off-by: Andrew Sy Kim --- applications/rag/README.md | 2 +- .../rag-kaggle-ray-sql-latest.ipynb | 2 +- .../ray-hf-cloudsql-latest.ipynb | 6 ++--- .../ray/kuberay-tpu-webhook/tests/tpu-test.py | 2 +- .../raytrain-with-gcsfusecsi/README.md | 10 ++++----- .../kuberaytf/user/modules/kuberay/kuberay.tf | 2 +- applications/ray/variables.tf | 2 +- applications/ray/workloads.tfvars | 2 +- cloudbuild.yaml | 4 ++-- modules/kuberay-cluster/variables.tf | 2 +- ray-on-gke/README.md | 6 ++--- .../examples/notebooks/gpt-j-online.ipynb | 4 ++-- ray-on-gke/examples/notebooks/jax-tpu.ipynb | 2 +- .../examples/notebooks/ray-dist-mnist.ipynb | 2 +- .../ray-fine-tune-hugging-face.ipynb | 2 +- ray-on-gke/examples/notebooks/ray_basic.ipynb | 22 +++++++++---------- ray-on-gke/examples/notebooks/ray_mnist.ipynb | 16 +++++++------- .../notebooks/raytrain-stablediffusion.ipynb | 12 +++++----- .../notebooks/stable-diffusion-tpu.ipynb | 4 ++-- .../examples/notebooks/stable_diffusion.ipynb | 4 ++-- .../guides/raytrain-with-gcsfusecsi/README.md | 10 ++++----- ray-on-gke/guides/tpu/README.md | 4 ++-- tpu-provisioner/README.md | 2 +- .../e2e-genai-langchain.ipynb | 4 ++-- .../src/backend/main.py | 2 +- .../src/backend/model.py | 2 +- 26 files changed, 66 insertions(+), 66 deletions(-) diff --git a/applications/rag/README.md b/applications/rag/README.md index 28deb105f..e89f5aa8e 100644 --- a/applications/rag/README.md +++ b/applications/rag/README.md @@ -75,7 +75,7 @@ Ensure your k8s client is using the correct cluster by running: gcloud container clusters get-credentials ${CLUSTER_NAME:?} --location ${CLUSTER_REGION:?} ``` -1. Verify Kuberay is setup: run `kubectl get pods -n ${NAMESPACE:?}`. There should be a Ray head (and Ray worker pod on GKE Standard only) in `Running` state (prefixed by `example-cluster-kuberay-head-` and `example-cluster-kuberay-worker-workergroup-`). +1. Verify Kuberay is setup: run `kubectl get pods -n ${NAMESPACE:?}`. There should be a Ray head (and Ray worker pod on GKE Standard only) in `Running` state (prefixed by `ray-cluster-kuberay-head-` and `ray-cluster-kuberay-worker-workergroup-`). 2. Verify Jupyterhub service is setup: * Fetch the service IP/Domain: diff --git a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb index 66fe1185c..132a6336c 100644 --- a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb +++ b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb @@ -261,7 +261,7 @@ "source": [ "import ray, time\n", "from ray.job_submission import JobSubmissionClient\n", - "client = JobSubmissionClient(\"ray://example-cluster-kuberay-head-svc:10001\")" + "client = JobSubmissionClient(\"ray://ray-cluster-kuberay-head-svc:10001\")" ] }, { diff --git a/applications/rag/example_notebooks/ray-hf-cloudsql-latest.ipynb b/applications/rag/example_notebooks/ray-hf-cloudsql-latest.ipynb index 09d9dad20..7fefc84b0 100644 --- a/applications/rag/example_notebooks/ray-hf-cloudsql-latest.ipynb +++ b/applications/rag/example_notebooks/ray-hf-cloudsql-latest.ipynb @@ -230,7 +230,7 @@ "source": [ "import ray\n", "from ray.job_submission import JobSubmissionClient\n", - "client = JobSubmissionClient(\"ray://example-cluster-kuberay-head-svc:10001\")" + "client = JobSubmissionClient(\"ray://ray-cluster-kuberay-head-svc:10001\")" ] }, { @@ -268,9 +268,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Need to run kubectl port-forward -n service/example-cluster-kuberay-head-svc 8265:8265 to see the UI\n", + "# Need to run kubectl port-forward -n service/ray-cluster-kuberay-head-svc 8265:8265 to see the UI\n", "# Fetch job status\n", - "!ray job status {job_id} --address \"ray://example-cluster-kuberay-head-svc:10001\" " + "!ray job status {job_id} --address \"ray://ray-cluster-kuberay-head-svc:10001\" " ] } ], diff --git a/applications/ray/kuberay-tpu-webhook/tests/tpu-test.py b/applications/ray/kuberay-tpu-webhook/tests/tpu-test.py index 411413638..f707ac0e3 100644 --- a/applications/ray/kuberay-tpu-webhook/tests/tpu-test.py +++ b/applications/ray/kuberay-tpu-webhook/tests/tpu-test.py @@ -9,7 +9,7 @@ def tpu_cores(): ray.init( - address="ray://example-cluster-kuberay-head-svc:10001", + address="ray://ray-cluster-kuberay-head-svc:10001", runtime_env={ "pip": [ "jax[tpu]==0.4.11", diff --git a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/README.md b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/README.md index cbbc54cd2..7e2a8a9f5 100644 --- a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/README.md +++ b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/README.md @@ -57,14 +57,14 @@ pod/kuberay-operator-64b7b88759-5ppfw 1/1 Running 0 95 ``` $ kubectl get all -n example NAME READY STATUS RESTARTS AGE -pod/example-cluster-kuberay-head-9x2q6 2/2 Running 0 3m12s -pod/example-cluster-kuberay-worker-workergroup-95nm2 2/2 Running 0 3m12s -pod/example-cluster-kuberay-worker-workergroup-tfg9n 2/2 Running 0 3m12s +pod/ray-cluster-kuberay-head-9x2q6 2/2 Running 0 3m12s +pod/ray-cluster-kuberay-worker-workergroup-95nm2 2/2 Running 0 3m12s +pod/ray-cluster-kuberay-worker-workergroup-tfg9n 2/2 Running 0 3m12s pod/kuberay-operator-64b7b88759-5ppfw 1/1 Running 0 4m4s pod/tensorflow-0 2/2 Running 0 16s NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -service/example-cluster-kuberay-head-svc ClusterIP 10.8.10.33 10001/TCP,8265/TCP,8080/TCP,6379/TCP,8000/TCP 3m12s +service/ray-cluster-kuberay-head-svc ClusterIP 10.8.10.33 10001/TCP,8265/TCP,8080/TCP,6379/TCP,8000/TCP 3m12s service/kuberay-operator ClusterIP 10.8.14.245 8080/TCP 4m4s service/tensorflow ClusterIP None 8888/TCP 16s service/tensorflow-jupyter LoadBalancer 10.8.3.9 80:31891/TCP 16s @@ -96,7 +96,7 @@ http://tensorflow-0:8888/?token= :: /home/jovyan 12. Follow the comments and execute the cells in the notebook to run a distributed training job and then inference on the tuned model 13. Port forward the ray service port to examine the ray dashboard for jobs progress details, The dashboard is reachable at localhost:8286 in the local browser ``` -kubectl port-forward -n example service/example-cluster-kuberay-head-svc 8265:8265 +kubectl port-forward -n example service/ray-cluster-kuberay-head-svc 8265:8265 ``` 14. During an ongoing traing, the pod resource usage of CPU, Memory, GPU, GPU Memory can be visualized with the GKE Cloud Console for the workloads example ![Ray Head resources](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/ray-on-gke/raytrain-examples/images/ray-head-resources.png) and ![Ray Worker resources](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/ray-on-gke/raytrain-examples/images/ray-worker-resources.png) diff --git a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/kuberay/kuberay.tf b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/kuberay/kuberay.tf index 39dd73901..dce76229d 100644 --- a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/kuberay/kuberay.tf +++ b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/kuberay/kuberay.tf @@ -13,7 +13,7 @@ # limitations under the License. resource "helm_release" "ray-cluster" { - name = "example-cluster" + name = "ray-cluster" repository = "https://ray-project.github.io/kuberay-helm/" chart = "ray-cluster" namespace = var.namespace diff --git a/applications/ray/variables.tf b/applications/ray/variables.tf index 7e8402496..ccf10ebe8 100644 --- a/applications/ray/variables.tf +++ b/applications/ray/variables.tf @@ -77,7 +77,7 @@ variable "create_ray_cluster" { variable "ray_cluster_name" { type = string - default = "example-cluster" + default = "ray-cluster" } variable "enable_gpu" { diff --git a/applications/ray/workloads.tfvars b/applications/ray/workloads.tfvars index 1adddfab9..309dfb2c2 100644 --- a/applications/ray/workloads.tfvars +++ b/applications/ray/workloads.tfvars @@ -38,5 +38,5 @@ workload_identity_service_account = "ray-service-account" create_gcs_bucket = true gcs_bucket = "ray-bucket-zydg" create_ray_cluster = true -ray_cluster_name = "example-cluster" +ray_cluster_name = "ray-cluster" enable_grafana_on_ray_dashboard = false diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 9a4e9e9c6..fc94faaca 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -96,7 +96,7 @@ steps: # Make sure pods are running kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=300s - kubectl port-forward -n ml-$SHORT_SHA-$_BUILD_ID service/example-cluster-kuberay-head-svc 8265:8265 & + kubectl port-forward -n ml-$SHORT_SHA-$_BUILD_ID service/ray-cluster-kuberay-head-svc 8265:8265 & # Wait port-forwarding to take its place sleep 5s @@ -218,7 +218,7 @@ steps: # Validate Ray: Make sure pods are running kubectl wait --all pods -n rag-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=300s - kubectl port-forward -n rag-$SHORT_SHA-$_BUILD_ID service/example-cluster-kuberay-head-svc 8265:8265 & + kubectl port-forward -n rag-$SHORT_SHA-$_BUILD_ID service/ray-cluster-kuberay-head-svc 8265:8265 & # Wait port-forwarding to take its place sleep 5s diff --git a/modules/kuberay-cluster/variables.tf b/modules/kuberay-cluster/variables.tf index f3bc2fc72..3df04e111 100644 --- a/modules/kuberay-cluster/variables.tf +++ b/modules/kuberay-cluster/variables.tf @@ -20,7 +20,7 @@ variable "project_id" { variable "name" { type = string description = "Name of the ray cluster" - default = "example-cluster" + default = "ray-cluster" } variable "db_region" { diff --git a/ray-on-gke/README.md b/ray-on-gke/README.md index 2af6a6e4f..9acd3b081 100644 --- a/ray-on-gke/README.md +++ b/ray-on-gke/README.md @@ -26,7 +26,7 @@ Validate that the RayCluster is ready: ``` $ kubectl get raycluster NAME DESIRED WORKERS AVAILABLE WORKERS STATUS AGE -example-cluster-kuberay 1 1 ready 3m41s +ray-cluster-kuberay 1 1 ready 3m41s ``` ### Install Ray @@ -39,7 +39,7 @@ To submit a Ray job, first establish a connection to the Ray head. For this exam to connect to the Ray head via localhost. ```bash -$ kubectl -n ml port-forward service/example-cluster-kuberay-head-svc 8265 & +$ kubectl -n ml port-forward service/ray-cluster-kuberay-head-svc 8265 & ``` Submit a Ray job that prints resources available in your Ray cluster: @@ -79,7 +79,7 @@ To use the client, first establish a connection to the Ray head. For this exampl to connect to the Ray head Service via localhost. ```bash -$ kubectl -n ml port-forward service/example-cluster-kuberay-head-svc 10001 & +$ kubectl -n ml port-forward service/ray-cluster-kuberay-head-svc 10001 & ``` Next, define a Python script containing remote code you want to run on your Ray cluster. Similar to the previous example, diff --git a/ray-on-gke/examples/notebooks/gpt-j-online.ipynb b/ray-on-gke/examples/notebooks/gpt-j-online.ipynb index aa3dbd693..bbda153af 100644 --- a/ray-on-gke/examples/notebooks/gpt-j-online.ipynb +++ b/ray-on-gke/examples/notebooks/gpt-j-online.ipynb @@ -142,7 +142,7 @@ "outputs": [], "source": [ "ray.init(\n", - " address=\"ray://example-cluster-kuberay-head-svc:10001\",\n", + " address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n", " runtime_env={\n", " \"pip\": [\n", " \"IPython\",\n", @@ -248,7 +248,7 @@ "\n", "sample_input = {\"text\": prompt}\n", "\n", - "output = requests.post(\"http://example-cluster-kuberay-head-svc:8000/\", json=[sample_input]).json()\n", + "output = requests.post(\"http://ray-cluster-kuberay-head-svc:8000/\", json=[sample_input]).json()\n", "print(output)" ] }, diff --git a/ray-on-gke/examples/notebooks/jax-tpu.ipynb b/ray-on-gke/examples/notebooks/jax-tpu.ipynb index 154651b83..3722f52b5 100644 --- a/ray-on-gke/examples/notebooks/jax-tpu.ipynb +++ b/ray-on-gke/examples/notebooks/jax-tpu.ipynb @@ -64,7 +64,7 @@ "import ray\n", "\n", "ray.init(\n", - " address=\"ray://example-cluster-kuberay-head-svc:10001\",\n", + " address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n", " runtime_env={\n", " \"pip\": [\n", " \"jax[tpu]==0.4.11\",\n", diff --git a/ray-on-gke/examples/notebooks/ray-dist-mnist.ipynb b/ray-on-gke/examples/notebooks/ray-dist-mnist.ipynb index 703d303df..db6da3565 100644 --- a/ray-on-gke/examples/notebooks/ray-dist-mnist.ipynb +++ b/ray-on-gke/examples/notebooks/ray-dist-mnist.ipynb @@ -61,7 +61,7 @@ "outputs": [], "source": [ "ray.init(\n", - " address=\"ray://example-cluster-kuberay-head-svc:10001\",\n", + " address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n", " runtime_env={\n", " \"pip\": [\n", " \"IPython\",\n", diff --git a/ray-on-gke/examples/notebooks/ray-fine-tune-hugging-face.ipynb b/ray-on-gke/examples/notebooks/ray-fine-tune-hugging-face.ipynb index 8febe0062..de7b4735b 100644 --- a/ray-on-gke/examples/notebooks/ray-fine-tune-hugging-face.ipynb +++ b/ray-on-gke/examples/notebooks/ray-fine-tune-hugging-face.ipynb @@ -24,7 +24,7 @@ "import ray\n", "\n", "ray.init(\n", - " address=\"ray://example-cluster-kuberay-head-svc:10001\",\n", + " address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n", " runtime_env={\n", " \"pip\": [\n", " \"IPython\",\n", diff --git a/ray-on-gke/examples/notebooks/ray_basic.ipynb b/ray-on-gke/examples/notebooks/ray_basic.ipynb index 23c7f6c0f..d689a561e 100644 --- a/ray-on-gke/examples/notebooks/ray_basic.ipynb +++ b/ray-on-gke/examples/notebooks/ray_basic.ipynb @@ -133,7 +133,7 @@ } ], "source": [ - "ray.init(\"ray://example-cluster-kuberay-head-svc:10001\")" + "ray.init(\"ray://ray-cluster-kuberay-head-svc:10001\")" ] }, { @@ -147,25 +147,25 @@ "output_type": "stream", "text": [ "Iteration 0\n", - "Counter({('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 38, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 27, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 26, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 9})\n", + "Counter({('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 38, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 27, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 26, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 9})\n", "Iteration 1\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 31, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 26, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 23, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 20})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 31, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 26, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 23, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 20})\n", "Iteration 2\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 33, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 25, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 22, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 20})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 33, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 25, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 22, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 20})\n", "Iteration 3\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 32, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 26, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 23, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 19})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 32, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 26, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 23, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 19})\n", "Iteration 4\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 30, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 27, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 23, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 20})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 30, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 27, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 23, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 20})\n", "Iteration 5\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 41, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 32, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 15, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 12})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 41, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 32, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 15, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 12})\n", "Iteration 6\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 30, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 28, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 21, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 21})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 30, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 28, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 21, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 21})\n", "Iteration 7\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 33, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 24, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 23, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 20})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 33, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 24, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 23, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 20})\n", "Iteration 8\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 38, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 29, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 18, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 15})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 38, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 29, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 18, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 15})\n", "Iteration 9\n", - "Counter({('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-head-fd9g6'): 28, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-head-fd9g6'): 27, ('example-cluster-kuberay-head-fd9g6', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 26, ('example-cluster-kuberay-worker-workergroup-9bnxn', 'example-cluster-kuberay-worker-workergroup-9bnxn'): 19})\n", + "Counter({('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-head-fd9g6'): 28, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-head-fd9g6'): 27, ('ray-cluster-kuberay-head-fd9g6', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 26, ('ray-cluster-kuberay-worker-workergroup-9bnxn', 'ray-cluster-kuberay-worker-workergroup-9bnxn'): 19})\n", "Success!\n" ] } diff --git a/ray-on-gke/examples/notebooks/ray_mnist.ipynb b/ray-on-gke/examples/notebooks/ray_mnist.ipynb index 8fc7ec351..043d0f54c 100644 --- a/ray-on-gke/examples/notebooks/ray_mnist.ipynb +++ b/ray-on-gke/examples/notebooks/ray_mnist.ipynb @@ -165,7 +165,7 @@ } ], "source": [ - "ray.init(\"ray://example-cluster-kuberay-head-svc.ray.svc.cluster.local:10001\")" + "ray.init(\"ray://ray-cluster-kuberay-head-svc.ray.svc.cluster.local:10001\")" ] }, { @@ -304,15 +304,15 @@ "\u001b[2m\u001b[36m(TensorflowTrainer pid=999)\u001b[0m 2023-04-06 16:16:25,935\tINFO bulk_executor.py:39 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[randomize_block_order]\n", "\u001b[2m\u001b[36m(TensorflowTrainer pid=999)\u001b[0m /home/ray/anaconda3/lib/python3.10/site-packages/ray/data/_internal/bulk_dataset_iterator.py:108: UserWarning: session.get_dataset_shard returns a ray.data.DatasetIterator instead of a Dataset as of Ray v2.3. Use iter_torch_batches(), to_tf(), or iter_batches() to iterate over one epoch. See https://docs.ray.io/en/latest/data/api/dataset_iterator.html for full DatasetIterator docs.\n", "\u001b[2m\u001b[36m(TensorflowTrainer pid=999)\u001b[0m warnings.warn(\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.026073: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: example-cluster-kuberay-worker-workergroup-dw77g\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.026142: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: example-cluster-kuberay-worker-workergroup-dw77g\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.026073: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: ray-cluster-kuberay-worker-workergroup-dw77g\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.026142: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: ray-cluster-kuberay-worker-workergroup-dw77g\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.026227: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.026265: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 470.161.3\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.033349: I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:450] Started server with target: grpc://10.8.8.8:47851\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.040715: I tensorflow/tsl/distributed_runtime/coordination/coordination_service.cc:525] /job:worker/replica:0/task:0 has connected to coordination service. Incarnation: 6656391604461883464\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=213, ip=10.8.8.8)\u001b[0m 2023-04-06 16:16:26.041023: I tensorflow/tsl/distributed_runtime/coordination/coordination_service_agent.cc:298] Coordination agent has successfully connected.\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=1146)\u001b[0m 2023-04-06 16:16:26.026232: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: example-cluster-kuberay-head-8jw8d\n", - "\u001b[2m\u001b[36m(RayTrainWorker pid=1146)\u001b[0m 2023-04-06 16:16:26.026259: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: example-cluster-kuberay-head-8jw8d\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=1146)\u001b[0m 2023-04-06 16:16:26.026232: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:168] retrieving CUDA diagnostic information for host: ray-cluster-kuberay-head-8jw8d\n", + "\u001b[2m\u001b[36m(RayTrainWorker pid=1146)\u001b[0m 2023-04-06 16:16:26.026259: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:175] hostname: ray-cluster-kuberay-head-8jw8d\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=1146)\u001b[0m 2023-04-06 16:16:26.026330: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:199] libcuda reported version is: NOT_FOUND: was unable to find libcuda.so DSO loaded into this program\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=1146)\u001b[0m 2023-04-06 16:16:26.026365: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:203] kernel reported version is: 470.161.3\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=1146)\u001b[0m 2023-04-06 16:16:26.033579: I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:450] Started server with target: grpc://10.8.7.8:39057\n", @@ -355,7 +355,7 @@ "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m date: 2023-04-06_16-16-28\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m done: false\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m experiment_id: f2d05e60392044ee96706251fcdd2a67\n", - "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m hostname: example-cluster-kuberay-head-8jw8d\n", + "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m hostname: ray-cluster-kuberay-head-8jw8d\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m iterations_since_restore: 1\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m loss: 1.3728505373001099\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m mean_squared_error: 1.3728505373001099\n", @@ -515,7 +515,7 @@ "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m done: true\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m experiment_id: f2d05e60392044ee96706251fcdd2a67\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m experiment_tag: '0'\n", - "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m hostname: example-cluster-kuberay-head-8jw8d\n", + "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m hostname: ray-cluster-kuberay-head-8jw8d\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m iterations_since_restore: 4\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m loss: 0.928823709487915\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m mean_squared_error: 0.928823709487915\n", @@ -532,7 +532,7 @@ "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m warmup_time: 0.03273630142211914\n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=798)\u001b[0m Trial TensorflowTrainer_072e8_00000 completed.\n", - "{'loss': 0.928823709487915, 'mean_squared_error': 0.928823709487915, '_timestamp': 1680822991, '_time_this_iter_s': 0.8873341083526611, '_training_iteration': 4, 'time_this_iter_s': 0.9077465534210205, 'should_checkpoint': True, 'done': True, 'timesteps_total': None, 'episodes_total': None, 'training_iteration': 4, 'trial_id': '072e8_00000', 'experiment_id': 'f2d05e60392044ee96706251fcdd2a67', 'date': '2023-04-06_16-16-31', 'timestamp': 1680822991, 'time_total_s': 10.116967916488647, 'pid': 999, 'hostname': 'example-cluster-kuberay-head-8jw8d', 'node_ip': '10.8.7.8', 'config': {'train_loop_config': {'lr': 0.001, 'batch_size': 32, 'epochs': 4}}, 'time_since_restore': 10.116967916488647, 'timesteps_since_restore': 0, 'iterations_since_restore': 4, 'warmup_time': 0.03273630142211914, 'experiment_tag': '0'}\n" + "{'loss': 0.928823709487915, 'mean_squared_error': 0.928823709487915, '_timestamp': 1680822991, '_time_this_iter_s': 0.8873341083526611, '_training_iteration': 4, 'time_this_iter_s': 0.9077465534210205, 'should_checkpoint': True, 'done': True, 'timesteps_total': None, 'episodes_total': None, 'training_iteration': 4, 'trial_id': '072e8_00000', 'experiment_id': 'f2d05e60392044ee96706251fcdd2a67', 'date': '2023-04-06_16-16-31', 'timestamp': 1680822991, 'time_total_s': 10.116967916488647, 'pid': 999, 'hostname': 'ray-cluster-kuberay-head-8jw8d', 'node_ip': '10.8.7.8', 'config': {'train_loop_config': {'lr': 0.001, 'batch_size': 32, 'epochs': 4}}, 'time_since_restore': 10.116967916488647, 'timesteps_since_restore': 0, 'iterations_since_restore': 4, 'warmup_time': 0.03273630142211914, 'experiment_tag': '0'}\n" ] } ], diff --git a/ray-on-gke/examples/notebooks/raytrain-stablediffusion.ipynb b/ray-on-gke/examples/notebooks/raytrain-stablediffusion.ipynb index bc115ac66..87eec8eb2 100644 --- a/ray-on-gke/examples/notebooks/raytrain-stablediffusion.ipynb +++ b/ray-on-gke/examples/notebooks/raytrain-stablediffusion.ipynb @@ -146,7 +146,7 @@ "# Connect to the ray cluster\n", "import ray\n", "from ray.job_submission import JobSubmissionClient\n", - "client = JobSubmissionClient(\"ray://example-cluster-kuberay-head-svc:10001\")" + "client = JobSubmissionClient(\"ray://ray-cluster-kuberay-head-svc:10001\")" ] }, { @@ -224,11 +224,11 @@ ], "source": [ "# Replace with the jobid from the previous cell\n", - "!ray job status raysubmit_uCgwcqXZKTPVD9DY --address \"ray://example-cluster-kuberay-head-svc:10001\" \n", + "!ray job status raysubmit_uCgwcqXZKTPVD9DY --address \"ray://ray-cluster-kuberay-head-svc:10001\" \n", "\n", "# some helpful commands to stop and delete ray jobs\n", - "# !ray job stop --address \"ray://example-cluster-kuberay-head-svc:10001\" \n", - "# !ray job delete --address \"ray://example-cluster-kuberay-head-svc:10001\" " + "# !ray job stop --address \"ray://ray-cluster-kuberay-head-svc:10001\" \n", + "# !ray job delete --address \"ray://ray-cluster-kuberay-head-svc:10001\" " ] }, { @@ -298,7 +298,7 @@ } ], "source": [ - "!ray job status raysubmit_MPLv7gycz3vnMuHX --address \"ray://example-cluster-kuberay-head-svc:10001\" " + "!ray job status raysubmit_MPLv7gycz3vnMuHX --address \"ray://ray-cluster-kuberay-head-svc:10001\" " ] }, { @@ -365,7 +365,7 @@ } ], "source": [ - "!ray job status raysubmit_jbUPz8WVRtxRUR2j --address \"ray://example-cluster-kuberay-head-svc:10001\" " + "!ray job status raysubmit_jbUPz8WVRtxRUR2j --address \"ray://ray-cluster-kuberay-head-svc:10001\" " ] }, { diff --git a/ray-on-gke/examples/notebooks/stable-diffusion-tpu.ipynb b/ray-on-gke/examples/notebooks/stable-diffusion-tpu.ipynb index 346104332..555d2807d 100644 --- a/ray-on-gke/examples/notebooks/stable-diffusion-tpu.ipynb +++ b/ray-on-gke/examples/notebooks/stable-diffusion-tpu.ipynb @@ -38,7 +38,7 @@ "import ray\n", "\n", "ray.init(\n", - " address=\"ray://example-cluster-kuberay-head-svc:10001\",\n", + " address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n", " runtime_env={\n", " \"pip\": [\n", " \"jax[tpu]==0.4.11\",\n", @@ -283,7 +283,7 @@ "def send_request_and_receive_image(prompt: str):\n", " \"\"\"Sends a single prompt request and returns the Image.\"\"\"\n", " inputs = \"%20\".join(prompt.split(\" \"))\n", - " resp = requests.get(f\"http://example-cluster-kuberay-head-svc:8000/imagine?prompt={inputs}\")\n", + " resp = requests.get(f\"http://ray-cluster-kuberay-head-svc:8000/imagine?prompt={inputs}\")\n", " return BytesIO(resp.content)\n", "\n", "\n", diff --git a/ray-on-gke/examples/notebooks/stable_diffusion.ipynb b/ray-on-gke/examples/notebooks/stable_diffusion.ipynb index e9f5796d9..df81cc33c 100644 --- a/ray-on-gke/examples/notebooks/stable_diffusion.ipynb +++ b/ray-on-gke/examples/notebooks/stable_diffusion.ipynb @@ -29,7 +29,7 @@ "\n", "# Initialize Ray Cluster\n", "ray.init(\n", - " address=\"ray://example-cluster-kuberay-head-svc:10001\",\n", + " address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n", " runtime_env={\n", " \"pip\": [\n", " \"IPython\",\n", @@ -139,7 +139,7 @@ "\n", "prompt = \"a cute cat is dancing on the grass.\"\n", "input = \"%20\".join(prompt.split(\" \"))\n", - "resp = requests.get(f\"http://example-cluster-kuberay-head-svc:8000/imagine?prompt={input}\")\n", + "resp = requests.get(f\"http://ray-cluster-kuberay-head-svc:8000/imagine?prompt={input}\")\n", "with open(\"output.png\", 'wb') as f:\n", " f.write(resp.content)" ] diff --git a/ray-on-gke/guides/raytrain-with-gcsfusecsi/README.md b/ray-on-gke/guides/raytrain-with-gcsfusecsi/README.md index cbbc54cd2..7e2a8a9f5 100644 --- a/ray-on-gke/guides/raytrain-with-gcsfusecsi/README.md +++ b/ray-on-gke/guides/raytrain-with-gcsfusecsi/README.md @@ -57,14 +57,14 @@ pod/kuberay-operator-64b7b88759-5ppfw 1/1 Running 0 95 ``` $ kubectl get all -n example NAME READY STATUS RESTARTS AGE -pod/example-cluster-kuberay-head-9x2q6 2/2 Running 0 3m12s -pod/example-cluster-kuberay-worker-workergroup-95nm2 2/2 Running 0 3m12s -pod/example-cluster-kuberay-worker-workergroup-tfg9n 2/2 Running 0 3m12s +pod/ray-cluster-kuberay-head-9x2q6 2/2 Running 0 3m12s +pod/ray-cluster-kuberay-worker-workergroup-95nm2 2/2 Running 0 3m12s +pod/ray-cluster-kuberay-worker-workergroup-tfg9n 2/2 Running 0 3m12s pod/kuberay-operator-64b7b88759-5ppfw 1/1 Running 0 4m4s pod/tensorflow-0 2/2 Running 0 16s NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -service/example-cluster-kuberay-head-svc ClusterIP 10.8.10.33 10001/TCP,8265/TCP,8080/TCP,6379/TCP,8000/TCP 3m12s +service/ray-cluster-kuberay-head-svc ClusterIP 10.8.10.33 10001/TCP,8265/TCP,8080/TCP,6379/TCP,8000/TCP 3m12s service/kuberay-operator ClusterIP 10.8.14.245 8080/TCP 4m4s service/tensorflow ClusterIP None 8888/TCP 16s service/tensorflow-jupyter LoadBalancer 10.8.3.9 80:31891/TCP 16s @@ -96,7 +96,7 @@ http://tensorflow-0:8888/?token= :: /home/jovyan 12. Follow the comments and execute the cells in the notebook to run a distributed training job and then inference on the tuned model 13. Port forward the ray service port to examine the ray dashboard for jobs progress details, The dashboard is reachable at localhost:8286 in the local browser ``` -kubectl port-forward -n example service/example-cluster-kuberay-head-svc 8265:8265 +kubectl port-forward -n example service/ray-cluster-kuberay-head-svc 8265:8265 ``` 14. During an ongoing traing, the pod resource usage of CPU, Memory, GPU, GPU Memory can be visualized with the GKE Cloud Console for the workloads example ![Ray Head resources](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/ray-on-gke/raytrain-examples/images/ray-head-resources.png) and ![Ray Worker resources](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/ray-on-gke/raytrain-examples/images/ray-worker-resources.png) diff --git a/ray-on-gke/guides/tpu/README.md b/ray-on-gke/guides/tpu/README.md index 12061cc65..20dd5f470 100644 --- a/ray-on-gke/guides/tpu/README.md +++ b/ray-on-gke/guides/tpu/README.md @@ -86,7 +86,7 @@ To deploy a multi-host Ray Cluster, modify the `worker` spec [here](https://gith import ray ray.init( - address="ray://example-cluster-kuberay-head-svc:10001", + address="ray://ray-cluster-kuberay-head-svc:10001", runtime_env={ "pip": [ "jax[tpu]==0.4.12", @@ -105,7 +105,7 @@ num_workers = 4 result = [tpu_cores.remote() for _ in range(num_workers)] print(ray.get(result)) ``` -2. `kubectl port-forward svc/example-cluster-kuberay-head-svc 8265:8265 &` +2. `kubectl port-forward svc/ray-cluster-kuberay-head-svc 8265:8265 &` 3. `export RAY_ADDRESS=http://localhost:8265` 4. `ray job submit --runtime-env-json='{"working_dir": "."}' -- python test_tpu.py` diff --git a/tpu-provisioner/README.md b/tpu-provisioner/README.md index 858717605..8b4cd3c29 100644 --- a/tpu-provisioner/README.md +++ b/tpu-provisioner/README.md @@ -43,7 +43,7 @@ Build and push your image. For example: ```bash export PROJECT_ID=example-project -export CLUSTER_NAME=example-cluster +export CLUSTER_NAME=ray-cluster ``` ```bash diff --git a/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/e2e-genai-langchain.ipynb b/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/e2e-genai-langchain.ipynb index b35516f6e..a7b33b6d5 100644 --- a/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/e2e-genai-langchain.ipynb +++ b/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/e2e-genai-langchain.ipynb @@ -112,7 +112,7 @@ "\n", "# initialize ray\n", "ray.init(\n", - " address=\"ray://example-cluster-kuberay-head-svc:10001\",\n", + " address=\"ray://ray-cluster-kuberay-head-svc:10001\",\n", " runtime_env={\n", " \"pip\": [\n", " \"transformers>=4.26.0\",\n", @@ -194,7 +194,7 @@ "import requests\n", "\n", "query = \"bunny\"\n", - "response = requests.post(f'http://example-cluster-kuberay-head-svc:8000/?text={query}')\n", + "response = requests.post(f'http://ray-cluster-kuberay-head-svc:8000/?text={query}')\n", "print(response.text)" ] } diff --git a/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/src/backend/main.py b/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/src/backend/main.py index 513f8b977..cafc36b85 100644 --- a/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/src/backend/main.py +++ b/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/src/backend/main.py @@ -22,7 +22,7 @@ CORS(app) logging.basicConfig(level=logging.INFO) -RAY_ENDPOINT = 'http://example-cluster-kuberay-head-svc:8000' # Consider moving to configuration +RAY_ENDPOINT = 'http://ray-cluster-kuberay-head-svc:8000' # Consider moving to configuration # NOTE: this example starts a new instance of Ray.serve deployment for simplicity. # For production, recommendation would be to move this initialization into a different component diff --git a/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/src/backend/model.py b/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/src/backend/model.py index 0a4fc8e1b..6fe61f970 100644 --- a/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/src/backend/model.py +++ b/tutorials-and-examples/genAI-LLM/e2e-genai-langchain-app/src/backend/model.py @@ -27,7 +27,7 @@ # Configurations (consider using environment variables or a dedicated config module) MODEL_ID = os.environ.get('MODEL_ID', 'google/flan-t5-small') -RAY_ADDRESS = os.environ.get('RAY_ADDRESS', 'ray://example-cluster-kuberay-head-svc:10001') +RAY_ADDRESS = os.environ.get('RAY_ADDRESS', 'ray://ray-cluster-kuberay-head-svc:10001') def create_chains(llm): template1 = "Give me a fact about {topic}."