Skip to content

Commit

Permalink
Remove T4, A100, and TPU profile options for JupyterHub
Browse files Browse the repository at this point in the history
  • Loading branch information
chiayi committed Apr 1, 2024
1 parent 82228d7 commit 79a0730
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 155 deletions.
6 changes: 6 additions & 0 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ steps:
cd /workspace/applications/rag/tests
python3 test_frontend.py "127.0.0.1:8081"
echo "pass" > /workspace/rag_frontend_result.txt
sleep 5s
cd /workspace/
find . -type f -name "*.ipynb" > notebook_file_list.txt
while IFS= read -r line; do gsutil cp $line gs://gke-aieco-rag-$SHORT_SHA-$_BUILD_ID/ ; done < notebook_file_list.txt
kubectl apply -f notebook_job.yaml
allowFailure: true
waitFor: ['cleanup jupyterhub', 'cleanup ray cluster']

Expand Down
62 changes: 0 additions & 62 deletions modules/jupyter/jupyter_config/config-selfauth-autopilot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,37 +144,6 @@ singleuser:
cloud.google.com/machine-family: "c3"
cloud.google.com/gke-ephemeral-storage-local-ssd: "true"
default: true
- display_name: "GPU (T4)"
description: "Creates GPU VMs (T4) as the compute for notebook execution."
profile_options:
storage:
display_name: "Storage"
choices:
DefaultStorage:
display_name: "Local (Standard)"
kubespawner_override:
default: true
GCSFuse:
display_name: "GCSFuse"
kubespawner_override:
volume_mounts:
- name: gcs-fuse-csi-ephemeral
mountPath: /home/jovyan
volumes:
- name: gcs-fuse-csi-ephemeral
csi:
driver: gcsfuse.csi.storage.gke.io
volumeAttributes:
bucketName: ${gcs_bucket}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777,only-dir=notebooks/{username}"
kubespawner_override:
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
nvidia.com/gpu: "2"
extra_resource_guarantees:
nvidia.com/gpu: "2"
node_selector:
cloud.google.com/gke-accelerator: "nvidia-tesla-t4"
- display_name: "GPU (L4)"
description: "Creates GPU VMs (L4) as the compute for notebook execution."
profile_options:
Expand Down Expand Up @@ -208,37 +177,6 @@ singleuser:
cloud.google.com/compute-class: "Accelerator"
cloud.google.com/gke-accelerator: "nvidia-l4"
cloud.google.com/gke-ephemeral-storage-local-ssd: "true"
- display_name: "GPU (A100)"
description: "Creates GPU VMs (A100) as the compute for notebook execution."
profile_options:
storage:
display_name: "Storage"
choices:
DefaultStorage:
display_name: "Local (Standard)"
kubespawner_override:
default: true
GCSFuse:
display_name: "GCSFuse"
kubespawner_override:
volume_mounts:
- name: gcs-fuse-csi-ephemeral
mountPath: /home/jovyan
volumes:
- name: gcs-fuse-csi-ephemeral
csi:
driver: gcsfuse.csi.storage.gke.io
volumeAttributes:
bucketName: ${gcs_bucket}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777,only-dir=notebooks/{username}"
kubespawner_override:
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
nvidia.com/gpu: "2"
extra_resource_guarantees:
nvidia.com/gpu: "2"
node_selector:
cloud.google.com/gke-accelerator: "nvidia-tesla-a100"
cmd: null
cloudMetadata:
blockWithIptables: false
Expand Down
93 changes: 0 additions & 93 deletions modules/jupyter/jupyter_config/config-selfauth.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,99 +155,6 @@ singleuser:
bucketName: ${gcs_bucket}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777,only-dir=notebooks/{username}"
default: true
- display_name: "TPU"
description: "Creates TPUs VMs as the compute for notebook execution. Will only work if TPU is enabled."
profile_options:
storage:
display_name: "Storage"
choices:
DefaultStorage:
display_name: "DefaultStorage"
kubespawner_override:
default: true
GCSFuse:
display_name: "GCSFuse"
kubespawner_override:
volume_mounts:
- name: gcs-fuse-csi-ephemeral
mountPath: /home/jovyan
volumes:
- name: gcs-fuse-csi-ephemeral
csi:
driver: gcsfuse.csi.storage.gke.io
volumeAttributes:
bucketName: ${gcs_bucket}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777,only-dir=notebooks/{username}"
kubespawner_override:
image: jupyter/tensorflow-notebook:python-3.10
extra_resource_limits:
google.com/tpu: "4"
node_selector:
cloud.google.com/gke-tpu-accelerator: "tpu-v4-podslice"
cloud.google.com/gke-tpu-topology: "2x2x1"
- display_name: "GPU T4"
description: "Creates GPU VMs (T4) as the compute for notebook execution"
profile_options:
storage:
display_name: "Storage"
choices:
DefaultStorage:
display_name: "DefaultStorage"
kubespawner_override:
default: true
GCSFuse:
display_name: "GCSFuse"
kubespawner_override:
volume_mounts:
- name: gcs-fuse-csi-ephemeral
mountPath: /home/jovyan
volumes:
- name: gcs-fuse-csi-ephemeral
csi:
driver: gcsfuse.csi.storage.gke.io
volumeAttributes:
bucketName: ${gcs_bucket}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777,only-dir=notebooks/{username}"
kubespawner_override:
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
# number of gpus needed on the node
nvidia.com/gpu: "2"
# possible values: nvidia-tesla-k80, nvidia-tesla-p100, nvidia-tesla-p4, nvidia-tesla-v100, nvidia-tesla-t4, nvidia-tesla-a100, nvidia-a100-80gb, nvidia-l4
node_selector:
cloud.google.com/gke-accelerator: "nvidia-tesla-t4"
- display_name: "GPU A100"
description: "Creates GPU VMs (A100) as the compute for notebook execution"
profile_options:
storage:
display_name: "Storage"
choices:
DefaultStorage:
display_name: "DefaultStorage"
kubespawner_override:
default: true
GCSFuse:
display_name: "GCSFuse"
kubespawner_override:
volume_mounts:
- name: gcs-fuse-csi-ephemeral
mountPath: /home/jovyan
volumes:
- name: gcs-fuse-csi-ephemeral
csi:
driver: gcsfuse.csi.storage.gke.io
volumeAttributes:
bucketName: ${gcs_bucket}
mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777,only-dir=notebooks/{username}"
kubespawner_override:
image: ${notebook_image}:${notebook_image_tag}
extra_resource_limits:
nvidia.com/gpu: "2"
# GPU requests
extra_resource_guarantees:
nvidia.com/gpu: "2"
node_selector:
cloud.google.com/gke-accelerator: "nvidia-tesla-a100"
cmd: null
cloudMetadata:
blockWithIptables: false
Expand Down

0 comments on commit 79a0730

Please sign in to comment.