diff --git a/gke-platform/modules/gke_standard/main.tf b/gke-platform/modules/gke_standard/main.tf index 98b4223db..f14b26522 100644 --- a/gke-platform/modules/gke_standard/main.tf +++ b/gke-platform/modules/gke_standard/main.tf @@ -46,6 +46,12 @@ resource "google_container_cluster" "ml_cluster" { workload_pool = "${var.project_id}.svc.id.goog" } + addons_config { + gcs_fuse_csi_driver_config { + enabled = true + } + } + release_channel { channel = "RAPID" } diff --git a/jupyter-on-gke/README.md b/jupyter-on-gke/README.md index bf7220e0a..6e60c505c 100644 --- a/jupyter-on-gke/README.md +++ b/jupyter-on-gke/README.md @@ -134,6 +134,16 @@ Continue to Step 3 of [below](#if-auth-is-enabled). 4. Select profile and open a Jupyter Notebook +## Persistent Storage + +Currently there are 2 choices for storage: + +1. Default JupyterHub Storage - `pd.csi.storage.gke.io` with reclaim policy `Delete` + +2. GCSFuse - `gcsfuse.csi.storage.gke.io` uses GCS buckets and requires users to pre-create buckets named in the format `gcsfuse-{username}` + +For more information about persistent storage and the available options, visit [here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/jupyter-on-gke/storage.md) + ## Running GPT-J-6B This example is adapted from Ray AIR's examples [here](https://docs.ray.io/en/master/ray-air/examples/gptj_serving.html). 
diff --git a/jupyter-on-gke/images/gcs_bucket.png b/jupyter-on-gke/images/gcs_bucket.png new file mode 100644 index 000000000..abb419824 Binary files /dev/null and b/jupyter-on-gke/images/gcs_bucket.png differ diff --git a/jupyter-on-gke/jupyter_config/config-selfauth.yaml b/jupyter-on-gke/jupyter_config/config-selfauth.yaml index b7b41bb29..c8de9530f 100644 --- a/jupyter-on-gke/jupyter_config/config-selfauth.yaml +++ b/jupyter-on-gke/jupyter_config/config-selfauth.yaml @@ -78,10 +78,21 @@ singleuser: ephemeral-storage: 10Gi nodeSelector: iam.gke.io/gke-metadata-server-enabled: "true" +# Needed by the GCSFuse storage option, which mounts its ephemeral volume as the home directory. If not set, the pod log shows a permission error when GCSFuse is used. + extraEnv: + JUPYTER_ALLOW_INSECURE_WRITES: "true" image: name: jupyter/tensorflow-notebook tag: python-3.10 startTimeout: 1000 + extraAnnotations: + gke-gcsfuse/volumes: "true" + gke-gcsfuse/cpu-limit: 500m + gke-gcsfuse/memory-limit: 250Mi + gke-gcsfuse/ephemeral-storage-limit: 10Gi + storage: + dynamic: + pvcNameTemplate: claim-{username} # More info on kubespawner overrides: https://jupyterhub-kubespawner.readthedocs.io/en/latest/spawner.html#kubespawner.KubeSpawner # profile example: # - display_name: "Learning Data Science" @@ -101,11 +112,57 @@ singleuser: # - > # gitpuller https://github.com/data-8/materials-fa17 master materials-fa; profileList: - - display_name: "Basic" + - display_name: "CPU" description: "Creates CPU VMs as the compute for notebook execution." 
+ profile_options: + storage: + display_name: "Storage" + choices: + DefaultStorage: + display_name: "DefaultStorage" + kubespawner_override: + default: true + GCSFuse: + display_name: "GCSFuse" + kubespawner_override: + volume_mounts: + - name: gcs-fuse-csi-ephemeral + mountPath: /home/jovyan + volumes: + - name: gcs-fuse-csi-ephemeral + csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + bucketName: gcsfuse-{username} + mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777" + node_selector: + iam.gke.io/gke-metadata-server-enabled: "true" default: true - display_name: "GPU T4" description: "Creates GPU VMs (T4) as the compute for notebook execution" + profile_options: + storage: + display_name: "Storage" + choices: + DefaultStorage: + display_name: "DefaultStorage" + kubespawner_override: + default: true + GCSFuse: + display_name: "GCSFuse" + kubespawner_override: + volume_mounts: + - name: gcs-fuse-csi-ephemeral + mountPath: /home/jovyan + volumes: + - name: gcs-fuse-csi-ephemeral + csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + bucketName: gcsfuse-{username} + mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777" + node_selector: + iam.gke.io/gke-metadata-server-enabled: "true" kubespawner_override: image: jupyter/tensorflow-notebook:python-3.10 extra_resource_limits: @@ -116,6 +173,29 @@ singleuser: cloud.google.com/gke-accelerator: "nvidia-tesla-t4" - display_name: "GPU A100" description: "Creates GPU VMs (A100) as the compute for notebook execution" + profile_options: + storage: + display_name: "Storage" + choices: + DefaultStorage: + display_name: "DefaultStorage" + kubespawner_override: + default: true + GCSFuse: + display_name: "GCSFuse" + kubespawner_override: + volume_mounts: + - name: gcs-fuse-csi-ephemeral + mountPath: /home/jovyan + volumes: + - name: gcs-fuse-csi-ephemeral + csi: + driver: gcsfuse.csi.storage.gke.io + volumeAttributes: + bucketName: 
gcsfuse-{username} + mountOptions: "uid=1000,gid=100,o=noexec,implicit-dirs,dir-mode=777,file-mode=777" + node_selector: + iam.gke.io/gke-metadata-server-enabled: "true" kubespawner_override: image: jupyter/tensorflow-notebook:python-3.10 extra_resource_limits: diff --git a/jupyter-on-gke/jupyterhub.tf b/jupyter-on-gke/jupyterhub.tf index 5f35dd4a4..944efeee0 100644 --- a/jupyter-on-gke/jupyterhub.tf +++ b/jupyter-on-gke/jupyterhub.tf @@ -60,7 +60,7 @@ resource "google_project_service" "project_service" { disable_on_destroy = false } -# Creates a "Brand", equivalent to the OAuth consent screen on GCP UI +# Creates a "Brand", equivalent to the OAuth consent screen on Cloud console resource "google_iap_brand" "project_brand" { count = var.brand != "" ? 1 : 0 support_email = var.support_email @@ -70,7 +70,7 @@ resource "google_iap_brand" "project_brand" { # Creates the OAuth client used in IAP resource "google_iap_client" "iap_oauth_client" { - count = var.client_id != "" ? 0 : 1 + count = var.client_id != "" ? 0 : 1 display_name = "Jupyter-Client" brand = "projects/${data.google_project.project.number}/brands/${data.google_project.project.number}" } @@ -93,17 +93,26 @@ module "iap_auth" { project_id = var.project_id namespace = var.namespace service_name = var.service_name - client_id = var.client_id != "" ? var.client_id : google_iap_client.iap_oauth_client[0].client_id - client_secret = var.client_id != "" ? var.client_secret : google_iap_client.iap_oauth_client[0].secret + client_id = var.client_id != "" ? var.client_id : google_iap_client.iap_oauth_client[0].client_id + client_secret = var.client_id != "" ? 
var.client_secret : google_iap_client.iap_oauth_client[0].secret url_domain_addr = var.url_domain_addr url_domain_name = var.url_domain_name depends_on = [ helm_release.jupyterhub, kubernetes_namespace.namespace, + module.workload_identity_service_account ] } +module "workload_identity_service_account" { + source = "./service_accounts_module" + + project_id = var.project_id + namespace = var.namespace + service_account = "jupyter-service-account" +} + resource "helm_release" "jupyterhub" { name = "jupyterhub" repository = "https://jupyterhub.github.io/helm-chart" diff --git a/jupyter-on-gke/service_accounts_module/service_accounts.tf b/jupyter-on-gke/service_accounts_module/service_accounts.tf new file mode 100644 index 000000000..e788bfe22 --- /dev/null +++ b/jupyter-on-gke/service_accounts_module/service_accounts.tf @@ -0,0 +1,51 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+resource "google_service_account" "sa" {
+  project      = var.project_id
+  account_id   = var.service_account
+  display_name = "Jupyterhub service account"
+}
+
+resource "google_service_account_iam_binding" "workload-identity-user" {
+  service_account_id = google_service_account.sa.name
+  role               = "roles/iam.workloadIdentityUser"
+  members = [
+    "serviceAccount:${var.project_id}.svc.id.goog[${var.namespace}/default]",
+  ]
+}
+
+# Additive (iam_member) grants: an authoritative iam_binding would strip every other member from these project roles.
+resource "google_project_iam_member" "cloud_role" {
+  for_each = toset([
+    "roles/storage.admin",
+    "roles/artifactregistry.reader",
+  ])
+  project = var.project_id
+  role    = each.key
+  member  = "serviceAccount:${var.project_id}.svc.id.goog[${var.namespace}/default]"
+}
+
+# Annotates the "default" ServiceAccount in var.namespace (the one bound above); that namespace must exist first.
+resource "kubernetes_annotations" "default" {
+  api_version = "v1"
+  kind        = "ServiceAccount"
+  metadata {
+    name      = "default"
+    namespace = var.namespace
+  }
+  annotations = {
+    "iam.gke.io/gcp-service-account" = google_service_account.sa.email
+  }
+}
diff --git a/jupyter-on-gke/service_accounts_module/variables.tf b/jupyter-on-gke/service_accounts_module/variables.tf
new file mode 100644
index 000000000..324858447
--- /dev/null
+++ b/jupyter-on-gke/service_accounts_module/variables.tf
@@ -0,0 +1,30 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +variable "project_id" { + type = string + description = "GCP project id" +} + +variable "namespace" { + type = string + description = "Kubernetes namespace where resources are deployed" + default = "jup" +} + +variable "service_account" { + type = string + description = "Google Cloud IAM service account for authenticating with GCP services" + default = "jup-system-account" +} diff --git a/jupyter-on-gke/service_accounts_module/versions.tf b/jupyter-on-gke/service_accounts_module/versions.tf new file mode 100644 index 000000000..d3bbf86e1 --- /dev/null +++ b/jupyter-on-gke/service_accounts_module/versions.tf @@ -0,0 +1,26 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "4.56.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "2.18.1" + } + } +} diff --git a/jupyter-on-gke/storage.md b/jupyter-on-gke/storage.md new file mode 100644 index 000000000..dc5c067a3 --- /dev/null +++ b/jupyter-on-gke/storage.md @@ -0,0 +1,11 @@ +# Persistent Storage + +## GCSFuse + +**Important Note:** To use this option, a GCS bucket named in the format `gcsfuse-{username}` must already exist in the project. + +GCSFuse allows users to mount GCS buckets as their local filesystem. 
This option allows easy access from the Cloud console: + +![Profiles Page](images/gcs_bucket.png) + +Since this bucket is in GCS, there is built-in permission control, and it can be accessed from outside of the cluster. diff --git a/jupyter-on-gke/variables.tf b/jupyter-on-gke/variables.tf index d8b74e2ea..cbeb0183e 100644 --- a/jupyter-on-gke/variables.tf +++ b/jupyter-on-gke/variables.tf @@ -88,5 +88,5 @@ variable "client_secret" { type = string description = "Client secret used for enabling IAP" default = "" - sensitive = true + sensitive = true } \ No newline at end of file