From 6b0f717fc302726cf81dcc5d4daca0d4ca2012de Mon Sep 17 00:00:00 2001 From: artemvmin Date: Tue, 5 Mar 2024 15:39:10 -0800 Subject: [PATCH 1/2] Add release channel to GKE infra (#286) - Default to RAPID channel to support the latest GKE and K8s AI/ML features --- infrastructure/main.tf | 4 ++++ infrastructure/variables.tf | 5 +++++ modules/gke-autopilot-private-cluster/README.md | 1 + modules/gke-autopilot-private-cluster/main.tf | 1 + modules/gke-autopilot-private-cluster/variables.tf | 4 ++++ modules/gke-autopilot-public-cluster/README.md | 3 ++- modules/gke-autopilot-public-cluster/main.tf | 2 +- modules/gke-autopilot-public-cluster/variables.tf | 6 +++++- modules/gke-standard-private-cluster/README.md | 3 ++- modules/gke-standard-private-cluster/main.tf | 1 + modules/gke-standard-private-cluster/variables.tf | 4 ++++ modules/gke-standard-public-cluster/README.md | 3 ++- modules/gke-standard-public-cluster/main.tf | 1 + modules/gke-standard-public-cluster/variables.tf | 6 +++++- 14 files changed, 38 insertions(+), 6 deletions(-) diff --git a/infrastructure/main.tf b/infrastructure/main.tf index c99521a76..e8826c5f9 100644 --- a/infrastructure/main.tf +++ b/infrastructure/main.tf @@ -57,6 +57,7 @@ module "public-gke-standard-cluster" { cluster_name = var.cluster_name cluster_labels = var.cluster_labels kubernetes_version = var.kubernetes_version + release_channel = var.release_channel cluster_region = var.cluster_region cluster_zones = var.cluster_zones ip_range_pods = var.ip_range_pods @@ -93,6 +94,7 @@ module "public-gke-autopilot-cluster" { cluster_name = var.cluster_name cluster_labels = var.cluster_labels kubernetes_version = var.kubernetes_version + release_channel = var.release_channel cluster_region = var.cluster_region cluster_zones = var.cluster_zones ip_range_pods = var.ip_range_pods @@ -117,6 +119,7 @@ module "private-gke-standard-cluster" { cluster_name = var.cluster_name cluster_labels = var.cluster_labels kubernetes_version = 
var.kubernetes_version + release_channel = var.release_channel cluster_region = var.cluster_region cluster_zones = var.cluster_zones ip_range_pods = var.ip_range_pods @@ -154,6 +157,7 @@ module "private-gke-autopilot-cluster" { cluster_name = var.cluster_name cluster_labels = var.cluster_labels kubernetes_version = var.kubernetes_version + release_channel = var.release_channel cluster_region = var.cluster_region cluster_zones = var.cluster_zones ip_range_pods = var.ip_range_pods diff --git a/infrastructure/variables.tf b/infrastructure/variables.tf index 5eba7fc87..27c48696a 100644 --- a/infrastructure/variables.tf +++ b/infrastructure/variables.tf @@ -98,6 +98,11 @@ variable "kubernetes_version" { default = "latest" } +variable "release_channel" { + type = string + default = "RAPID" +} + variable "cluster_region" { type = string } diff --git a/modules/gke-autopilot-private-cluster/README.md b/modules/gke-autopilot-private-cluster/README.md index 02c75db57..f8af06e23 100644 --- a/modules/gke-autopilot-private-cluster/README.md +++ b/modules/gke-autopilot-private-cluster/README.md @@ -33,6 +33,7 @@ | [network\_name](#input\_network\_name) | # network variables | `string` | n/a | yes | | [project\_id](#input\_project\_id) | GCP project id | `string` | n/a | yes | | [region](#input\_region) | GCP project region or zone | `string` | `"us-central1"` | no | +| [release\_channel](#input\_release\_channel) | n/a | `string` | n/a | yes | | [subnetwork\_name](#input\_subnetwork\_name) | n/a | `string` | n/a | yes | ## Outputs diff --git a/modules/gke-autopilot-private-cluster/main.tf b/modules/gke-autopilot-private-cluster/main.tf index 243325cb5..abadbe777 100644 --- a/modules/gke-autopilot-private-cluster/main.tf +++ b/modules/gke-autopilot-private-cluster/main.tf @@ -25,6 +25,7 @@ module "gke" { name = var.cluster_name cluster_resource_labels = var.cluster_labels kubernetes_version = var.kubernetes_version + release_channel = var.release_channel region = 
var.cluster_region zones = var.cluster_zones network = var.network_name diff --git a/modules/gke-autopilot-private-cluster/variables.tf b/modules/gke-autopilot-private-cluster/variables.tf index 89e744753..4009d186c 100644 --- a/modules/gke-autopilot-private-cluster/variables.tf +++ b/modules/gke-autopilot-private-cluster/variables.tf @@ -46,6 +46,10 @@ variable "kubernetes_version" { type = string } +variable "release_channel" { + type = string +} + variable "cluster_regional" { type = bool } diff --git a/modules/gke-autopilot-public-cluster/README.md b/modules/gke-autopilot-public-cluster/README.md index 01fc84e58..ad8de36ff 100644 --- a/modules/gke-autopilot-public-cluster/README.md +++ b/modules/gke-autopilot-public-cluster/README.md @@ -25,10 +25,11 @@ No resources. | [network\_name](#input\_network\_name) | # network variables | `string` | n/a | yes | | [project\_id](#input\_project\_id) | GCP project id | `string` | n/a | yes | | [region](#input\_region) | GCP project region or zone | `string` | `"us-central1"` | no | +| [release\_channel](#input\_release\_channel) | n/a | `string` | n/a | yes | | [subnetwork\_name](#input\_subnetwork\_name) | n/a | `string` | n/a | yes | ## Outputs | Name | Description | |------|-------------| -| [cluster](#output\_cluster) | n/a | \ No newline at end of file +| [cluster](#output\_cluster) | n/a | diff --git a/modules/gke-autopilot-public-cluster/main.tf b/modules/gke-autopilot-public-cluster/main.tf index b1ae08f5b..d86092c09 100644 --- a/modules/gke-autopilot-public-cluster/main.tf +++ b/modules/gke-autopilot-public-cluster/main.tf @@ -21,6 +21,7 @@ module "gke" { cluster_resource_labels = var.cluster_labels region = var.cluster_region kubernetes_version = var.kubernetes_version + release_channel = var.release_channel zones = var.cluster_zones network = var.network_name subnetwork = var.subnetwork_name @@ -28,5 +29,4 @@ module "gke" { ip_range_services = var.ip_range_services master_authorized_networks = 
var.master_authorized_networks deletion_protection = var.deletion_protection - } diff --git a/modules/gke-autopilot-public-cluster/variables.tf b/modules/gke-autopilot-public-cluster/variables.tf index d29828e8f..549086888 100644 --- a/modules/gke-autopilot-public-cluster/variables.tf +++ b/modules/gke-autopilot-public-cluster/variables.tf @@ -50,6 +50,10 @@ variable "kubernetes_version" { type = string } +variable "release_channel" { + type = string +} + variable "cluster_region" { type = string } @@ -74,4 +78,4 @@ variable "master_authorized_networks" { variable "deletion_protection" { type = bool default = false -} \ No newline at end of file +} diff --git a/modules/gke-standard-private-cluster/README.md b/modules/gke-standard-private-cluster/README.md index f665ff3be..6d1b6839a 100644 --- a/modules/gke-standard-private-cluster/README.md +++ b/modules/gke-standard-private-cluster/README.md @@ -43,6 +43,7 @@ | [network\_name](#input\_network\_name) | # network variables | `string` | n/a | yes | | [project\_id](#input\_project\_id) | GCP project id | `string` | n/a | yes | | [region](#input\_region) | GCP project region or zone | `string` | `"us-central1"` | no | +| [release\_channel](#input\_release\_channel) | n/a | `string` | n/a | yes | | [subnetwork\_name](#input\_subnetwork\_name) | n/a | `string` | n/a | yes | | [tpu\_pools](#input\_tpu\_pools) | n/a | `list(map(any))` | n/a | yes | @@ -50,4 +51,4 @@ | Name | Description | |------|-------------| -| [cluster](#output\_cluster) | n/a | \ No newline at end of file +| [cluster](#output\_cluster) | n/a | diff --git a/modules/gke-standard-private-cluster/main.tf b/modules/gke-standard-private-cluster/main.tf index 8e6c36a24..01dc4bbbb 100644 --- a/modules/gke-standard-private-cluster/main.tf +++ b/modules/gke-standard-private-cluster/main.tf @@ -29,6 +29,7 @@ module "gke" { name = var.cluster_name cluster_resource_labels = var.cluster_labels kubernetes_version = var.kubernetes_version + release_channel = 
var.release_channel region = var.cluster_region zones = var.cluster_zones network = var.network_name diff --git a/modules/gke-standard-private-cluster/variables.tf b/modules/gke-standard-private-cluster/variables.tf index 0d3d708dc..4d3993f2b 100644 --- a/modules/gke-standard-private-cluster/variables.tf +++ b/modules/gke-standard-private-cluster/variables.tf @@ -46,6 +46,10 @@ variable "kubernetes_version" { type = string } +variable "release_channel" { + type = string +} + variable "cluster_regional" { type = bool } diff --git a/modules/gke-standard-public-cluster/README.md b/modules/gke-standard-public-cluster/README.md index 926437d81..d8d34c5a7 100644 --- a/modules/gke-standard-public-cluster/README.md +++ b/modules/gke-standard-public-cluster/README.md @@ -35,6 +35,7 @@ No resources. | [network\_name](#input\_network\_name) | # network variables | `string` | n/a | yes | | [project\_id](#input\_project\_id) | GCP project id | `string` | n/a | yes | | [region](#input\_region) | GCP project region or zone | `string` | `"us-central1"` | no | +| [release\_channel](#input\_release\_channel) | n/a | `string` | n/a | yes | | [subnetwork\_name](#input\_subnetwork\_name) | n/a | `string` | n/a | yes | | [tpu\_pools](#input\_tpu\_pools) | n/a | `list(map(any))` | n/a | yes | @@ -42,4 +43,4 @@ No resources. 
| Name | Description | |------|-------------| -| [cluster](#output\_cluster) | n/a | \ No newline at end of file +| [cluster](#output\_cluster) | n/a | diff --git a/modules/gke-standard-public-cluster/main.tf b/modules/gke-standard-public-cluster/main.tf index 45336ac06..fc7971444 100644 --- a/modules/gke-standard-public-cluster/main.tf +++ b/modules/gke-standard-public-cluster/main.tf @@ -25,6 +25,7 @@ module "gke" { cluster_resource_labels = var.cluster_labels region = var.cluster_region kubernetes_version = var.kubernetes_version + release_channel = var.release_channel zones = var.cluster_zones network = var.network_name subnetwork = var.subnetwork_name diff --git a/modules/gke-standard-public-cluster/variables.tf b/modules/gke-standard-public-cluster/variables.tf index cdb0df88f..6f1a3d452 100644 --- a/modules/gke-standard-public-cluster/variables.tf +++ b/modules/gke-standard-public-cluster/variables.tf @@ -50,6 +50,10 @@ variable "kubernetes_version" { type = string } +variable "release_channel" { + type = string +} + variable "cluster_region" { type = string } @@ -117,4 +121,4 @@ variable "gpu_pools" { variable "tpu_pools" { type = list(map(any)) -} \ No newline at end of file +} From 7093da37c616a21f2e0a7ea799b480418be40445 Mon Sep 17 00:00:00 2001 From: imreddy13 <132504814+imreddy13@users.noreply.github.com> Date: Tue, 5 Mar 2024 17:21:35 -0800 Subject: [PATCH 2/2] Fix kuberay_monitoring module dependency for RAG (#290) Fix `kuberay_monitoring` module dependency for RAG 'kuberay_monitoring' module should depend on 'namespace' module --- applications/rag/main.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/applications/rag/main.tf b/applications/rag/main.tf index 74b8abbcf..654151aaa 100644 --- a/applications/rag/main.tf +++ b/applications/rag/main.tf @@ -181,6 +181,7 @@ module "kuberay-monitoring" { create_namespace = true enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard k8s_service_account = 
var.ray_service_account + depends_on = [module.namespace] } module "inference-server" { @@ -218,5 +219,5 @@ module "frontend" { url_domain_addr = var.frontend_url_domain_addr url_domain_name = var.frontend_url_domain_name members_allowlist = var.frontend_members_allowlist - depends_on = [ module.namespace ] + depends_on = [ module.namespace ] }