From 2d987058c542426ca0a3c99360888f5e7db85592 Mon Sep 17 00:00:00 2001 From: Michael Fraenkel Date: Thu, 3 Mar 2022 07:36:48 -0700 Subject: [PATCH] PLAT-4130: Support regional k8s clusters (#80) 1. Use the proper CLI option when generating the kubeconfig. 2. Determine whether the location is regional or zonal once. 3. Allow node pools to specify zonal locations when the cluster is regional, because GPUs are not available in all zones. --- main.tf | 18 ++++++++++-------- templates/kubeconfig.tpl | 21 --------------------- tests/test.tf | 6 +++--- variables.tf | 9 ++++++--- 4 files changed, 19 insertions(+), 35 deletions(-) delete mode 100644 templates/kubeconfig.tpl diff --git a/main.tf b/main.tf index f3adb43..f633c34 100644 --- a/main.tf +++ b/main.tf @@ -3,12 +3,13 @@ locals { uuid = "${var.cluster_name}-${random_uuid.id.result}" # Converts a cluster's location to a zone/region. A 'location' may be a region or zone: a region becomes the '[region]-a' zone. - region = length(split("-", var.location)) == 2 ? var.location : substr(var.location, 0, length(var.location) - 2) - zone = length(split("-", var.location)) == 3 ? var.location : format("%s-a", var.location) + is_regional = length(split("-", var.location)) == 2 + region = local.is_regional ? var.location : substr(var.location, 0, length(var.location) - 2) + zone = local.is_regional ? format("%s-a", var.location) : var.location node_pools = { for node_pool, attrs in var.node_pools : - node_pool => merge(attrs, lookup(var.node_pool_overrides, node_pool, {})) + node_pool => merge(attrs, lookup(var.node_pool_overrides, node_pool, null)) } taint_effects = { "NoSchedule" : "NO_SCHEDULE", "PreferNoSchedule" : "PREFER_NO_SCHEDULE", "NoExecute" : "NO_EXECUTE" } } @@ -182,7 +183,7 @@ resource "google_container_cluster" "domino_cluster" { if ! 
gcloud auth print-identity-token 2>/dev/null; then printf "%s" "$GOOGLE_CREDENTIALS" | gcloud auth activate-service-account --project="${var.project}" --key-file=- fi - gcloud container clusters get-credentials ${var.cluster_name} --zone ${local.zone} + gcloud container clusters get-credentials ${var.cluster_name} ${local.is_regional ? "--region" : "--zone"} ${var.location} EOF } } @@ -202,9 +203,10 @@ resource "google_kms_crypto_key" "crypto_key" { resource "google_container_node_pool" "node_pools" { for_each = local.node_pools - name = each.key - location = google_container_cluster.domino_cluster.location - cluster = google_container_cluster.domino_cluster.name + name = each.key + location = google_container_cluster.domino_cluster.location + cluster = google_container_cluster.domino_cluster.name + node_locations = length(each.value.node_locations) != 0 ? each.value.node_locations : google_container_cluster.domino_cluster.node_locations initial_node_count = each.value.initial_count max_pods_per_node = each.value.max_pods @@ -264,7 +266,7 @@ resource "google_container_node_pool" "node_pools" { } lifecycle { - ignore_changes = [autoscaling, node_config[0].taint] + ignore_changes = [autoscaling, node_config[0].taint, node_locations] } } diff --git a/templates/kubeconfig.tpl b/templates/kubeconfig.tpl deleted file mode 100644 index 58af35c..0000000 --- a/templates/kubeconfig.tpl +++ /dev/null @@ -1,21 +0,0 @@ -# vi: ft=yaml - -apiVersion: v1 -clusters: -- name: ${cluster_name} - cluster: - certificate-authority-data: ${ca_certificate} - server: https://${server} -contexts: -- name: ${cluster_name} - context: - cluster: ${cluster_name} - user: client -current-context: ${cluster_name} -kind: Config -preferences: {} -users: -- name: client - user: - client-certificate-data: ${client_certificate} - client-key-data: ${client_key} diff --git a/tests/test.tf b/tests/test.tf index 70eef0f..0608fde 100644 --- a/tests/test.tf +++ b/tests/test.tf @@ -3,13 +3,13 @@ terraform { 
required_providers { google = { - version = ">=3.68" + version = "~>3.68" } google-beta = { - version = ">=3.68" + version = "~>3.68" } random = { - version = ">=3.1" + version = "~>3.1" } } diff --git a/variables.tf b/variables.tf index 76879b9..88ca7ae 100644 --- a/variables.tf +++ b/variables.tf @@ -110,6 +110,7 @@ variable "node_pools" { gpu_accelerator = string labels = map(string) taints = list(string) + node_locations = list(string) })) default = { compute = { @@ -125,7 +126,8 @@ variable "node_pools" { labels = { "dominodatalab.com/node-pool" = "default" } - taints = [] + taints = [] + node_locations = [] } gpu = { min_count = 0 @@ -144,6 +146,7 @@ variable "node_pools" { taints = [ "nvidia.com/gpu=true:NoExecute" ] + node_locations = [] } platform = { min_count = 1 @@ -158,13 +161,13 @@ variable "node_pools" { labels = { "dominodatalab.com/node-pool" = "platform" } - taints = [] + taints = [] + node_locations = [] } } } variable "node_pool_overrides" { - type = map(map(any)) default = {} }