From 290a465f54fba01e50832dbd244e5c740167fd07 Mon Sep 17 00:00:00 2001 From: Abhi Yerra Date: Wed, 20 Jan 2021 11:44:22 -0800 Subject: [PATCH] Add ASG for EKS (#112) - ASG - Autoscaler --- eks/autoscaler/Makefile | 12 ++ .../cluster-autoscaler-autodiscover.yaml | 168 ++++++++++++++++++ eks/iam.tf | 68 +++++++ eks/inputs.tf | 9 + eks/main.tf | 45 +++-- eks/provider.tf | 1 + eks/sql.tf | 1 + eks/vpc_flow_log.tf | 2 + eks/vpn.tf | 15 +- 9 files changed, 293 insertions(+), 28 deletions(-) create mode 100644 eks/autoscaler/Makefile create mode 100644 eks/autoscaler/cluster-autoscaler-autodiscover.yaml create mode 100644 eks/provider.tf diff --git a/eks/autoscaler/Makefile b/eks/autoscaler/Makefile new file mode 100644 index 00000000..b8802519 --- /dev/null +++ b/eks/autoscaler/Makefile @@ -0,0 +1,12 @@ +AWS_ACCOUNT= +KUBECONFIG= +CLUSTER_NAME= + +# https://docs.aws.amazon.com/eks/latest/userguide/cluster-autoscaler.html +autoscaler-%: + KUBECONFIG=$(KUBECONFIG) kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.4.1/components.yaml + cat cluster-autoscaler-autodiscover.yaml | sed -e 's/OPSZERO_CLUSTER_NAME/$(CLUSTER_NAME)/g' > autoscaler.yaml + KUBECONFIG=$(KUBECONFIG) kubectl apply -f autoscaler.yaml + -KUBECONFIG=$(KUBECONFIG) kubectl annotate serviceaccount cluster-autoscaler -n kube-system --overwrite eks.amazonaws.com/role-arn=arn:aws:iam::$(AWS_ACCOUNT):role/$(CLUSTER_NAME)-$*-node-oidc + KUBECONFIG=$(KUBECONFIG) kubectl patch deployment cluster-autoscaler -n kube-system -p '{"spec":{"template":{"metadata":{"annotations":{"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"}}}}}' + KUBECONFIG=$(KUBECONFIG) kubectl set image deployment cluster-autoscaler -n kube-system cluster-autoscaler=k8s.gcr.io/autoscaling/cluster-autoscaler:v1.18.3 diff --git a/eks/autoscaler/cluster-autoscaler-autodiscover.yaml b/eks/autoscaler/cluster-autoscaler-autodiscover.yaml new file mode 100644 index 00000000..f03f7292 --- /dev/null +++ b/eks/autoscaler/cluster-autoscaler-autodiscover.yaml @@ -0,0 +1,168 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler + name: cluster-autoscaler + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["extensions"] + resources: ["replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", "list"] + - apiGroups: ["apps"] + resources: ["statefulsets", "replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes"] + verbs: ["watch", "list", "get"] + - apiGroups: ["batch", "extensions"] + resources: ["jobs"] + verbs: 
["get", "list", "watch", "patch"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["create"] + - apiGroups: ["coordination.k8s.io"] + resourceNames: ["cluster-autoscaler"] + resources: ["leases"] + verbs: ["get", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update", "watch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + labels: + app: cluster-autoscaler + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '8085' + spec: + serviceAccountName: cluster-autoscaler + containers: + - image: k8s.gcr.io/autoscaling/cluster-autoscaler:v1.17.3 + name: cluster-autoscaler + resources: + limits: + cpu: 100m + memory: 300Mi + requests: + cpu: 100m + memory: 300Mi + command: + - ./cluster-autoscaler + - --v=4 + - --stderrthreshold=info + - --cloud-provider=aws + - --skip-nodes-with-local-storage=false + - --expander=least-waste + - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/OPSZERO_CLUSTER_NAME + - --balance-similar-node-groups + - --skip-nodes-with-system-pods=false + volumeMounts: + - name: ssl-certs + mountPath: /etc/ssl/certs/ca-certificates.crt #/etc/ssl/certs/ca-bundle.crt for Amazon Linux Worker Nodes + readOnly: true + imagePullPolicy: "Always" + volumes: + - name: ssl-certs + hostPath: + path: "/etc/ssl/certs/ca-bundle.crt" diff --git a/eks/iam.tf b/eks/iam.tf index 942f8d51..2d97c85a 100644 --- a/eks/iam.tf +++ b/eks/iam.tf @@ -18,6 +18,16 @@ POLICY } +data "tls_certificate" "cluster" { + url = aws_eks_cluster.cluster.identity.0.oidc.0.issuer +} + +resource "aws_iam_openid_connect_provider" "cluster" { + client_id_list = ["sts.amazonaws.com"] + thumbprint_list = [data.tls_certificate.cluster.certificates.0.sha1_fingerprint] + url = aws_eks_cluster.cluster.identity.0.oidc.0.issuer +} + resource "aws_iam_role_policy_attachment" "cluster-AmazonEKSClusterPolicy" { policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy" role = aws_iam_role.cluster.name @@ -48,6 +58,37 @@ POLICY } +resource "aws_iam_role" "node_oidc" { + name = "${var.environment_name}-node-oidc" + + assume_role_policy = <> /etc/sysctl.conf -# echo 
"net.core.rmem_max=16777216" >> /etc/sysctl.conf -# echo "net.core.somaxconn=16096" >> /etc/sysctl.conf -# echo "net.core.wmem_max=16777216" >> /etc/sysctl.conf -# echo "net.ipv4.ip_local_port_range=1024 65535" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_fin_timeout=15" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_max_syn_backlog=20480" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_max_tw_buckets=400000" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_no_metrics_save=1" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_rmem=4096 87380 16777216" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_syn_retries=2" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_synack_retries=2" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_syncookies=1" >> /etc/sysctl.conf -# echo "net.ipv4.tcp_wmem=4096 65536 16777216" >> /etc/sysctl.conf -# echo "proc.file-max=2097152" >> /etc/sysctl.conf -# echo "proc.min_free_kbytes=65536" >> /etc/sysctl.conf -# echo "vm.min_free_kbytes=65536" >> /etc/sysctl.conf -# sysctl -p /etc/sysctl.conf - -${var.instance_userdata} - /etc/eks/bootstrap.sh --apiserver-endpoint '${aws_eks_cluster.cluster.endpoint}' --b64-cluster-ca '${aws_eks_cluster.cluster.certificate_authority[0].data}' '${var.environment_name}' USERDATA @@ -103,7 +82,7 @@ USERDATA resource "aws_launch_configuration" "nodes_blue" { iam_instance_profile = aws_iam_instance_profile.node.name - image_id = data.aws_ssm_parameter.eks_ami.value + image_id = var.ami_image == "" ? data.aws_ssm_parameter.eks_ami.value : var.ami_image instance_type = var.nodes_blue_instance_type name_prefix = "${var.environment_name}-nodes-blue" security_groups = [aws_security_group.node.id] @@ -145,12 +124,22 @@ resource "aws_autoscaling_group" "nodes_blue" { value = "owned" propagate_at_launch = true }, + { + key = "k8s.io/cluster-autoscaler/${var.environment_name}" + value = "owned" + propagate_at_launch = true + }, + { + key = "k8s.io/cluster-autoscaler/enabled" + value = "TRUE" + propagate_at_launch = true + }, ] } resource "aws_launch_configuration" "nodes_green" { iam_instance_profile = aws_iam_instance_profile.node.name - image_id = data.aws_ssm_parameter.eks_ami.value + image_id = var.ami_image == "" ? data.aws_ssm_parameter.eks_ami.value : var.ami_image instance_type = var.nodes_green_instance_type name_prefix = "${var.environment_name}-nodes-green" security_groups = [aws_security_group.node.id] @@ -192,6 +181,16 @@ resource "aws_autoscaling_group" "nodes_green" { value = "owned" propagate_at_launch = true }, + { + key = "k8s.io/cluster-autoscaler/${var.environment_name}" + value = "owned" + propagate_at_launch = true + }, + { + key = "k8s.io/cluster-autoscaler/enabled" + value = "TRUE" + propagate_at_launch = true + }, ] } diff --git a/eks/provider.tf b/eks/provider.tf new file mode 100644 index 00000000..20500871 --- /dev/null +++ b/eks/provider.tf @@ -0,0 +1 @@ +provider tls {} diff --git a/eks/sql.tf b/eks/sql.tf index e3f117d2..a8a5f76b 100644 --- a/eks/sql.tf +++ b/eks/sql.tf @@ -10,6 +10,7 @@ resource "aws_rds_cluster" "default" { engine = var.sql_engine engine_mode = var.sql_engine_mode + engine_version = var.sql_engine_version database_name = var.sql_database_name master_username = var.sql_master_username diff --git a/eks/vpc_flow_log.tf b/eks/vpc_flow_log.tf index a83ee027..7bd78e77 100644 --- a/eks/vpc_flow_log.tf +++ b/eks/vpc_flow_log.tf @@ -1,4 +1,6 @@ resource "aws_flow_log" "vpc" { + count = var.vpc_flow_logs_enabled ? 
1 : 0 + iam_role_arn = aws_iam_role.vpc.arn log_destination = aws_cloudwatch_log_group.vpc.arn traffic_type = "ALL" diff --git a/eks/vpn.tf b/eks/vpn.tf index 30d3510d..56d7b1fe 100644 --- a/eks/vpn.tf +++ b/eks/vpn.tf @@ -14,14 +14,16 @@ resource "aws_security_group" "vpn" { to_port = 0 protocol = "-1" cidr_blocks = [ - "0.0.0.0/0"] + "0.0.0.0/0" + ] } ingress { from_port = 500 protocol = "udp" to_port = 500 cidr_blocks = [ - "0.0.0.0/0"] + "0.0.0.0/0" + ] } ingress { from_port = 22 @@ -34,21 +36,24 @@ resource "aws_security_group" "vpn" { protocol = "50" to_port = 0 cidr_blocks = [ - "0.0.0.0/0"] + "0.0.0.0/0" + ] } ingress { from_port = 4500 protocol = "udp" to_port = 4500 cidr_blocks = [ - "0.0.0.0/0"] + "0.0.0.0/0" + ] } ingress { from_port = 1701 protocol = "udp" to_port = 1701 cidr_blocks = [ - "0.0.0.0/0"] + "0.0.0.0/0" + ] } tags = {
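
Usage sketch (illustrative only, not part of the patch; the account ID, kubeconfig path, and names below are placeholders): once `terraform apply` has created the blue/green node ASGs and the OIDC-backed `*-node-oidc` role, the autoscaler is installed via the new Makefile target. Note that the target stem (`$*`) is interpolated into the IRSA role ARN as `$(CLUSTER_NAME)-$*-node-oidc`, while the auto-discovery tag in the rendered manifest becomes `k8s.io/cluster-autoscaler/$(CLUSTER_NAME)`, so both need to line up with the `${var.environment_name}` used in iam.tf and main.tf.

    cd eks/autoscaler
    make autoscaler-prod \
      AWS_ACCOUNT=111111111111 \
      KUBECONFIG=~/.kube/config \
      CLUSTER_NAME=opszero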