Skip to content

Commit

Permalink
Merge pull request #19 from comet-ml/druid-mng-subnets
Browse files Browse the repository at this point in the history
Expand subnets, add new MPM compute and new MPM buckets/perms
  • Loading branch information
burmek authored Apr 17, 2024
2 parents 0353ef2 + d59140c commit 9720b23
Show file tree
Hide file tree
Showing 9 changed files with 237 additions and 33 deletions.
13 changes: 12 additions & 1 deletion main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ module "comet_eks" {

s3_enabled = var.enable_s3
comet_ec2_s3_iam_policy = var.enable_s3 ? module.comet_s3[0].comet_s3_iam_policy_arn : null

enable_mpm_infra = var.enable_mpm_infra

eks_druid_instance_type = var.eks_druid_instance_type
eks_druid_node_count = var.eks_druid_node_count
eks_zookeeper_instance_type = var.eks_zookeeper_instance_type
eks_zookeeper_node_count = var.eks_zookeeper_node_count
eks_airflow_instance_type = var.eks_airflow_instance_type
eks_airflow_node_count = var.eks_airflow_node_count
}

module "comet_elasticache" {
Expand Down Expand Up @@ -124,6 +133,8 @@ module "comet_s3" {
count = var.enable_s3 ? 1 : 0
environment = var.environment

comet_s3_bucket = var.s3_bucket_name
comet_s3_bucket = var.s3_bucket_name
s3_force_destroy = var.s3_force_destroy

enable_mpm_infra = var.enable_mpm_infra
}
115 changes: 95 additions & 20 deletions modules/comet_eks/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ locals {
Terraform = "true"
Environment = var.environment
}
volume_type = "gp3"
volume_encrypted = false
volume_delete_on_termination = true
}

data "aws_iam_policy" "ebs_csi_policy" {
Expand All @@ -22,32 +25,104 @@ module "eks" {

eks_managed_node_group_defaults = { ami_type = var.eks_mng_ami_type }

eks_managed_node_groups = {
one = {
name = var.eks_mng_name
instance_types = var.eks_node_types
min_size = var.eks_mng_desired_size
max_size = var.eks_mng_max_size
desired_size = var.eks_mng_desired_size
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = var.eks_mng_disk_size
volume_type = "gp3"
encrypted = false
delete_on_termination = true
eks_managed_node_groups = merge(
{
comet = {
name = var.eks_mng_name
instance_types = var.eks_node_types
min_size = var.eks_mng_desired_size
max_size = var.eks_mng_max_size
desired_size = var.eks_mng_desired_size
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = var.eks_mng_disk_size
volume_type = local.volume_type
encrypted = local.volume_encrypted
delete_on_termination = local.volume_delete_on_termination
}
}
}
labels = {
nodegroup_name = "comet"
}
iam_role_additional_policies = var.s3_enabled ? { comet_s3_access = var.comet_ec2_s3_iam_policy } : {}
}

iam_role_additional_policies = var.s3_enabled ? { comet_s3_access = var.comet_ec2_s3_iam_policy } : {}
}
}

},
var.enable_mpm_infra ? {
druid = {
name = "druid"
instance_types = [var.eks_druid_instance_type]
min_size = var.eks_druid_node_count
max_size = var.eks_druid_node_count
desired_size = var.eks_druid_node_count
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = var.eks_mng_disk_size
volume_type = local.volume_type
encrypted = local.volume_encrypted
delete_on_termination = local.volume_delete_on_termination
}
}
}
labels = {
nodegroup_name = "druid"
}
iam_role_additional_policies = var.s3_enabled ? { comet_s3_access = var.comet_ec2_s3_iam_policy } : {}
},
zookeeper = {
name = "zookeeper"
instance_types = [var.eks_zookeeper_instance_type]
min_size = var.eks_zookeeper_node_count
max_size = var.eks_zookeeper_node_count
desired_size = var.eks_zookeeper_node_count
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = var.eks_mng_disk_size
volume_type = local.volume_type
encrypted = local.volume_encrypted
delete_on_termination = local.volume_delete_on_termination
}
}
}
labels = {
nodegroup_name = "zookeeper"
}
iam_role_additional_policies = var.s3_enabled ? { comet_s3_access = var.comet_ec2_s3_iam_policy } : {}
},
airflow = {
name = "airflow"
instance_types = [var.eks_airflow_instance_type]
min_size = var.eks_airflow_node_count
max_size = var.eks_airflow_node_count
desired_size = var.eks_airflow_node_count
block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = var.eks_mng_disk_size
volume_type = local.volume_type
encrypted = local.volume_encrypted
delete_on_termination = local.volume_delete_on_termination
}
}
}
labels = {
nodegroup_name = "airflow"
}
iam_role_additional_policies = var.s3_enabled ? { comet_s3_access = var.comet_ec2_s3_iam_policy } : {}
}
} : {}
)
tags = local.tags
}


module "irsa-ebs-csi" {
source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
version = "4.7.0"
Expand Down
35 changes: 35 additions & 0 deletions modules/comet_eks/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,39 @@ variable "comet_ec2_s3_iam_policy" {
description = "Policy with access to S3 to associate with EKS worker nodes"
type = string
default = null
}

# Feature toggle for the MPM compute node groups. When true, the module's
# eks_managed_node_groups map is extended with the druid, zookeeper and
# airflow groups; when false only the base group is created.
variable "enable_mpm_infra" {
  type        = bool
  description = "Sets MNGs to be created for MPM compute"
}

# EC2 instance type for the "druid" managed node group. The value is wrapped
# into a single-element instance_types list by the node group definition.
variable "eks_druid_instance_type" {
  type        = string
  description = "Instance type for EKS Druid nodes"
}

# EC2 instance type for the "zookeeper" managed node group. The value is
# wrapped into a single-element instance_types list by the node group definition.
variable "eks_zookeeper_instance_type" {
  type        = string
  description = "Instance type for EKS Zookeeper nodes"
}

# EC2 instance type for the "airflow" managed node group. The value is wrapped
# into a single-element instance_types list by the node group definition.
variable "eks_airflow_instance_type" {
  type        = string
  description = "Instance type for EKS Airflow nodes"
}

# Size of the "druid" managed node group. The same value is used for
# min_size, max_size and desired_size, so the group has a fixed size.
variable "eks_druid_node_count" {
  type        = number
  description = "Instance count for EKS Druid nodes"
}

# Size of the "zookeeper" managed node group. The same value is used for
# min_size, max_size and desired_size, so the group has a fixed size.
variable "eks_zookeeper_node_count" {
  type        = number
  description = "Instance count for EKS Zookeeper nodes"
}

# Size of the "airflow" managed node group. The same value is used for
# min_size, max_size and desired_size, so the group has a fixed size.
variable "eks_airflow_node_count" {
  type        = number
  description = "Instance count for EKS Airflow nodes"
}
54 changes: 44 additions & 10 deletions modules/comet_s3/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ locals {
Terraform = "true"
Environment = var.environment
}
suffix = substr(sha1("${var.environment}"), 0, 8)
}

resource "aws_s3_bucket" "comet_s3_bucket" {
Expand All @@ -15,19 +16,52 @@ resource "aws_s3_bucket" "comet_s3_bucket" {
})
}

# Druid bucket, provisioned only when MPM infrastructure is enabled
# (zero instances otherwise). The bucket name is "comet-druid-" followed by
# local.suffix.
resource "aws_s3_bucket" "comet_druid_bucket" {
  count = var.enable_mpm_infra ? 1 : 0

  bucket        = "comet-druid-${local.suffix}"
  force_destroy = var.s3_force_destroy

  # Module-wide tags plus a Name tag that mirrors the bucket name.
  tags = merge(local.tags, {
    Name = "comet-druid-${local.suffix}"
  })
}

# Airflow bucket, provisioned only when MPM infrastructure is enabled
# (zero instances otherwise). The bucket name is "comet-airflow-" followed by
# local.suffix.
resource "aws_s3_bucket" "comet_airflow_bucket" {
  count = var.enable_mpm_infra ? 1 : 0

  bucket        = "comet-airflow-${local.suffix}"
  force_destroy = var.s3_force_destroy

  # Module-wide tags plus a Name tag that mirrors the bucket name.
  tags = merge(local.tags, {
    Name = "comet-airflow-${local.suffix}"
  })
}

resource "aws_iam_policy" "comet_s3_iam_policy" {
name = "comet-s3-access-policy"
description = "comet-s3-access-policy"
name = "comet-s3-access-policy-${local.suffix}"
description = "Policy for access to comet S3 buckets"

policy = jsonencode({
"Version" : "2012-10-17",
"Statement" : [
Version = "2012-10-17",
Statement = [
{
"Effect" : "Allow",
"Action" : "s3:*",
"Resource" : [
"arn:aws:s3:::${var.comet_s3_bucket}",
"arn:aws:s3:::${var.comet_s3_bucket}/*"
]
Effect = "Allow",
Action = "s3:*",
Resource = concat(
[
aws_s3_bucket.comet_s3_bucket.arn,
"${aws_s3_bucket.comet_s3_bucket.arn}/*"
],
var.enable_mpm_infra ? [
aws_s3_bucket.comet_druid_bucket[0].arn,
"${aws_s3_bucket.comet_druid_bucket[0].arn}/*",
aws_s3_bucket.comet_airflow_bucket[0].arn,
"${aws_s3_bucket.comet_airflow_bucket[0].arn}/*"
] : []
)
}
]
})
Expand Down
2 changes: 1 addition & 1 deletion modules/comet_s3/outputs.tf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
output "comet_s3_iam_policy_arn" {
description = "ARN of the IAM policy granting access to the provisioned bucket"
description = "ARN of the IAM policy granting access to the provisioned bucket(s)"
value = aws_iam_policy.comet_s3_iam_policy.arn
}
5 changes: 5 additions & 0 deletions modules/comet_s3/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,9 @@ variable "comet_s3_bucket" {
# Forwarded to the force_destroy argument of the S3 buckets declared in this
# module (including the Druid and Airflow buckets).
variable "s3_force_destroy" {
  type        = bool
  description = "Option to enable force delete of S3 bucket"
}

# Feature toggle for the MPM buckets. When true, one Druid bucket and one
# Airflow bucket are created and their ARNs are added to the S3 access policy.
variable "enable_mpm_infra" {
  type        = bool
  description = "Sets buckets to be created for MPM Druid/Airflow"
}
2 changes: 1 addition & 1 deletion modules/comet_vpc/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module "vpc" {

azs = local.azs
public_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k)]
private_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k + 10)]
private_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 5, 3 * k + 1)]

enable_nat_gateway = true
enable_dns_hostnames = true
Expand Down
3 changes: 3 additions & 0 deletions terraform.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ enable_rds = false
# Create S3 resources for storing Comet objects
enable_s3 = false

# Create MPM infrastructure: EKS nodegroups for Druid/Zookeeper/Airflow, plus Druid/Airflow S3 buckets when enable_s3 is true
enable_mpm_infra = false

################
#### Global ####
################
Expand Down
41 changes: 41 additions & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ variable "enable_s3" {
type = bool
}

# Root-level toggle for all MPM infrastructure. It is forwarded to both the
# comet_eks module (extra druid/zookeeper/airflow managed node groups) and the
# comet_s3 module (Druid/Airflow buckets). The comet_s3 module itself is only
# instantiated when enable_s3 is true, so the buckets require both flags.
variable "enable_mpm_infra" {
  description = "Sets MNGs (Druid/Zookeeper/Airflow) to be created for MPM compute and, when enable_s3 is true, the MPM Druid/Airflow S3 buckets"
  type        = bool
}

################
#### Global ####
################
Expand Down Expand Up @@ -213,6 +218,42 @@ variable "eks_external_dns_r53_zones" {
]
}

# Instance type handed to the comet_eks module for the Druid node group.
variable "eks_druid_instance_type" {
  type        = string
  default     = "m6i.4xlarge"
  description = "Instance type for EKS Druid nodes"
}

# Instance type handed to the comet_eks module for the Zookeeper node group.
variable "eks_zookeeper_instance_type" {
  type        = string
  default     = "m6i.4xlarge"
  description = "Instance type for EKS Zookeeper nodes"
}

# Instance type handed to the comet_eks module for the Airflow node group.
variable "eks_airflow_instance_type" {
  type        = string
  default     = "m6i.4xlarge"
  description = "Instance type for EKS Airflow nodes"
}

# Node count handed to the comet_eks module for the Druid node group, where it
# is used as min_size, max_size and desired_size alike.
variable "eks_druid_node_count" {
  description = "Instance count for EKS Druid nodes"
  type        = number
  default     = 6

  # Reject negative or fractional counts at plan time instead of letting them
  # reach the node group scaling configuration.
  validation {
    condition     = var.eks_druid_node_count >= 0 && floor(var.eks_druid_node_count) == var.eks_druid_node_count
    error_message = "The eks_druid_node_count value must be a non-negative whole number."
  }
}

# Node count handed to the comet_eks module for the Zookeeper node group, where
# it is used as min_size, max_size and desired_size alike.
variable "eks_zookeeper_node_count" {
  description = "Instance count for EKS Zookeeper nodes"
  type        = number
  default     = 3

  # Reject negative or fractional counts at plan time instead of letting them
  # reach the node group scaling configuration.
  validation {
    condition     = var.eks_zookeeper_node_count >= 0 && floor(var.eks_zookeeper_node_count) == var.eks_zookeeper_node_count
    error_message = "The eks_zookeeper_node_count value must be a non-negative whole number."
  }
}

# Node count handed to the comet_eks module for the Airflow node group, where
# it is used as min_size, max_size and desired_size alike.
variable "eks_airflow_node_count" {
  description = "Instance count for EKS Airflow nodes"
  type        = number
  default     = 3

  # Reject negative or fractional counts at plan time instead of letting them
  # reach the node group scaling configuration.
  validation {
    condition     = var.eks_airflow_node_count >= 0 && floor(var.eks_airflow_node_count) == var.eks_airflow_node_count
    error_message = "The eks_airflow_node_count value must be a non-negative whole number."
  }
}

#### comet_elasticache ####
variable "elasticache_allow_from_sg" {
description = "Security group from which to allow connections to ElastiCache, to use when provisioning with existing compute"
Expand Down

0 comments on commit 9720b23

Please sign in to comment.