From c296077207a14f6e4c89640a4dad73a2628e2d95 Mon Sep 17 00:00:00 2001 From: Shubham Hibare Date: Thu, 27 Jun 2024 11:44:32 +0530 Subject: [PATCH] Ingestion IAC --- infrastructure/ingestion/aws/README.md | 86 ++++++++ infrastructure/ingestion/aws/cloudwatch.tf | 17 ++ .../ingestion_sfn_definition.json | 95 +++++++++ infrastructure/ingestion/aws/iam.tf | 160 ++++++++++++++ infrastructure/ingestion/aws/lambda.tf | 100 +++++++++ .../ingestion/aws/lambda/ingestion/Dockerfile | 12 ++ .../ingestion/aws/lambda/ingestion/README.md | 37 ++++ .../aws/lambda/ingestion/ingestion.py | 68 ++++++ .../aws/lambda/ingestion/modules/__init__.py | 0 .../ingestion/modules/common/__init__.py | 0 .../aws/lambda/ingestion/modules/common/s3.py | 123 +++++++++++ .../lambda/ingestion/modules/common/utils.py | 1 + .../ingestion/modules/findings_ingestion.py | 197 ++++++++++++++++++ .../ingestion/aws/lambda/ingestion/package.sh | 6 + .../aws/lambda/ingestion/pyproject.toml | 21 ++ .../ingestion/aws/lambda/migration/Dockerfile | 12 ++ .../ingestion/aws/lambda/migration/README.md | 16 ++ .../ingestion/aws/lambda/migration/package.sh | 8 + infrastructure/ingestion/aws/locals.tf | 11 + infrastructure/ingestion/aws/outputs.tf | 3 + infrastructure/ingestion/aws/providers.tf | 23 ++ infrastructure/ingestion/aws/rds.tf | 18 ++ infrastructure/ingestion/aws/s3.tfbackend | 5 + infrastructure/ingestion/aws/secrets.tf | 19 ++ .../ingestion/aws/securitygroups.tf | 33 +++ infrastructure/ingestion/aws/sfn.tf | 13 ++ infrastructure/ingestion/aws/sts.tf | 1 + .../ingestion/aws/terraform.tfvars.example | 17 ++ infrastructure/ingestion/aws/variables.tf | 115 ++++++++++ infrastructure/ingestion/aws/vpc.tf | 22 ++ 30 files changed, 1239 insertions(+) create mode 100644 infrastructure/ingestion/aws/README.md create mode 100644 infrastructure/ingestion/aws/cloudwatch.tf create mode 100644 infrastructure/ingestion/aws/configuration/ingestion_sfn_definition.json create mode 100644 infrastructure/ingestion/aws/iam.tf 
create mode 100644 infrastructure/ingestion/aws/lambda.tf create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/Dockerfile create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/README.md create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/ingestion.py create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/modules/__init__.py create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/modules/common/__init__.py create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/modules/common/s3.py create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/modules/common/utils.py create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/modules/findings_ingestion.py create mode 100755 infrastructure/ingestion/aws/lambda/ingestion/package.sh create mode 100644 infrastructure/ingestion/aws/lambda/ingestion/pyproject.toml create mode 100644 infrastructure/ingestion/aws/lambda/migration/Dockerfile create mode 100644 infrastructure/ingestion/aws/lambda/migration/README.md create mode 100755 infrastructure/ingestion/aws/lambda/migration/package.sh create mode 100644 infrastructure/ingestion/aws/locals.tf create mode 100644 infrastructure/ingestion/aws/outputs.tf create mode 100644 infrastructure/ingestion/aws/providers.tf create mode 100644 infrastructure/ingestion/aws/rds.tf create mode 100644 infrastructure/ingestion/aws/s3.tfbackend create mode 100644 infrastructure/ingestion/aws/secrets.tf create mode 100644 infrastructure/ingestion/aws/securitygroups.tf create mode 100644 infrastructure/ingestion/aws/sfn.tf create mode 100644 infrastructure/ingestion/aws/sts.tf create mode 100644 infrastructure/ingestion/aws/terraform.tfvars.example create mode 100644 infrastructure/ingestion/aws/variables.tf create mode 100644 infrastructure/ingestion/aws/vpc.tf diff --git a/infrastructure/ingestion/aws/README.md b/infrastructure/ingestion/aws/README.md new file mode 100644 index 0000000..14f611a --- /dev/null +++ 
b/infrastructure/ingestion/aws/README.md @@ -0,0 +1,86 @@ +# infrastructure + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >=1.3 | +| [aws](#requirement\_aws) | ~> 5.0 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | ~> 5.0 | +| [local](#provider\_local) | n/a | +| [null](#provider\_null) | n/a | +| [random](#provider\_random) | n/a | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [aws_cloudwatch_event_rule.ingestion_sfn_trigger_rule](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource | +| [aws_cloudwatch_event_target.ingestion_sfn_trigger](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource | +| [aws_db_instance.rds_postgres](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/db_instance) | resource | +| [aws_iam_policy.policy_for_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.cloudwatch_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role.lambda_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role.sfn_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy.cloudwatch_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| [aws_iam_role_policy.sfn_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy) | resource | +| 
[aws_iam_role_policy_attachment.LambdaExecutionRolePolicyAttachment](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_lambda_function.ingestion-lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | +| [aws_lambda_function.migration-lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | +| [aws_secretsmanager_secret.rds_master_password](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret) | resource | +| [aws_secretsmanager_secret_version.rds_master_password](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/secretsmanager_secret_version) | resource | +| [aws_security_group.lambda_sg](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | +| [aws_security_group.rds_sg](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | +| [aws_sfn_state_machine.ingestion-step-function](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sfn_state_machine) | resource | +| [null_resource.ingestion_lambda_build](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [null_resource.migration_lambda_build](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [random_password.rds_master_password](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/password) | resource | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_iam_policy_document.cloudwatch_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | 
+| [aws_iam_policy_document.cloudwatch_policy_document](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.lambda_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.permissions_for_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.sf_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.sfn_policy_document](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_security_group.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/security_group) | data source | +| [aws_subnet.selected](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnet) | data source | +| [aws_subnets.default](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source | +| [aws_vpc.selected](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/vpc) | data source | +| [local_file.ingestion_lambda_build](https://registry.terraform.io/providers/hashicorp/local/latest/docs/data-sources/file) | data source | +| [local_file.migration_lambda_build](https://registry.terraform.io/providers/hashicorp/local/latest/docs/data-sources/file) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [aws\_profile](#input\_aws\_profile) | AWS profile to use for authentication | `string` | n/a | yes | +| [aws\_region](#input\_aws\_region) | AWS region where to deploy resources | `string` | n/a | yes | +| 
[db\_subnet\_group\_name](#input\_db\_subnet\_group\_name) | Name of the RDS subnet group | `string` | n/a | yes | +| [disable\_ingestion\_schedule](#input\_disable\_ingestion\_schedule) | Disable the ingestion schedule | `bool` | `false` | no | +| [environment\_type](#input\_environment\_type) | Environment type | `string` | n/a | yes | +| [ingestion\_schedule](#input\_ingestion\_schedule) | Cron schedule for the CloudWatch Event Rule | `string` | `"rate(24 hours)"` | no | +| [permissions\_boundary\_arn](#input\_permissions\_boundary\_arn) | ARN of the permissions boundary to use for the IAM role | `string` | n/a | yes | +| [project\_name](#input\_project\_name) | Name of the project | `string` | `"secrets-finder"` | no | +| [rds\_db\_name](#input\_rds\_db\_name) | Name of the database to create in the RDS instance | `string` | `"secrets_finder"` | no | +| [rds\_username](#input\_rds\_username) | Username for the RDS instance | `string` | `"secrets_finder"` | no | +| [s3\_bucket\_name](#input\_s3\_bucket\_name) | Name of the S3 bucket to create | `string` | n/a | yes | +| [subnet\_name](#input\_subnet\_name) | Name of the subnet where to deploy the resources (wildcards are allowed: first match is used) | `string` | n/a | yes | +| [tags](#input\_tags) | A map of tags to add to the resources | `map(string)` | n/a | yes | +| [vpc\_name](#input\_vpc\_name) | Identifier of the VPC to use for secrets-finder | `string` | n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [rds\_pg\_endpoint](#output\_rds\_pg\_endpoint) | n/a | + diff --git a/infrastructure/ingestion/aws/cloudwatch.tf b/infrastructure/ingestion/aws/cloudwatch.tf new file mode 100644 index 0000000..90571d3 --- /dev/null +++ b/infrastructure/ingestion/aws/cloudwatch.tf @@ -0,0 +1,17 @@ +resource "aws_cloudwatch_event_rule" "ingestion_sfn_trigger_rule" { + name = "${var.project_name}-ingestion-sfn-trigger" + description = "Triggers the Step function on schedule" + 
schedule_expression = var.ingestion_schedule + state = var.disable_ingestion_schedule ? "DISABLED" : "ENABLED" +} + +resource "aws_cloudwatch_event_target" "ingestion_sfn_trigger" { + rule = aws_cloudwatch_event_rule.ingestion_sfn_trigger_rule.name + arn = aws_sfn_state_machine.ingestion-step-function.arn + role_arn = aws_iam_role.cloudwatch_role.arn + + depends_on = [ + aws_iam_role.cloudwatch_role, + aws_iam_role_policy.cloudwatch_policy, + ] +} diff --git a/infrastructure/ingestion/aws/configuration/ingestion_sfn_definition.json b/infrastructure/ingestion/aws/configuration/ingestion_sfn_definition.json new file mode 100644 index 0000000..db2f5fd --- /dev/null +++ b/infrastructure/ingestion/aws/configuration/ingestion_sfn_definition.json @@ -0,0 +1,95 @@ +{ + "Comment": "Ingestion State Machine", + "StartAt": "BootStrapState", + "States": { + "BootStrapState": { + "Type": "Task", + "Resource": "${migrate_lambda_arn}", + "Next": "IngestionState" + }, + "IngestionState": { + "Type": "Parallel", + "Branches": [ + { + "Comment": "Ingest Scheduled Scan Findings", + "StartAt": "ListScheduledScanFindingsFiles", + "States": { + "ListScheduledScanFindingsFiles": { + "Type": "Task", + "Resource": "${ingestion_lambda_arn}", + "ResultPath": "$.lambdaResult", + "Parameters": { + "action": "list_files", + "prefix": "secrets-finder/scheduled-scans/results/" + }, + "Next": "IngestScheduledScanFindingsFiles" + }, + "IngestScheduledScanFindingsFiles": { + "Type": "Map", + "ItemsPath": "$.lambdaResult.body.files", + "Parameters": { + "index.$": "$$.Map.Item.Index", + "key.$": "$$.Map.Item.Value" + }, + "Iterator": { + "StartAt": "IngestScheduledScanFindings", + "States": { + "IngestScheduledScanFindings": { + "Type": "Task", + "Resource": "${ingestion_lambda_arn}", + "Parameters": { + "action": "ingest_findings", + "file_key.$": "$.key" + }, + "End": true + } + } + }, + "End": true + } + } + }, + { + "Comment": "Ingest Ongoing Scan Findings", + "StartAt": 
"ListOngoingScanFindingsFiles", + "States": { + "ListOngoingScanFindingsFiles": { + "Type": "Task", + "Resource": "${ingestion_lambda_arn}", + "ResultPath": "$.lambdaResult", + "Parameters": { + "action": "list_files", + "prefix": "secrets-finder/ongoing-scans/results/" + }, + "Next": "IngestOngoingScanFindingsFiles" + }, + "IngestOngoingScanFindingsFiles": { + "Type": "Map", + "ItemsPath": "$.lambdaResult.body.files", + "Parameters": { + "index.$": "$$.Map.Item.Index", + "key.$": "$$.Map.Item.Value" + }, + "Iterator": { + "StartAt": "IngestOngoingScanFindings", + "States": { + "IngestOngoingScanFindings": { + "Type": "Task", + "Resource": "${ingestion_lambda_arn}", + "Parameters": { + "action": "ingest_findings", + "file_key.$": "$.key" + }, + "End": true + } + } + }, + "End": true + } + } + } + ], + "End": true + } + } +} diff --git a/infrastructure/ingestion/aws/iam.tf b/infrastructure/ingestion/aws/iam.tf new file mode 100644 index 0000000..15dbf9c --- /dev/null +++ b/infrastructure/ingestion/aws/iam.tf @@ -0,0 +1,160 @@ +# Lambda execution role +data "aws_iam_policy_document" "lambda_assume_role" { + statement { + effect = "Allow" + principals { + identifiers = ["lambda.amazonaws.com"] + type = "Service" + } + actions = ["sts:AssumeRole"] + } +} + +resource "aws_iam_role" "lambda_execution_role" { + name = "${var.project_name}-ingestion-lambda-execution-role" + assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json + path = "/" + permissions_boundary = var.permissions_boundary_arn +} + +data "aws_iam_policy_document" "permissions_for_execution_role" { + statement { + sid = "WriteToCloudWatchLogGroup" + effect = "Allow" + actions = [ + "logs:CreateLogStream", + "logs:PutLogEvents", + ] + resources = ["arn:aws:logs:*:*:*"] + } + + statement { + sid = "AllowAccessToBucket" + effect = "Allow" + actions = [ + "s3:ListBucket", + "s3:GetObject", + "s3:DeleteObject" + ] + resources = [ + "${local.s3_bucket_arn}", + "${local.s3_bucket_arn}/*" + ] + } 
+ + statement { + sid = "AllowAccessToRDS" + effect = "Allow" + actions = [ + "rds-data:ExecuteStatement", + "rds-data:BatchExecuteStatement", + "rds-data:BeginTransaction", + "rds-data:CommitTransaction", + "rds-data:RollbackTransaction" + ] + resources = [ + aws_db_instance.rds_postgres.arn + ] + } + + statement { + sid = "AllowEC2Perms" + effect = "Allow" + actions = [ + "ec2:DescribeNetworkInterfaces", + "ec2:CreateNetworkInterface", + "ec2:DeleteNetworkInterface", + "ec2:DescribeInstances", + "ec2:AttachNetworkInterface" + ] + resources = ["*"] + } +} + +resource "aws_iam_policy" "policy_for_execution_role" { + name = "${var.project_name}-ingestion-lambda-execution-role-permissions" + description = "Policy granting necessary permissions to Lambda execution instance" + policy = data.aws_iam_policy_document.permissions_for_execution_role.json +} + +resource "aws_iam_role_policy_attachment" "LambdaExecutionRolePolicyAttachment" { + policy_arn = aws_iam_policy.policy_for_execution_role.arn + role = aws_iam_role.lambda_execution_role.name +} + +# Step function role + +data "aws_iam_policy_document" "sf_assume_role" { + statement { + effect = "Allow" + principals { + identifiers = ["states.amazonaws.com"] + type = "Service" + } + actions = ["sts:AssumeRole"] + } +} + +resource "aws_iam_role" "sfn_role" { + name = "${var.project_name}-ingestion-sf-execution-role" + path = "/" + permissions_boundary = var.permissions_boundary_arn + assume_role_policy = data.aws_iam_policy_document.sf_assume_role.json +} + +data "aws_iam_policy_document" "sfn_policy_document" { + statement { + effect = "Allow" + actions = [ + "lambda:InvokeFunction" + ] + resources = [ + aws_lambda_function.ingestion-lambda.arn, + aws_lambda_function.migration-lambda.arn + ] + } +} + +resource "aws_iam_role_policy" "sfn_policy" { + name = "${var.project_name}-ingestion-sf-execution-policy" + role = aws_iam_role.sfn_role.id + policy = data.aws_iam_policy_document.sfn_policy_document.json +} + +# 
Cloudwatch role + +data "aws_iam_policy_document" "cloudwatch_assume_role" { + statement { + effect = "Allow" + principals { + identifiers = ["events.amazonaws.com"] + type = "Service" + } + actions = ["sts:AssumeRole"] + } +} + +resource "aws_iam_role" "cloudwatch_role" { + name = "${var.project_name}-ingestion-cloud-watch-role" + path = "/" + permissions_boundary = var.permissions_boundary_arn + assume_role_policy = data.aws_iam_policy_document.cloudwatch_assume_role.json +} + +data "aws_iam_policy_document" "cloudwatch_policy_document" { + statement { + effect = "Allow" + actions = [ + "states:StartExecution" + ] + resources = [ + aws_sfn_state_machine.ingestion-step-function.arn + ] + } +} + +resource "aws_iam_role_policy" "cloudwatch_policy" { + name = "${var.project_name}-cloudwatch-event-policy" + role = aws_iam_role.cloudwatch_role.id + policy = data.aws_iam_policy_document.cloudwatch_policy_document.json +} diff --git a/infrastructure/ingestion/aws/lambda.tf b/infrastructure/ingestion/aws/lambda.tf new file mode 100644 index 0000000..c7d7b03 --- /dev/null +++ b/infrastructure/ingestion/aws/lambda.tf @@ -0,0 +1,100 @@ +resource "null_resource" "ingestion_lambda_build" { + provisioner "local-exec" { + command = "./package.sh" + working_dir = "${local.ingestion_lambda_dir}/" + } + + triggers = { + always_run = timestamp() + } +} + +data "local_file" "ingestion_lambda_build" { + filename = local.ingestion_lambda_archive + depends_on = [null_resource.ingestion_lambda_build] +} + +resource "aws_lambda_function" "ingestion-lambda" { + function_name = "${var.project_name}-ingestion-lambda" + role = aws_iam_role.lambda_execution_role.arn + architectures = ["arm64"] + runtime = "python3.9" + handler = "ingestion.handler" + timeout = 900 # 15 minutes + memory_size = 512 # 512 MB + filename = local.ingestion_lambda_archive + source_code_hash = data.local_file.ingestion_lambda_build.content_sha256 + + vpc_config { + subnet_ids = [data.aws_subnet.selected.id] + 
security_group_ids = [aws_security_group.lambda_sg.id] + } + + ephemeral_storage { + size = 1024 # 1 GB + } + + environment { + variables = { + BUCKET_NAME = var.s3_bucket_name + DB_URL = local.db_url + } + } + + depends_on = [ + data.local_file.ingestion_lambda_build, + aws_iam_role.lambda_execution_role, + aws_iam_policy.policy_for_execution_role, + aws_iam_role_policy_attachment.LambdaExecutionRolePolicyAttachment + ] +} + +resource "null_resource" "migration_lambda_build" { + provisioner "local-exec" { + command = "./package.sh" + working_dir = "${local.migration_lambda_dir}/" + } + + triggers = { + always_run = timestamp() + } +} + +data "local_file" "migration_lambda_build" { + filename = local.migration_lambda_archive + depends_on = [null_resource.migration_lambda_build] +} + +resource "aws_lambda_function" "migration-lambda" { + function_name = "${var.project_name}-migration-lambda" + role = aws_iam_role.lambda_execution_role.arn + architectures = ["arm64"] + runtime = "python3.9" + handler = "migrate.migrate" + timeout = 60 # 1 minute + memory_size = 512 # 512 MB + filename = local.migration_lambda_archive + source_code_hash = data.local_file.migration_lambda_build.content_sha256 + + vpc_config { + subnet_ids = [data.aws_subnet.selected.id] + security_group_ids = [aws_security_group.lambda_sg.id] + } + + ephemeral_storage { + size = 512 # 512 MB + } + + environment { + variables = { + DB_URL = local.db_url + } + } + + depends_on = [ + data.local_file.migration_lambda_build, + aws_iam_role.lambda_execution_role, + aws_iam_policy.policy_for_execution_role, + aws_iam_role_policy_attachment.LambdaExecutionRolePolicyAttachment + ] +} diff --git a/infrastructure/ingestion/aws/lambda/ingestion/Dockerfile b/infrastructure/ingestion/aws/lambda/ingestion/Dockerfile new file mode 100644 index 0000000..b81d74f --- /dev/null +++ b/infrastructure/ingestion/aws/lambda/ingestion/Dockerfile @@ -0,0 +1,12 @@ +FROM 
python@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990 + +RUN pip install poetry==1.8.3 --no-cache-dir + +WORKDIR /app + +COPY . /app/ + +RUN poetry self add poetry-plugin-lambda-build \ + && poetry self add poetry-plugin-export \ + && poetry lock --no-update \ + && poetry build-lambda diff --git a/infrastructure/ingestion/aws/lambda/ingestion/README.md b/infrastructure/ingestion/aws/lambda/ingestion/README.md new file mode 100644 index 0000000..2a8579c --- /dev/null +++ b/infrastructure/ingestion/aws/lambda/ingestion/README.md @@ -0,0 +1,37 @@ +# Ingestion + +This directory contains the data ingestion Lambda. The Lambda is invoked by a Step Function. + +The packaging process uses the Poetry Lambda plugin and Docker to generate Lambda packages for the correct platform. This is automated when applying Terraform. + +The Lambda takes a set of actions as input. Each action performs a specific function. + +## Lambda Actions + +- `list_files` : This action lists files in an S3 bucket at a given prefix + Example: + ```json + { + "action": "list_files", + "prefix": "secrets-finder/scheduled-scans/results/" + } + ``` +- `ingest_findings` : This action reads a given `.json` file and creates new records in the `findings`, `scans` and `jobs` tables. The corresponding file is deleted from S3 on successful ingestion + Example: + ```json + { + "action": "ingest_findings", + "file_key": "secrets-finder/scheduled-scans/results/7eb4d1ab-ac6a-4b84-a18d-4bd944d4ef2a.json" + } + ``` + +## Add New Ingestion + +Creating a new ingestion is a 4-step process. + +1. Create the necessary DB migration version under the `migrations` directory. Refer to [Create New Revisions](../../../../../migrations/README.md#creating-new-revision) +2. Create a new ingestion script under the `modules` directory. +3. Register the new ingestion with an action in `ingestion.py` under `ingestion_callback_mapping` +4. Add a new branch in the [step function definition](../../configuration/ingestion_sfn_definition.json). 
+ +Use `terraform apply` to build and deploy the Lambda. Once deployed, the next Step Function invocation will automatically trigger the new ingestion. diff --git a/infrastructure/ingestion/aws/lambda/ingestion/ingestion.py b/infrastructure/ingestion/aws/lambda/ingestion/ingestion.py new file mode 100644 index 0000000..442b9c7 --- /dev/null +++ b/infrastructure/ingestion/aws/lambda/ingestion/ingestion.py @@ -0,0 +1,68 @@ +import os +from typing import List, Dict, Any, Callable, Union +import logging +from modules.common.s3 import S3 +from modules.findings_ingestion import ingest_findings + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + handlers=[logging.StreamHandler()], +) + +bucket_name: str = os.environ.get("BUCKET_NAME") +db_url: str = os.environ.get("DB_URL") + +ingestion_callback_mapping: Dict[str, Callable[[str, str, str], bool]] = { + "ingest_findings": ingest_findings +} + + +def list_files(prefix: str) -> Dict[str, Union[int, Dict[str, List[str]]]]: + s3 = S3(bucket_name) + files = s3.list_files(prefix) + return {"statusCode": 200, "body": {"files": files}} + + +def handler(event: Dict[str, Any], _) -> Dict[str, Any]: + """ + Handle the Lambda function invocation. + + Args: + event (Dict[str, Any]): The event data passed to the Lambda function. + _ (Any): The context object representing the runtime information. + + Returns: + Dict[str, Any]: The response data returned by the Lambda function. + + Raises: + ValueError: If the request is invalid or the action is not supported. 
import os
import tempfile
from typing import List, Optional, Tuple


class S3:
    """
    Thin wrapper around an S3 client that is bound to a single bucket.

    Args:
        bucket_name (str): The name of the S3 bucket.
        client: Optional pre-built S3 client exposing ``list_objects_v2``,
            ``download_file`` and ``delete_object``. When omitted,
            ``boto3.client("s3")`` is created.

    Attributes:
        client: The S3 client used for every call.
        bucket_name (str): The name of the S3 bucket.

    """

    # Class-level defaults; both are overwritten per instance in __init__.
    client = None
    bucket_name: str = None

    def __init__(self, bucket_name: str, client=None) -> None:
        """
        Initializes the wrapper.

        Args:
            bucket_name (str): The name of the S3 bucket.
            client: Optional S3 client to use instead of creating one.

        """
        if client is None:
            # Imported lazily so the class stays usable with an injected
            # client even where the AWS SDK is not importable.
            import boto3

            client = boto3.client("s3")

        self.client = client
        self.bucket_name = bucket_name

    def list_files(self, prefix: str) -> List[str]:
        """
        Lists the keys directly under the specified prefix.

        A trailing "/" is appended to the prefix when missing and "/" is used
        as delimiter, so keys nested below a further "/" are not returned.
        Keys that themselves end with "/" are skipped.

        Args:
            prefix (str): The prefix to filter the files.

        Returns:
            List[str]: A list of file keys, in the order S3 returned them.

        """
        if not prefix.endswith("/"):
            prefix += "/"

        request: dict = {
            "Bucket": self.bucket_name,
            "Prefix": prefix,
            "Delimiter": "/",
        }
        keys: List[str] = []

        while True:
            response: dict = self.client.list_objects_v2(**request)
            keys.extend(
                entry["Key"]
                for entry in response.get("Contents", [])
                if not entry["Key"].endswith("/")
            )

            token = response.get("NextContinuationToken")
            # Stop when the listing is complete. Also stop when the response
            # claims to be truncated but carries no token: re-issuing the same
            # request would otherwise loop forever.
            if not response.get("IsTruncated") or not token:
                return keys

            request["ContinuationToken"] = token

    def download_file(self, file_key: str) -> str:
        """
        Downloads the object with the specified key into the temp directory.

        The local file is named after the last path component of the key, so
        two keys with the same base name map to the same local path.

        Args:
            file_key (str): The key of the file to download.

        Returns:
            str: The local path of the downloaded file.

        Raises:
            ValueError: If the key has no base name (e.g. it ends with "/").

        """
        file_name: str = os.path.basename(file_key)
        if not file_name:
            # Without a base name the target would be the temp directory itself.
            raise ValueError(f"file_key does not name a file: {file_key!r}")

        local_path: str = os.path.join(tempfile.gettempdir(), file_name)
        self.client.download_file(self.bucket_name, file_key, local_path)
        return local_path

    def download_first_file(self, prefix: str) -> Optional[Tuple[str, str]]:
        """
        Downloads the first file listed under the specified prefix.

        Args:
            prefix (str): The prefix to filter the files.

        Returns:
            Optional[Tuple[str, str]]: ``(file_key, local_path)`` for the first
            listed file, or ``None`` when nothing is listed under the prefix.

        """
        files = self.list_files(prefix)
        if not files:
            return None

        key = files[0]
        return key, self.download_file(key)

    def delete_file(self, file_key: str) -> bool:
        """
        Deletes the object with the specified key from the bucket.

        Args:
            file_key (str): The key of the file to delete.

        Returns:
            bool: Always True when the client call returns; failures surface
            as exceptions raised by the client rather than as False.

        """
        self.client.delete_object(Bucket=self.bucket_name, Key=file_key)
        return True
def _parse_scan_time(value: str) -> datetime.datetime:
    """Parse a job/scan timestamp written in DATE_TIME_FORMAT."""
    return datetime.datetime.strptime(value, DATE_TIME_FORMAT)


def _parse_commit_timestamp(value):
    """Parse a "%Y-%m-%d %H:%M:%S %z" commit timestamp; None/empty yields None."""
    if not value:
        return None
    return datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S %z")


def _first_source_metadata(raw_finding: dict) -> dict:
    """Return the first entry of SourceMetadata.Data, or raise ValueError if absent."""
    entries = raw_finding.get("SourceMetadata", {}).get("Data", {})
    metadata = next(iter(entries.values()), None)
    if metadata is None:
        raise ValueError("Finding has no SourceMetadata.Data entry")
    return metadata


def _load_findings_file(file_path: str):
    """Load the downloaded JSON document and remove the local copy afterwards."""
    import os

    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    finally:
        # The download lands in the temp directory; remove it so repeated
        # invocations in the same environment do not accumulate files.
        if os.path.exists(file_path):
            os.remove(file_path)


def _build_job(data: dict):
    """Build the Jobs row from the top-level fields of the findings document."""
    return Jobs(
        uuid=data["scan_uuid"],
        scan_identifier=data["scan_identifier"],
        scm=data["scm"],
        scan_context=data["scan_context"],
        started_on=_parse_scan_time(data["start"]),
        completed_on=_parse_scan_time(data["end"]),
        status=data["status"],
        scan_type=data["scan_type"],
        scan_mode=data["scan_mode"],
    )


def _build_scan(job, result: dict):
    """Build the Scans row for one entry of ``results``, inheriting job-level fields."""
    return Scans(
        uuid=result["scan_uuid"],
        job_uuid=job.uuid,
        scan_identifier=job.scan_identifier,
        scm=job.scm,
        organization=result["organization"],
        repository=result["repository"],
        scan_context=job.scan_context,
        started_on=_parse_scan_time(result["start"]),
        completed_on=_parse_scan_time(result["end"]),
        status=result.get("status"),
        scan_mode=job.scan_mode,
        scan_type=job.scan_type,
        metadata_=result.get("metadata", {}),
    )


def _build_finding(job, result: dict, raw_finding: dict):
    """Build the Finding row for one raw finding of a scan result."""
    source_meta_data = _first_source_metadata(raw_finding)
    return Finding(
        uuid=str(uuid.uuid4()),
        scan_uuid=result["scan_uuid"],
        job_uuid=job.uuid,
        organization=result["organization"],
        scan_context=job.scan_context,
        created_on=datetime.datetime.now(),
        # Previously this column was filled from "DetectorName", duplicating
        # detector_name. Fall back to that old value when the input carries no
        # "DecoderName", so such files still ingest (the column is NOT NULL).
        decoder_name=raw_finding.get("DecoderName") or raw_finding["DetectorName"],
        detector_name=raw_finding["DetectorName"],
        detector_type=raw_finding["DetectorType"],
        raw=raw_finding["Raw"],
        raw_v2=raw_finding.get("RawV2", ""),
        redacted=raw_finding.get("Redacted", ""),
        source_name=raw_finding["SourceName"],
        source_type=raw_finding["SourceType"],
        verified=raw_finding["Verified"],
        extra_data=raw_finding.get("ExtraData", {}),
        repository=result["repository"],
        filename=source_meta_data["file"],
        commit_hash=source_meta_data.get("commit"),
        committer_email=source_meta_data.get("email"),
        commit_timestamp=_parse_commit_timestamp(source_meta_data.get("timestamp")),
        line_number=source_meta_data["line"],
        is_still_valid=raw_finding["Verified"],
        last_validated_on=_parse_scan_time(result["end"]),
    )


def ingest_findings(db_url: str, bucket_name: str, file_key: str) -> bool:
    """
    Ingests findings from a file downloaded from S3 into a database.

    One ``jobs`` row is created from the top-level document, one ``scans`` row
    per entry in ``results`` and one ``findings`` row per entry in each
    result's ``findings``. Everything is written in a single transaction.

    The S3 object is deleted only after the transaction has been committed, so
    a failed commit never discards the source file. If the delete fails after
    the commit, the rows are already stored and the file stays in the bucket;
    ingesting it again would try to insert the same job and scan UUIDs.

    Args:
        db_url (str): The URL of the database to connect to.
        bucket_name (str): The name of the S3 bucket.
        file_key (str): The key of the file in the S3 bucket.

    Returns:
        bool: True if the data was committed and the file deleted. False if
        the file contained no data or the delete reported failure.

    Raises:
        KeyError: If a required field is missing from the document.
        ValueError: If a timestamp is malformed or a finding has no source
            metadata entry.
    """
    logging.info(f"Downloading file from S3, key: {file_key}, bucket: {bucket_name}")
    s3 = S3(bucket_name)
    file_path = s3.download_file(file_key)
    logging.info(f"File downloaded to {file_path}, key: {file_key}")

    data = _load_findings_file(file_path)
    if not data:
        logging.error("No data in the file")
        return False

    # Create a SQLAlchemy engine to connect to the database
    engine = create_engine(db_url)
    try:
        # Leaving the "with" block closes the session; anything not committed
        # (e.g. because building a row raised) is rolled back on close.
        with sessionmaker(bind=engine)() as session:
            job = _build_job(data)
            session.add(job)

            for result in data.get("results", []):
                scan = _build_scan(job, result)
                logging.info(f'Ingesting scan: {result["scan_uuid"]}')
                session.add(scan)

                for raw_finding in result.get("findings", []):
                    record = _build_finding(job, result, raw_finding)
                    logging.info(
                        f'Ingesting finding: {record.uuid} for scan: {result["scan_uuid"]}'
                    )
                    session.add(record)

            session.commit()
    finally:
        # The engine is created per call, so release its pooled connections.
        engine.dispose()

    if not s3.delete_file(file_key):
        logging.error(f"Error deleting file from S3, key: {file_key}")
        return False

    logging.info(f"Deleted file from S3, key: {file_key}")
    return True
+ +docker run --rm -v "$(pwd)":/output ingestion-lambda cp /app/ingestion.zip /output/ diff --git a/infrastructure/ingestion/aws/lambda/ingestion/pyproject.toml b/infrastructure/ingestion/aws/lambda/ingestion/pyproject.toml new file mode 100644 index 0000000..e1c9049 --- /dev/null +++ b/infrastructure/ingestion/aws/lambda/ingestion/pyproject.toml @@ -0,0 +1,21 @@ +[tool.poetry] +name = "ingestion" +version = "0.1.0" +description = "Lambda to ingest data into the data lake" +authors = ["Thomson Reuters "] +license = "mit" +readme = "README.md" +include = ["modules/*.py", "modules/common/*"] + +[tool.poetry.dependencies] +python = "^3.9" +boto3 = "^1.34.130" +sqlalchemy = "^2.0.31" +psycopg2-binary = "^2.9.9" + +[tool.poetry-plugin-lambda-build] +package_artifact_path = "ingestion.zip" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/infrastructure/ingestion/aws/lambda/migration/Dockerfile b/infrastructure/ingestion/aws/lambda/migration/Dockerfile new file mode 100644 index 0000000..b81d74f --- /dev/null +++ b/infrastructure/ingestion/aws/lambda/migration/Dockerfile @@ -0,0 +1,12 @@ +FROM python@sha256:320a7a4250aba4249f458872adecf92eea88dc6abd2d76dc5c0f01cac9b53990 + +RUN pip install poetry==1.8.3 --no-cache-dir + +WORKDIR /app + +COPY . /app/ + +RUN poetry self add poetry-plugin-lambda-build \ + && poetry self add poetry-plugin-export \ + && poetry lock --no-update \ + && poetry build-lambda diff --git a/infrastructure/ingestion/aws/lambda/migration/README.md b/infrastructure/ingestion/aws/lambda/migration/README.md new file mode 100644 index 0000000..3e2206c --- /dev/null +++ b/infrastructure/ingestion/aws/lambda/migration/README.md @@ -0,0 +1,16 @@ +# Migration Lambda + +This directory contains all the necessary scripts to package Migrations (located at the root level) as a Lambda function. 
+ +The packaging process uses the Poetry Lambda plugin and leverages Docker to ensure Lambda packages are generated for the correct platform. + +## Usage + +To package the Lambda, run the following command: + +```bash +./package.sh +``` + +> [!NOTE] +> Any changes in Migrations should be automatically detected during the repackaging process. diff --git a/infrastructure/ingestion/aws/lambda/migration/package.sh b/infrastructure/ingestion/aws/lambda/migration/package.sh new file mode 100755 index 0000000..3ff9917 --- /dev/null +++ b/infrastructure/ingestion/aws/lambda/migration/package.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -e + +ROOT_DIR=$(git rev-parse --show-toplevel) + +docker build -t migrate-lambda --platform=linux/arm64 -f Dockerfile "$ROOT_DIR/migrations" + +docker run --rm -v "$(pwd)":/output migrate-lambda cp /app/migration.zip /output/ diff --git a/infrastructure/ingestion/aws/locals.tf b/infrastructure/ingestion/aws/locals.tf new file mode 100644 index 0000000..ebf70ba --- /dev/null +++ b/infrastructure/ingestion/aws/locals.tf @@ -0,0 +1,11 @@ +locals { + environment = replace(lower(var.environment_type), " ", "-") + db_url = "postgresql://${var.rds_username}:${urlencode(random_password.rds_master_password.result)}@${aws_db_instance.rds_postgres.address}" + configuration_dir = "${path.module}/configuration" + ingestion_lambda_dir = "${path.module}/lambda/ingestion" + ingestion_lambda_archive = "${local.ingestion_lambda_dir}/ingestion.zip" + migration_lambda_dir = "${path.module}/lambda/migration" + migration_lambda_archive = "${local.migration_lambda_dir}/migration.zip" + s3_bucket_arn = "arn:aws:s3:::${var.s3_bucket_name}" + tags = var.tags +} diff --git a/infrastructure/ingestion/aws/outputs.tf b/infrastructure/ingestion/aws/outputs.tf new file mode 100644 index 0000000..185e483 --- /dev/null +++ b/infrastructure/ingestion/aws/outputs.tf @@ -0,0 +1,3 @@ +output "rds_pg_endpoint" { + value = aws_db_instance.rds_postgres.endpoint +} diff --git 
a/infrastructure/ingestion/aws/providers.tf b/infrastructure/ingestion/aws/providers.tf new file mode 100644 index 0000000..d3d1e83 --- /dev/null +++ b/infrastructure/ingestion/aws/providers.tf @@ -0,0 +1,23 @@ +terraform { + required_version = ">=1.3" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } + + backend "s3" { + encrypt = true + } +} + +provider "aws" { + region = var.aws_region + profile = var.aws_profile + + default_tags { + tags = local.tags + } +} diff --git a/infrastructure/ingestion/aws/rds.tf b/infrastructure/ingestion/aws/rds.tf new file mode 100644 index 0000000..a133b4f --- /dev/null +++ b/infrastructure/ingestion/aws/rds.tf @@ -0,0 +1,18 @@ +resource "aws_db_instance" "rds_postgres" { + identifier = "${var.project_name}-rds-postgres" + allocated_storage = 10 + engine = "postgres" + engine_version = "16.3" + instance_class = "db.t3.micro" # Smallest instance type for PostgreSQL + username = var.rds_username + password = random_password.rds_master_password.result + parameter_group_name = "default.postgres16" + db_name = var.rds_db_name + skip_final_snapshot = true + publicly_accessible = false + storage_encrypted = true + deletion_protection = true + backup_retention_period = 7 + vpc_security_group_ids = [aws_security_group.rds_sg.id] + db_subnet_group_name = var.db_subnet_group_name +} diff --git a/infrastructure/ingestion/aws/s3.tfbackend b/infrastructure/ingestion/aws/s3.tfbackend new file mode 100644 index 0000000..6fa4016 --- /dev/null +++ b/infrastructure/ingestion/aws/s3.tfbackend @@ -0,0 +1,5 @@ +bucket = "" +key = "" +region = "" +dynamodb_table = "" +profile = "" diff --git a/infrastructure/ingestion/aws/secrets.tf b/infrastructure/ingestion/aws/secrets.tf new file mode 100644 index 0000000..e43778b --- /dev/null +++ b/infrastructure/ingestion/aws/secrets.tf @@ -0,0 +1,19 @@ +resource "random_password" "rds_master_password" { + length = 40 + special = true + min_special = 5 + override_special = 
"!#$%^&*()-_=+[]{}<>:?" + keepers = { + pass_version = 1 + } +} + +resource "aws_secretsmanager_secret" "rds_master_password" { + name = "${var.project_name}-rds-master-password" + description = "Master password for RDS instance" +} + +resource "aws_secretsmanager_secret_version" "rds_master_password" { + secret_id = aws_secretsmanager_secret.rds_master_password.id + secret_string = random_password.rds_master_password.result +} diff --git a/infrastructure/ingestion/aws/securitygroups.tf b/infrastructure/ingestion/aws/securitygroups.tf new file mode 100644 index 0000000..9e23393 --- /dev/null +++ b/infrastructure/ingestion/aws/securitygroups.tf @@ -0,0 +1,33 @@ +resource "aws_security_group" "rds_sg" { + name = "${var.project_name}-rds-sg" + description = "Security group for RDS instance" + vpc_id = data.aws_vpc.selected.id + + ingress { + from_port = 5432 + to_port = 5432 + protocol = "tcp" + cidr_blocks = [data.aws_vpc.selected.cidr_block] + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + + +resource "aws_security_group" "lambda_sg" { + name = "${var.project_name}-lambda-sg" + description = "Security group for Lambda functions" + vpc_id = data.aws_vpc.selected.id + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} diff --git a/infrastructure/ingestion/aws/sfn.tf b/infrastructure/ingestion/aws/sfn.tf new file mode 100644 index 0000000..540f09e --- /dev/null +++ b/infrastructure/ingestion/aws/sfn.tf @@ -0,0 +1,13 @@ +resource "aws_sfn_state_machine" "ingestion-step-function" { + name = "${var.project_name}-ingestion-step-function" + role_arn = aws_iam_role.sfn_role.arn + definition = templatefile("${local.configuration_dir}/ingestion_sfn_definition.json", { + migrate_lambda_arn = "${aws_lambda_function.migration-lambda.arn}", + ingestion_lambda_arn = "${aws_lambda_function.ingestion-lambda.arn}" + }) + + depends_on = [ + aws_iam_role.sfn_role, + 
aws_iam_role_policy.sfn_policy, + ] +} diff --git a/infrastructure/ingestion/aws/sts.tf b/infrastructure/ingestion/aws/sts.tf new file mode 100644 index 0000000..8fc4b38 --- /dev/null +++ b/infrastructure/ingestion/aws/sts.tf @@ -0,0 +1 @@ +data "aws_caller_identity" "current" {} diff --git a/infrastructure/ingestion/aws/terraform.tfvars.example b/infrastructure/ingestion/aws/terraform.tfvars.example new file mode 100644 index 0000000..85c8a35 --- /dev/null +++ b/infrastructure/ingestion/aws/terraform.tfvars.example @@ -0,0 +1,17 @@ +aws_region = "" +aws_profile = "" +environment_type = "" +project_name = "" +vpc_name = "" +subnet_name = "" +db_subnet_group_name = "" +permissions_boundary_arn = "" +s3_bucket_name = "" +tags = { + "mytag" = "tag" + "mytag2" = "tag2" +} +rds_username = "" +rds_db_name = "" +ingestion_schedule = "" +disable_ingestion_schedule = false diff --git a/infrastructure/ingestion/aws/variables.tf b/infrastructure/ingestion/aws/variables.tf new file mode 100644 index 0000000..5b1b594 --- /dev/null +++ b/infrastructure/ingestion/aws/variables.tf @@ -0,0 +1,115 @@ +variable "aws_region" { + type = string + description = "AWS region where to deploy resources" + + validation { + condition = can(regex("^(af|ap|ca|eu|il|me|mx|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+$", var.aws_region)) + error_message = "You should enter a valid AWS region (https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html)" + } +} + +variable "aws_profile" { + type = string + description = "AWS profile to use for authentication" +} + +variable "environment_type" { + type = string + description = "Environment type" + + validation { + condition = contains(["PRODUCTION", "PRE-PRODUCTION", "QUALITY ASSURANCE", "INTEGRATION TESTING", "DEVELOPMENT", "LAB"], var.environment_type) + error_message = "The environment type should be one of the following values: PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, 
INTEGRATION TESTING, DEVELOPMENT, LAB (case sensitive)" + } +} + +variable "vpc_name" { + type = string + description = "Name (value of the 'Name' tag) of the VPC to use for secrets-finder" +} + +variable "subnet_name" { + type = string + description = "Name of the subnet where to deploy the resources (wildcards are allowed: first match is used)" +} + +variable "db_subnet_group_name" { + type = string + description = "Name of the RDS subnet group" +} + +variable "tags" { + type = map(string) + description = "A map of tags to add to the resources" + + validation { + condition = alltrue([for v in values(var.tags) : v != ""]) + error_message = "Tag values must not be empty." + } +} + +variable "project_name" { + type = string + description = "Name of the project" + default = "secrets-finder" +} + +variable "permissions_boundary_arn" { + type = string + description = "ARN of the permissions boundary to use for the IAM role" + + validation { + condition = can(regex("^arn:aws:iam::[0-9]{12}:policy\\/([a-zA-Z0-9-_.]+)$", var.permissions_boundary_arn)) + error_message = "The provided ARN is not a valid ARN for a policy" + } +} + +variable "s3_bucket_name" { + type = string + description = "Name of the S3 bucket to create" + + validation { + condition = can(regex("^[a-z0-9.-]{3,63}$", var.s3_bucket_name)) + error_message = "The S3 bucket name must be a valid string with only a-z0-9.- characters and have a length between 3 and 63" + } +} + +variable "rds_username" { + type = string + description = "Username for the RDS instance" + default = "secrets_finder" + + validation { + condition = can(regex("^[a-z][a-z0-9_]{1,}$", var.rds_username)) + error_message = "The RDS username must be a valid string with only a-z0-9_ characters, have a length greater than 1, and not start with a number" + } +} + + +variable "rds_db_name" { + type = string + description = "Name of the database to create in the RDS instance" + default = "secrets_finder" + + validation { + condition = 
can(regex("^[a-z][a-z0-9_]{1,}$", var.rds_db_name)) + error_message = "The RDS database name must be a valid string with only a-z0-9_ characters, have a length greater than 1, and not start with a number" + } +} + +variable "ingestion_schedule" { + type = string + description = "Schedule expression (rate or cron) for the CloudWatch Event Rule" + default = "rate(24 hours)" + + validation { + condition = can(regex("^(rate\\(\\d+ (minute|minutes|hour|hours|day|days)\\)|cron\\(.+\\))$", var.ingestion_schedule)) + error_message = "The ingestion schedule should be in the format 'rate(n minute|minutes|hour|hours|day|days)' or 'cron(expression)', where n is a positive integer" + } +} + +variable "disable_ingestion_schedule" { + type = bool + description = "Disable the ingestion schedule" + default = false +} diff --git a/infrastructure/ingestion/aws/vpc.tf b/infrastructure/ingestion/aws/vpc.tf new file mode 100644 index 0000000..041279b --- /dev/null +++ b/infrastructure/ingestion/aws/vpc.tf @@ -0,0 +1,22 @@ +data "aws_vpc" "selected" { + filter { + name = "tag:Name" + values = [var.vpc_name] + } +} + +data "aws_subnets" "default" { + filter { + name = "tag:Name" + values = [var.subnet_name] + } +} + +data "aws_subnet" "selected" { + id = element(sort(data.aws_subnets.default.ids), 0) +} + +data "aws_security_group" "default" { + vpc_id = data.aws_vpc.selected.id + name = "default" +}