
fix: better explain s3_bucket_partition_prefix, add a script for moving objects between paths, and trim the trailing / from bucket_prefix before writing objects
EreminAnton committed Sep 6, 2024
1 parent 4a9f2bc commit 914c5e0
Showing 3 changed files with 44 additions and 1 deletion.
37 changes: 37 additions & 0 deletions athena_query/fix_path.sh
@@ -0,0 +1,37 @@
#!/bin/bash
# In releases prior to 2.0.0, Elevator did not check for double slashes in object paths, which could cause Athena queries to fail.
# If your S3 bucket contains keys with double slashes, run this script to fix them.
# Replace SOURCE_PREFIX and BUCKET_NAME with your values; DESTINATION_PREFIX documents the expected result.

set -x # Enable debugging

SOURCE_PREFIX="logs//2024/"      # prefix containing the double slash
DESTINATION_PREFIX="logs/2024/"  # expected result (informational; the new key is derived below)
BUCKET_NAME=""                   # target bucket name

# Set the number of parallel jobs (adjust based on your system's resources)
NUM_JOBS=10

# Move a single object to its double-slash-free key
move_file() {
    FILE_PATH="$1"
    BUCKET_NAME="$2"
    NEW_FILE_PATH=$(echo "$FILE_PATH" | sed "s|//|/|g")  # collapse double slashes

    # Copy the object to the new key with server-side encryption
    aws s3 cp "s3://$BUCKET_NAME/$FILE_PATH" "s3://$BUCKET_NAME/$NEW_FILE_PATH" --sse AES256

    # Delete the original object only if the copy succeeded
    if [[ $? -eq 0 ]]; then
        aws s3 rm "s3://$BUCKET_NAME/$FILE_PATH"
        echo "Moved $FILE_PATH to $NEW_FILE_PATH"
    else
        echo "Error copying $FILE_PATH"
    fi
}

export -f move_file # Export the function so the bash subshells spawned by xargs can call it

# Step 1: List all objects under the source prefix and move them in parallel
# (note: awk '{print $4}' assumes object keys contain no spaces)
aws s3 ls "s3://$BUCKET_NAME/$SOURCE_PREFIX" --recursive | awk '{print $4}' | \
    grep -v '^$' | xargs -P "$NUM_JOBS" -I {} bash -c 'move_file "$@"' _ {} "$BUCKET_NAME"
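
For the same cleanup without the AWS CLI, here is a minimal boto3 sketch; it is not part of this commit, the bucket name is a placeholder, and copy-then-delete is used because S3 has no native rename:

import boto3

BUCKET = "my-audit-bucket"   # placeholder: your bucket name
PREFIX = "logs//2024/"       # prefix containing the double slash

s3 = boto3.client("s3")
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=BUCKET, Prefix=PREFIX):
    for obj in page.get("Contents", []):
        old_key = obj["Key"]
        new_key = old_key.replace("//", "/")  # collapse double slashes
        if new_key == old_key:
            continue
        # Copy to the fixed key with the same server-side encryption,
        # then delete the original (copy + delete is S3's "move")
        s3.copy_object(
            Bucket=BUCKET,
            Key=new_key,
            CopySource={"Bucket": BUCKET, "Key": old_key},
            ServerSideEncryption="AES256",
        )
        s3.delete_object(Bucket=BUCKET, Key=old_key)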
2 changes: 2 additions & 0 deletions src/config.py
@@ -118,6 +118,7 @@ def get_accounts_and_permission_sets(cls, values: dict) -> dict:  # noqa: ANN101
         groups = get_groups_from_statements(group_statements)
         permission_sets = set()
         accounts = set()
+        s3_bucket_prefix_for_partitions = values.get("s3_bucket_prefix_for_partitions", "").rstrip("/")
         for statement in statements:
             permission_sets.update(statement.permission_set)
             if statement.resource_type == "Account":
@@ -128,6 +129,7 @@ def get_accounts_and_permission_sets(cls, values: dict) -> dict:  # noqa: ANN101
             "statements": frozenset(statements),
             "group_statements": frozenset(group_statements),
             "groups": groups,
+            "s3_bucket_prefix_for_partitions": s3_bucket_prefix_for_partitions,
         }
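
The new rstrip("/") call above is what prevents double slashes at write time. A quick illustration (the partition layout below is assumed, not taken from this repository):

prefix = "logs/"                  # configured value with a trailing slash
suffix = "2024/09/06/audit.json"  # assumed partition layout

assert f"{prefix}/{suffix}" == "logs//2024/09/06/audit.json"             # untrimmed: double slash
assert f"{prefix.rstrip('/')}/{suffix}" == "logs/2024/09/06/audit.json"  # trimmed: clean key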


6 changes: 5 additions & 1 deletion vars.tf
@@ -143,7 +143,11 @@ variable "s3_bucket_name_for_audit_entry" {
 }
 
 variable "s3_bucket_partition_prefix" {
-  description = "The prefix for the S3 bucket partitions"
+  description = <<EOT
+The prefix for the S3 audit bucket object partitions.
+Do not include slashes (/) in the prefix; the trailing slash is added automatically, e.g. "logs" becomes "logs/".
+Leave this empty to write objects to the root of the bucket.
+EOT
   type    = string
   default = "logs"
 }
