Skip to content

Commit

Permalink
add bucket ops script, tasks, and update sli / gen rule (#388)
Browse files Browse the repository at this point in the history
  • Loading branch information
stewartshea authored Jun 24, 2024
1 parent 330cd74 commit dac2d60
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ spec:
configProvided:
- name: PROJECT_IDS
value: {{match_resource.resource.project_id}}
- name: USAGE_THRESHOLD
value: "0.5"
- name: OPS_RATE_THRESHOLD
value: "10"
- name: PUBLIC_ACCESS_BUCKET_THRESHOLD
value: "0"
secretsProvided:
- name: gcp_credentials_json
workspaceKey: {{custom.gcp_ops_suite_sa}}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ metadata:
spec:
imageURL: https://storage.googleapis.com/runwhen-nonprod-shared-images/icons/gcp/cloud_storage/cloud_storage.svg
alias: GCP Storage Bucket Health for Project {{match_resource.resource.project_id}}
asMeasuredBy: The number of risky GCP bucket configurations in project {{match_resource.resource.project_id}}
asMeasuredBy: A combined score that includes public bucket counts, total storage consumption, and total storage operations for project {{match_resource.resource.project_id}}. A score of 1 is healthy, and less than 1 indicates an issue with one of these components.
configProvided:
- name: SLX_PLACEHOLDER
value: SLX_PLACEHOLDER
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ spec:
configProvided:
- name: PROJECT_IDS
value: {{match_resource.resource.project_id}}
- name: USAGE_THRESHOLD
value: "0.5"
- name: OPS_RATE_THRESHOLD
value: "10"
- name: PUBLIC_ACCESS_BUCKET_THRESHOLD
value: "0"
secretsProvided:
- name: gcp_credentials_json
workspaceKey: {{custom.gcp_ops_suite_sa}}
172 changes: 172 additions & 0 deletions codebundles/gcp-bucket-health/bucket_ops_costs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/bin/bash
#
# Reports the read and write operation rates (ops/s) of the GCS buckets in
# every project listed in PROJECT_IDS and writes the result as a JSON array to
# $HOME/bucket_ops_report.json.
#
# Inputs (environment):
#   PROJECT_IDS                     comma-separated list of GCP project ids (required)
#   GOOGLE_APPLICATION_CREDENTIALS  path to a service account key file; when
#                                   empty the script falls back to gcloud
#                                   application-default credentials
#
# External tools used below: curl, jq, openssl, and gcloud (fallback only).

# Path of the service account key file used to mint an access token.
SERVICE_ACCOUNT_KEY=$GOOGLE_APPLICATION_CREDENTIALS
# Base64url-encode stdin on a single line without '=' padding, as required
# for the segments of a JWT.
_b64url_encode() {
  openssl base64 -e -A | tr -d '=\n' | tr '/+' '_-'
}

#######################################
# Get an OAuth2 access token using a service account key.
# Builds an RS256-signed JWT from the key file and exchanges it at the Google
# OAuth2 token endpoint (JWT bearer grant, cloud-platform scope, 1h lifetime).
# Arguments: $1 - path to the service account key JSON file
# Outputs:   the access token on stdout; diagnostics on stderr
# Returns:   0 on success, 1 when the key file is unusable, signing fails or
#            the token endpoint does not return an access token
#######################################
get_access_token() {
  local key_file=$1
  local email key header now exp payload sig jwt token

  if [[ ! -r "$key_file" ]]; then
    echo "Error: service account key file '$key_file' is not readable." >&2
    return 1
  fi

  email=$(jq -r '.client_email // empty' "$key_file")
  # jq -r already decodes JSON "\n" escapes; the sed additionally converts
  # literal backslash-n sequences left behind by double-escaped key files.
  key=$(jq -r '.private_key // empty' "$key_file" | sed 's/\\n/\n/g')
  if [[ -z "$email" || -z "$key" ]]; then
    echo "Error: '$key_file' does not contain client_email and private_key." >&2
    return 1
  fi

  header=$(printf '%s' '{"alg":"RS256","typ":"JWT"}' | _b64url_encode)
  now=$(date +%s)
  exp=$((now + 3600))
  payload=$(printf '%s' "{\"iss\":\"$email\",\"scope\":\"https://www.googleapis.com/auth/cloud-platform\",\"aud\":\"https://oauth2.googleapis.com/token\",\"exp\":$exp,\"iat\":$now}" | _b64url_encode)

  # The private key is handed to openssl through process substitution so it
  # never touches the disk or the argument list.
  sig=$(printf '%s' "$header.$payload" \
    | openssl dgst -sha256 -sign <(printf '%s\n' "$key") \
    | _b64url_encode)
  if [[ -z "$sig" ]]; then
    echo "Error: failed to sign the JWT with the key from '$key_file'." >&2
    return 1
  fi

  jwt="$header.$payload.$sig"

  # '// empty' turns a missing access_token (error response) into an empty
  # string instead of the literal text "null".
  token=$(curl -s --request POST \
    --url https://oauth2.googleapis.com/token \
    --header "Content-Type: application/x-www-form-urlencoded" \
    --data "grant_type=urn:ietf:params:oauth:grant-type:jwt-bearer&assertion=$jwt" \
    | jq -r '.access_token // empty')
  if [[ -z "$token" ]]; then
    echo "Error: token endpoint did not return an access token." >&2
    return 1
  fi

  printf '%s\n' "$token"
}

#######################################
# Check whether the Monitoring API is enabled for a project by querying the
# Service Usage API for monitoring.googleapis.com.
# Arguments: $1 - project id
#            $2 - OAuth2 bearer token
# Outputs:   a diagnostic on stdout when the lookup fails or the API is not
#            in the ENABLED state
# Returns:   0 when the service state is ENABLED, 1 otherwise
#######################################
is_monitoring_api_enabled() {
  local project_id=$1
  local token=$2
  local service_url="https://serviceusage.googleapis.com/v1/projects/$project_id/services/monitoring.googleapis.com"
  local raw status_code body api_state

  # curl appends "HTTP_STATUS:<code>" on its own line after the body so that
  # both can be recovered from a single request.
  raw=$(curl -s -w "\nHTTP_STATUS:%{http_code}" --header "Authorization: Bearer $token" \
    "$service_url")

  status_code=$(sed -n 's/.*HTTP_STATUS:\([0-9]*\)$/\1/p' <<< "$raw")
  # Keep everything up to the status marker line, then drop the marker itself.
  body=$(sed -n '1,/^HTTP_STATUS:/p' <<< "$raw" | sed '$d')

  if [[ "$status_code" -ne 200 ]]; then
    printf '%s\n' \
      "Error checking Monitoring API status for project $project_id:" \
      "HTTP Status: $status_code" \
      "Response: $body"
    return 1
  fi

  api_state=$(jq -r '.state' <<< "$body")
  case "$api_state" in
    ENABLED)
      return 0
      ;;
  esac

  printf '%s\n' \
    "Monitoring API is not enabled for project $project_id." \
    "State: $api_state"
  return 1
}

#######################################
# List the names of the buckets in a project via the Cloud Storage JSON API.
# Arguments: $1 - project id
#            $2 - OAuth2 bearer token
# Outputs:   one bucket name per line on stdout (nothing when the project has
#            no buckets); API error messages on stderr
# Returns:   0 on success, 1 when the API answers with an error object
#######################################
list_buckets() {
  local project_id=$1
  local token=$2
  local response api_error

  response=$(curl -s --header "Authorization: Bearer $token" \
    "https://storage.googleapis.com/storage/v1/b?project=$project_id")

  api_error=$(jq -r '.error.message? // empty' <<< "$response" 2>/dev/null)
  if [[ -n "$api_error" ]]; then
    echo "Error listing buckets for project $project_id: $api_error" >&2
    return 1
  fi

  # The "items" key is absent when a project has no buckets, so default to an
  # empty list instead of letting jq fail on null.
  # NOTE(review): only the first page of results is read; a nextPageToken in
  # the response is not followed — confirm projects stay below one page.
  jq -r '(.items // []) | .[] | .name' <<< "$response"
}

#######################################
# Get the read operation rate of all buckets in a project using PromQL.
# Queries the Cloud Monitoring Prometheus API for the per-bucket rate (over
# 30 minutes) of requests whose method starts with Read, List or Get.
# Arguments: $1 - project id
#            $2 - OAuth2 bearer token
# Outputs:   a stream of JSON objects {"bucket_name": ..., "ops": ...} on
#            stdout, one per bucket with data; nothing when the response
#            carries no result
#######################################
get_bucket_read_ops() {
  local project_id=$1
  local token=$2
  local query='sum by (bucket_name)(rate(storage_googleapis_com:api_request_count{monitored_resource="gcs_bucket",method=~"Read.*|List.*|Get.*"}[30m]))'
  local response

  # --data-urlencode keeps the braces, quotes and regex characters of the
  # query intact in the form-encoded request body.
  response=$(curl -s --header "Authorization: Bearer $token" \
    --header "Content-Type: application/x-www-form-urlencoded" \
    --data-urlencode "query=$query" \
    "https://monitoring.googleapis.com/v1/projects/$project_id/location/global/prometheus/api/v1/query")

  # Error responses have no .data.result; treat that as "no series".
  jq -r '(.data.result // []) | .[] | {bucket_name: .metric.bucket_name, ops: .value[1]}' <<< "$response"
}

#######################################
# Get the write operation rate of all buckets in a project using PromQL.
# Queries the Cloud Monitoring Prometheus API for the per-bucket rate (over
# 30 minutes) of requests whose method starts with Write.
# Arguments: $1 - project id
#            $2 - OAuth2 bearer token
# Outputs:   a stream of JSON objects {"bucket_name": ..., "ops": ...} on
#            stdout, one per bucket with data; nothing when the response
#            carries no result
#######################################
get_bucket_write_ops() {
  local project_id=$1
  local token=$2
  local query='sum by (bucket_name)(rate(storage_googleapis_com:api_request_count{monitored_resource="gcs_bucket",method=~"Write.*"}[30m]))'
  local response

  # --data-urlencode keeps the braces, quotes and regex characters of the
  # query intact in the form-encoded request body.
  response=$(curl -s --header "Authorization: Bearer $token" \
    --header "Content-Type: application/x-www-form-urlencoded" \
    --data-urlencode "query=$query" \
    "https://monitoring.googleapis.com/v1/projects/$project_id/location/global/prometheus/api/v1/query")

  # Error responses have no .data.result; treat that as "no series".
  jq -r '(.data.result // []) | .[] | {bucket_name: .metric.bucket_name, ops: .value[1]}' <<< "$response"
}
#######################################
# Get bucket metadata (including location and storage class) from the Cloud
# Storage JSON API.
# Arguments: $1 - bucket name
#            $2 - OAuth2 bearer token
# Outputs:   the raw JSON response on stdout, unmodified
#######################################
get_bucket_metadata() {
  local bucket_name=$1
  local token=$2
  local response

  response=$(curl -s --header "Authorization: Bearer $token" \
    "https://storage.googleapis.com/storage/v1/b/$bucket_name")

  # Quoted so the JSON is not word-split or glob-expanded by the shell.
  printf '%s\n' "$response"
}

# ---------------------------------------------------------------------------
# Main: for every project in PROJECT_IDS, collect the read/write operation
# rate and region of each bucket and write the result as a JSON array to
# $HOME/bucket_ops_report.json. Every numeric field is stored as a string,
# matching the report format consumed by the runbook and SLI.
# ---------------------------------------------------------------------------

# Check if PROJECT_IDS environment variable is set and valid
if [[ -z "$PROJECT_IDS" ]]; then
  echo "Error: PROJECT_IDS environment variable is not set or empty."
  echo "Usage: export PROJECT_IDS='project_id1,project_id2,...'"
  exit 1
fi

# Read the PROJECT_IDS environment variable into an array
IFS=',' read -r -a projects <<< "$PROJECT_IDS"

# Get the access token using either the provided service account key or gcloud
if [[ -n "$SERVICE_ACCOUNT_KEY" ]]; then
  echo "SERVICE_ACCOUNT_KEY is set. Using it to get the access token."
  access_token=$(get_access_token "$SERVICE_ACCOUNT_KEY")
  # A failed token exchange can surface as an empty string or the text "null".
  if [[ -z "$access_token" || "$access_token" == "null" ]]; then
    echo "Failed to retrieve access token using SERVICE_ACCOUNT_KEY. Exiting..."
    exit 1
  fi
else
  echo "SERVICE_ACCOUNT_KEY is not set. Attempting to set access token using gcloud."
  access_token=$(gcloud auth application-default print-access-token)
  if [[ -z "$access_token" ]]; then
    echo "Failed to retrieve access token using gcloud. Exiting..."
    exit 1
  fi
fi

# One compact JSON object per bucket.
bucket_ops=()

# Sum the rate of a bucket from a stream of {"bucket_name", "ops"} objects.
# Slurping the stream means a bucket without any series yields 0 rather than
# an empty string.
ops_filter='[.[] | select(.bucket_name == $bucket_name) | .ops | tonumber] | add // 0 | round'

# Iterate over each project ID provided
for project_id in "${projects[@]}"; do
  # Tolerate "a, b" style lists and empty entries.
  project_id=${project_id//[[:space:]]/}
  [[ -n "$project_id" ]] || continue
  echo "Processing project: $project_id"

  if ! is_monitoring_api_enabled "$project_id" "$access_token"; then
    echo "Monitoring API is not enabled for project: $project_id"
    continue
  fi
  echo "Monitoring API is enabled for project: $project_id"

  # List all buckets in the project
  buckets=$(list_buckets "$project_id" "$access_token")

  # Get the read/write ops of all buckets
  all_bucket_read_ops=$(get_bucket_read_ops "$project_id" "$access_token")
  all_bucket_write_ops=$(get_bucket_write_ops "$project_id" "$access_token")

  # Iterate over each bucket and match its operation rates
  while IFS= read -r bucket_name; do
    [[ -n "$bucket_name" ]] || continue
    echo "Processing bucket: $bucket_name"

    read_ops=$(jq -rs --arg bucket_name "$bucket_name" "$ops_filter" \
      <<< "$all_bucket_read_ops" 2>/dev/null)
    write_ops=$(jq -rs --arg bucket_name "$bucket_name" "$ops_filter" \
      <<< "$all_bucket_write_ops" 2>/dev/null)
    # Fall back to zero when the metric payload could not be parsed.
    read_ops=${read_ops:-0}
    write_ops=${write_ops:-0}

    total_ops=$(jq -n --argjson r "$read_ops" --argjson w "$write_ops" '$r + $w')
    total_ops=${total_ops:-0}

    echo "Read Rate: $read_ops ops/s, Write Rate: $write_ops ops/s, Total rate: $total_ops ops/s"

    # Get region from the bucket metadata
    metadata=$(get_bucket_metadata "$bucket_name" "$access_token")
    region=$(jq -r '.location // "unknown"' <<< "$metadata" 2>/dev/null)
    region=${region:-unknown}

    # Add bucket operations to the list; jq handles the JSON escaping.
    bucket_ops+=("$(jq -cn \
      --arg project "$project_id" \
      --arg bucket "$bucket_name" \
      --arg write_ops "$write_ops" \
      --arg read_ops "$read_ops" \
      --arg total_ops "$total_ops" \
      --arg region "$region" \
      '{project: $project, bucket: $bucket, write_ops: $write_ops, read_ops: $read_ops, total_ops: $total_ops, region: $region}')")
  done <<< "$buckets"
done

# Output the result in JSON format ("[]" when no bucket was collected)
report_file="$HOME/bucket_ops_report.json"
printf '%s\n' "${bucket_ops[@]}" | jq -cs '.' > "$report_file"

# total_ops is stored as a string, so convert it for a numeric sort.
jq 'sort_by(.total_ops | tonumber) | reverse' "$report_file"
35 changes: 35 additions & 0 deletions codebundles/gcp-bucket-health/runbook.robot
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,34 @@ Check GCP Bucket Security Configuration for `${PROJECT_IDS}`
END
END

Fetch GCP Bucket Storage Operations Rate for `${PROJECT_IDS}`
    [Documentation]    Fetches all GCP buckets in each project and obtains the read and write operations rate that incurs cost. Generates issues if the rate is above a specified threshold.
    [Tags]    gcloud    gcs    gcp    bucket
    # Run the collection script; it writes $HOME/bucket_ops_report.json.
    ${bucket_ops}=    RW.CLI.Run Bash File
    ...    bash_file=bucket_ops_costs.sh
    ...    env=${env}
    ...    secret_file__gcp_credentials_json=${gcp_credentials_json}
    ...    show_in_rwl_cheatsheet=true
    ...    timeout_seconds=240
    ${bucket_ops_output}=    RW.CLI.Run Cli
    ...    cmd=cat $HOME/bucket_ops_report.json | jq .
    ...    env=${env}
    ${bucket_list}=    Evaluate    json.loads(r'''${bucket_ops_output.stdout}''')    json
    # Raise one issue per bucket whose combined read+write rate exceeds the threshold.
    FOR    ${item}    IN    @{bucket_list}
        IF    ${item["total_ops"]} > ${OPS_RATE_THRESHOLD}
            RW.Core.Add Issue
            ...    severity=3
            ...    expected=Storage bucket should be below operations rate threshold.
            ...    actual=Storage bucket is above operations rate threshold.
            ...    title= GCP storage bucket `${item["bucket"]}` in project `${item["project"]}` has a rate of `${item["total_ops"]}` read/write operations per second.
            ...    reproduce_hint=${bucket_ops.cmd}
            ...    details=${item}
            ...    next_steps=Investigate storage operations for GCP storage bucket `${item["bucket"]}` in project `${item["project"]}` to avoid unnecessary cloud provider costs.
        END
    END
    RW.Core.Add Pre To Report    GCP Bucket Usage:\n${bucket_ops_output.stdout}
    RW.Core.Add Pre To Report    Commands Used:\n${bucket_ops.cmd}

*** Keywords ***
Suite Initialization
${gcp_credentials_json}= RW.Core.Import Secret gcp_credentials_json
Expand All @@ -104,6 +132,12 @@ Suite Initialization
... pattern=\w*
... example=0.5
... default=0.5
${OPS_RATE_THRESHOLD}= RW.Core.Import User Variable OPS_RATE_THRESHOLD
... type=string
... description=The rate of read+write operations, in ops/s, to generate an issue on.
... pattern=\w*
... example=10
... default=10
${PUBLIC_ACCESS_BUCKET_THRESHOLD}= RW.Core.Import User Variable PUBLIC_ACCESS_BUCKET_THRESHOLD
... type=string
... description=The amount of storage buckets that can be publicly accessible.
Expand All @@ -113,6 +147,7 @@ Suite Initialization
${HOME}= Get Environment Variable HOME
${OS_PATH}= Get Environment Variable PATH
Set Suite Variable ${USAGE_THRESHOLD} ${USAGE_THRESHOLD}
Set Suite Variable ${OPS_RATE_THRESHOLD} ${OPS_RATE_THRESHOLD}
Set Suite Variable ${PUBLIC_ACCESS_BUCKET_THRESHOLD} ${PUBLIC_ACCESS_BUCKET_THRESHOLD}
Set Suite Variable ${PROJECT_IDS} ${PROJECT_IDS}
Set Suite Variable ${gcp_credentials_json} ${gcp_credentials_json}
Expand Down
27 changes: 25 additions & 2 deletions codebundles/gcp-bucket-health/sli.robot
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Fetch GCP Bucket Storage Utilization for `${PROJECT_IDS}`
... secret_file__gcp_credentials_json=${gcp_credentials_json}
... timeout_seconds=240
${buckets_over_threshold}= RW.CLI.Run Cli
... cmd=cat $HOME/bucket_report.json | jq '[.[] | select(.size_tb > ${USAGE_THRESHOLD})] | length'
... cmd=cat $HOME/bucket_report.json | jq '[.[] | select(.size_tb | tonumber > ${USAGE_THRESHOLD})] | length'
... env=${env}
${buckets_over_utilization}= Evaluate 1 if int(${buckets_over_threshold.stdout}) == 0 else 0
Set Global Variable ${buckets_over_utilization}
Expand All @@ -44,8 +44,24 @@ Check GCP Bucket Security Configuration for `${PROJECT_IDS}`
${public_bucket_score}= Evaluate 1 if int(${total_public_access_buckets.stdout}) <= ${PUBLIC_ACCESS_BUCKET_THRESHOLD} else 0
Set Global Variable ${public_bucket_score}


Fetch GCP Bucket Storage Operations Rate for `${PROJECT_IDS}`
[Documentation] Fetches all GCP buckets in each project and obtains the read and write operations rate that incurs cost.
[Tags] gcloud gcs gcp bucket
${bucket_ops}= RW.CLI.Run Bash File
... bash_file=bucket_ops_costs.sh
... env=${env}
... secret_file__gcp_credentials_json=${gcp_credentials_json}
... show_in_rwl_cheatsheet=true
... timeout_seconds=240
${buckets_over_ops_threshold}= RW.CLI.Run Cli
... cmd=cat $HOME/bucket_ops_report.json | jq '[.[] | select(.total_ops | tonumber > ${OPS_RATE_THRESHOLD})] | length'
... env=${env}
${bucket_ops_rate_score}= Evaluate 1 if int(${buckets_over_ops_threshold.stdout}) == 0 else 0
Set Global Variable ${bucket_ops_rate_score}

Generate Bucket Score
${bucket_health_score}= Evaluate (${buckets_over_utilization} + ${public_bucket_score}) / 2
${bucket_health_score}= Evaluate (${buckets_over_utilization} + ${public_bucket_score} + ${bucket_ops_rate_score}) / 3
${health_score}= Convert to Number ${bucket_health_score} 2
RW.Core.Push Metric ${health_score}

Expand Down Expand Up @@ -73,8 +89,15 @@ Suite Initialization
... pattern=\w*
... example=1
... default=0
${OPS_RATE_THRESHOLD}= RW.Core.Import User Variable OPS_RATE_THRESHOLD
... type=string
... description=The rate of read+write operations, in ops/s, to generate an issue on.
... pattern=\w*
... example=10
... default=10
${HOME}= Get Environment Variable HOME
${OS_PATH}= Get Environment Variable PATH
Set Suite Variable ${OPS_RATE_THRESHOLD} ${OPS_RATE_THRESHOLD}
Set Suite Variable ${USAGE_THRESHOLD} ${USAGE_THRESHOLD}
Set Suite Variable ${PUBLIC_ACCESS_BUCKET_THRESHOLD} ${PUBLIC_ACCESS_BUCKET_THRESHOLD}
Set Suite Variable ${PROJECT_IDS} ${PROJECT_IDS}
Expand Down

0 comments on commit dac2d60

Please sign in to comment.