/**
 * Resolves after `ms` milliseconds.
 * Used as the default `delay_function` of `call_api_with_retry_logic`.
 */
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Parses a response-header value as a whole number of seconds.
 *
 * @param {string|number|undefined} value - Raw header value.
 * @returns {number|undefined} The parsed integer, or `undefined` when the
 *   header is absent or not numeric.
 */
function parse_header_seconds(value) {
  const seconds = Number.parseInt(value, 10);
  return Number.isNaN(seconds) ? undefined : seconds;
}

/**
 * Decides how long to wait before retrying a failed API call.
 *
 * Errors without an HTTP response (network failures, unexpected exceptions)
 * and 5xx responses are retried after `default_delay`. Rate-limit responses
 * (403 or 429) are retried after the delay advertised by the server, unless
 * that delay exceeds 900 seconds. Every other 4xx response is not retried.
 *
 * @param {*} error - Value thrown by the API call; `error.response` is read
 *   for `status`, `headers` and `data.message` when present.
 * @param {number} default_delay - Fallback delay, in seconds.
 * @param {number} secondary_rate_limit_delay_base - Base delay, in seconds,
 *   applied when a secondary rate limit is reported.
 * @returns {number|undefined} Seconds to wait before the next attempt, or
 *   `undefined` when the operation must be aborted.
 */
function get_retry_wait_time(error, default_delay, secondary_rate_limit_delay_base) {
  const max_waiting_time = 900;
  const response = error?.response;

  if (!response || !response.status || !response.headers) {
    console.error(`Unknown error. Retrying in ${default_delay} seconds...`);
    return default_delay;
  }

  const { status, headers } = response;

  if (status === 403 || status === 429) {
    if (headers["x-ratelimit-remaining"] === "0") {
      const reset = parse_header_seconds(headers["x-ratelimit-reset"]);
      // Fall back to the default delay when the reset header is missing, and
      // never wait a negative time when the reset instant is already past.
      const wait_time = reset === undefined
        ? default_delay
        : Math.max(0, reset - Math.floor(Date.now() / 1000));
      if (wait_time > max_waiting_time) {
        console.error(`Rate limit reset time is in ${wait_time} seconds. Operation aborted.`);
        return undefined;
      }
      console.error(`Rate limit exceeded. Retrying in ${wait_time} seconds...`);
      return wait_time;
    }

    const retry_after = parse_header_seconds(headers["retry-after"]);
    if (retry_after !== undefined && retry_after > max_waiting_time) {
      console.error(`Retry after time is in ${retry_after} seconds. Operation aborted.`);
      return undefined;
    }

    // The response body is optional: guard the lookup so that a missing body
    // does not replace the API error with a TypeError.
    const message = response.data?.message;
    if (typeof message === "string" && message.includes("secondary rate limit")) {
      // Up to 50% of random jitter is added on top of the base delay.
      const wait_time = secondary_rate_limit_delay_base
        + Math.floor(0.5 * Math.random() * secondary_rate_limit_delay_base);
      console.error(`Secondary rate limit exceeded. Retrying in ${wait_time} seconds...`);
      return wait_time;
    }

    // Without a usable retry-after header there is nothing to honour.
    const wait_time = retry_after ?? default_delay;
    console.error(`Rate limit exceeded. Retrying in ${wait_time} seconds...`);
    return wait_time;
  }

  if (status >= 500) {
    console.error(`An internal error occurred on server. Retrying in ${default_delay} seconds...`);
    return default_delay;
  }

  if (status >= 400) {
    console.error(`Client error: ${status}. Operation aborted.`);
    return undefined;
  }

  // A rejection carrying a status below 400 is unexpected: treat it as unknown.
  console.error(`Unknown error. Retrying in ${default_delay} seconds...`);
  return default_delay;
}

/**
 * Calls `api_call` and retries it when it fails with a retryable error.
 *
 * @param {() => Promise<*>} api_call - Function performing the API request.
 * @param {number} [max_retries=5] - Maximum number of attempts.
 * @param {number} [default_delay=5] - Delay, in seconds, between attempts
 *   when the server does not advertise one.
 * @param {number} [secondary_rate_limit_delay_base=60] - Base delay, in
 *   seconds, applied when a secondary rate limit is hit.
 * @param {(ms: number) => Promise<*>} [delay_function=delay] - Function used
 *   to wait; receives milliseconds. Injectable so tests do not sleep.
 * @returns {Promise<*>} The value `api_call` resolves with. Resolves with
 *   `undefined` without calling `api_call` when `max_retries` is below 1.
 * @throws The last error thrown by `api_call` when it is not retryable or
 *   when every attempt failed.
 */
async function call_api_with_retry_logic(api_call, max_retries = 5, default_delay = 5, secondary_rate_limit_delay_base = 60, delay_function = delay) {
  for (let attempt = 1; attempt <= max_retries; attempt++) {
    try {
      return await api_call();
    } catch (error) {
      console.error(`Attempt ${attempt} failed: ${error?.message ?? error}`);

      const wait_time = get_retry_wait_time(error, default_delay, secondary_rate_limit_delay_base);
      if (wait_time === undefined || attempt === max_retries) {
        throw error;
      }

      await delay_function(wait_time * 1000);
    }
  }
}
expect(delay_function).toHaveBeenCalledWith(default_delay * 1000); + }); + + test('should throw error when all retries fail', async () => { + const error = new Error('failure'); + api_call.mockRejectedValue(error); + + await expect(call_api_with_retry_logic(api_call, max_retries, default_delay, secondary_rate_limit_delay_base, delay_function)).rejects.toThrow(error); + expect(delay_function).toHaveBeenCalledTimes(max_retries-1); + expect(delay_function).toHaveBeenCalledWith(default_delay * 1000); + }); + + test('should handle 403 status with rate limit exceeded', async () => { + const error = new Error('failure'); + const seconds_to_wait_before_retry = 2; + error.response = { + status: 403, + headers: { + 'retry-after': '5', + 'x-ratelimit-remaining': '0', + 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + seconds_to_wait_before_retry) + }, + data: { + message: 'Rate limit exceeded' + } + }; + api_call.mockRejectedValueOnce(error).mockResolvedValueOnce('success'); + + const result = await call_api_with_retry_logic(api_call, max_retries, default_delay, secondary_rate_limit_delay_base, delay_function); + + expect(result).toEqual('success'); + expect(delay_function).toHaveBeenCalledTimes(1); + expect(delay_function).toHaveBeenCalledWith(seconds_to_wait_before_retry * 1000); + }); + + test('should handle 403 status with secondary rate limit exceeded', async () => { + const error = new Error('failure'); + error.response = { + status: 403, + headers: {}, + data: { + message: 'secondary rate limit exceeded' + } + }; + api_call.mockRejectedValueOnce(error).mockResolvedValueOnce('success'); + + const result = await call_api_with_retry_logic(api_call, max_retries, default_delay, secondary_rate_limit_delay_base, delay_function); + + expect(result).toEqual('success'); + expect(delay_function).toHaveBeenCalledTimes(1); + const [[wait_time]] = delay_function.mock.calls; + expect(wait_time).toBeGreaterThanOrEqual(secondary_rate_limit_delay_base*1000); + }); + + 
test('should handle 500 status', async () => { + const error = new Error('failure'); + error.response = { + status: 500, + headers: {}, + data: { + message: 'Server error' + } + }; + api_call.mockRejectedValueOnce(error).mockResolvedValueOnce('success'); + + const result = await call_api_with_retry_logic(api_call, max_retries, default_delay, secondary_rate_limit_delay_base, delay_function); + + expect(result).toEqual('success'); + expect(delay_function).toHaveBeenCalledTimes(1); + expect(delay_function).toHaveBeenCalledWith(default_delay * 1000); + }); + + test('should handle 400 status and abort operation', async () => { + const error = new Error('failure'); + error.response = { + status: 400, + headers: {}, + data: { + message: 'Bad request' + } + }; + api_call.mockRejectedValueOnce(error); + + await expect(call_api_with_retry_logic(api_call, max_retries, default_delay, secondary_rate_limit_delay_base, delay_function)).rejects.toThrow(error); + expect(api_call).toHaveBeenCalledTimes(1); + expect(delay_function).not.toHaveBeenCalled(); + }); + + test('should handle 403 status with retry after exceeding max waiting time', async () => { + const error = new Error('failure'); + error.response = { + status: 403, + headers: { + 'retry-after': `${max_waiting_time + 1}`, + }, + data: { + message: 'Rate limit exceeded' + } + }; + api_call.mockRejectedValueOnce(error); + + await expect(call_api_with_retry_logic(api_call, max_retries, default_delay, secondary_rate_limit_delay_base, delay_function)).rejects.toThrow(error); + expect(api_call).toHaveBeenCalledTimes(1); + expect(delay_function).not.toHaveBeenCalled(); + }); + + test('should handle 403 status with rate limit reset time exceeding max waiting time', async () => { + const error = new Error('failure'); + error.response = { + status: 403, + headers: { + 'x-ratelimit-remaining': '0', + 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + max_waiting_time + 1) + }, + data: { + message: 'Rate limit exceeded' + } + 
// Longest wait, in seconds, tolerated before the whole run is aborted.
const MAX_WAITING_TIME = 900;

/**
 * Tells whether `error` is a rate-limit response (403 or 429) whose
 * advertised wait exceeds `MAX_WAITING_TIME`, i.e. one for which processing
 * the remaining committers is pointless.
 *
 * @param {*} error - Value caught while processing a committer.
 * @returns {boolean} `true` when the run should be aborted.
 */
function is_long_rate_limit_error(error) {
  const response = error?.response;
  if (!response || (response.status !== 403 && response.status !== 429)) {
    return false;
  }

  const headers = response.headers ?? {};
  const seconds_until_reset = Number.parseInt(headers['x-ratelimit-reset'], 10) - Math.floor(Date.now() / 1000);
  if (headers['x-ratelimit-remaining'] === '0' && seconds_until_reset > MAX_WAITING_TIME) {
    return true;
  }

  // A missing header parses to NaN, and `NaN > x` is false.
  return Number.parseInt(headers['retry-after'], 10) > MAX_WAITING_TIME;
}

/**
 * Creates, labels and assigns one issue per committer.
 *
 * A failure for one committer is logged and does not prevent the others from
 * being processed, unless it is a rate-limit error whose wait time exceeds
 * `MAX_WAITING_TIME`: in that case the error is rethrown.
 *
 * @param {object} github_context - Context forwarded unchanged to the helpers
 *   that call the API.
 * @param {object} findings_per_committer - Findings keyed by committer; each
 *   value is forwarded to `create_issue_for_committer`.
 * @returns {Promise<void>}
 * @throws The caught error when it reports a rate limit that cannot be
 *   waited out.
 */
async function create_issues(github_context, findings_per_committer) {
  const max_retries = 5;
  const default_delay = 5;
  const secondary_rate_limit_delay_base = 60;

  // Object.entries only visits own keys, unlike `for...in`.
  for (const [committer, findings] of Object.entries(findings_per_committer ?? {})) {
    try {
      const issue = await create_issue_for_committer(github_context, committer, findings, max_retries, default_delay, secondary_rate_limit_delay_base);
      await add_label_to_issue(github_context, issue, max_retries, default_delay, secondary_rate_limit_delay_base);
      await assign_issue_to_committer(github_context, committer, issue, max_retries, default_delay, secondary_rate_limit_delay_base);
    } catch (error) {
      console.error(`Failed to process committer ${committer}: ${error?.message ?? error}`);
      if (is_long_rate_limit_error(error)) {
        throw error;
      }
    }
  }
}
\n\n'); + + const number_of_commits_containing_secrets = Object.keys(commits).length; + const total_number_of_secrets = Object.values(commits).map(findings => findings.length).reduce((a, b) => a + b, 0); + + const issue = `# ⚠️ WARNING: SECRET${total_number_of_secrets > 1 ? 'S' : ''} PUSHED TO MAIN BRANCH +### ${ total_number_of_secrets } secret${total_number_of_secrets > 1 ? 's have' : ' has'} been found in ${ number_of_commits_containing_secrets } commit${ number_of_commits_containing_secrets > 1 ? 's' : ''}. + +

+ +## FINDINGS +${FINDINGS_PER_COMMIT} + +Please note that the detector considers all secrets found in the commit${ number_of_commits_containing_secrets > 1 ? 's' : ''}, even if they have been pushed earlier. + +
+ +You can find more information in the workflow run that generated this report:\\ +${github_context.server_url}/${github_context.secrets_finder_repository}/actions/runs/${github_context.run_id} + +

+ +## REMEDIATION PROCEDURE +- You **MUST** rotate the credentials that were stored in plain text. Assume they have already been compromised. +- You **MUST** move the new credentials to an approved secrets management service and pattern. +- You **SHOULD** clear the plaintext secrets from Git history. + +

+ +To clean-up your Git history, you can use the following guidance: +- [Removing Sensitive Data - GitHub](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/removing-sensitive-data-from-a-repository) + +

+ +You can also find more information about how to rotate your secrets here: +https://howtorotate.com/docs/introduction/getting-started/` + + const { data } = await call_api_with_retry_logic(() => github_context.api.issues.create({ + owner: github_context.scanned_repository_owner, + repo: github_context.scanned_repository_name, + title: `⚠️ Secret${total_number_of_secrets > 1 ? 's' : ''} pushed to main branch (${ number_of_commits_containing_secrets } commit${ number_of_commits_containing_secrets > 1 ? 's' : ''} affected)`, + body: issue + }), max_retries, default_delay, secondary_rate_limit_delay_base); + + console.log(`New issue created in repository: ${ data.html_url }`); + return data.number; +} + +async function add_label_to_issue(github_context, issue, max_retries, default_delay, secondary_rate_limit_delay_base) { + console.log(`Adding label to issue: ${issue}`) + await call_api_with_retry_logic(() => github_context.api.issues.addLabels({ + owner: github_context.scanned_repository_owner, + repo: github_context.scanned_repository_name, + issue_number: issue, + labels: ['leaked-secrets'] + }), max_retries, default_delay, secondary_rate_limit_delay_base); +} + +async function assign_issue_to_committer(github_context, committer, issue, max_retries, default_delay, secondary_rate_limit_delay_base) { + console.log(`Assigning issue to committer: ${committer}`); + await call_api_with_retry_logic(() => github_context.api.issues.addAssignees({ + owner: github_context.scanned_repository_owner, + repo: github_context.scanned_repository_name, + issue_number: issue, + assignees: [committer] + }), max_retries, default_delay, secondary_rate_limit_delay_base); +} + +module.exports = create_issues; diff --git a/.github/workflows/package.json b/.github/workflows/package.json new file mode 100644 index 0000000..f8a09a7 --- /dev/null +++ b/.github/workflows/package.json @@ -0,0 +1,13 @@ +{ + "name": "github_api_client", + "version": "1.0.0", + "license": "MIT", + "main": "api.js", 
+ "scripts": { + "test": "jest" + }, + "author": "Thomson Reuters", + "devDependencies": { + "jest": "29.7.0" + } + } diff --git a/.github/workflows/secrets-detection-pull-request.yaml b/.github/workflows/secrets-detection-pull-request.yaml new file mode 100644 index 0000000..16be3e4 --- /dev/null +++ b/.github/workflows/secrets-detection-pull-request.yaml @@ -0,0 +1,248 @@ +name: Secrets detection in pull requests + +on: + repository_dispatch: + types: [secrets_detection_in_pull_request] + +jobs: + secrets-detection: + if: ${{ github.event.client_payload.event.pull_request && contains(fromJSON('["opened", "synchronize", "reopened"]'), github.event.client_payload.event.action) }} + runs-on: ubuntu-latest + steps: + - name: Set environment variables + id: set-environment-variables + run: | + SCAN_UUID=$(uuidgen -r | tr '[:upper:]' '[:lower:]') + REPOSITORY_SCAN_UUID=$(uuidgen -r | tr '[:upper:]' '[:lower:]') + echo "SCAN_UUID=$SCAN_UUID" >> "$GITHUB_ENV" + echo "REPOSITORY_SCAN_UUID=$REPOSITORY_SCAN_UUID" >> "$GITHUB_ENV" + echo "SCAN_REPORT_FILE=$SCAN_UUID.json" >> "$GITHUB_ENV" + + - name: Checkout scanning repository + id: checkout-scanning-repository + uses: actions/checkout@v4 + with: + path: scanning-repository + + - name: Checkout repository + id: checkout-repository + uses: actions/checkout@v4 + with: + repository: ${{ github.event.client_payload.event.repository.full_name }} + token: ${{ secrets.ORG_TOKEN }} + fetch-depth: 0 + path: repository + + - name: Fetch information about repository + id: fetch-information-for-repository + working-directory: "${{ github.workspace }}/repository" + run: | + echo "Repository to scan: ${{ github.event.client_payload.event.repository.name }}" + CURRENT_BRANCH="${{ github.event.client_payload.event.pull_request.head }}" + BASE_BRANCH="${{ github.event.client_payload.event.pull_request.base }}" + FIRST_COMMIT_OF_CURRENT_BRANCH="$(git merge-base origin/$BASE_BRANCH origin/$CURRENT_BRANCH)" + echo "Current branch: 
# Resolves the optional custom detectors configuration file inside the
# checked-out scanning repository. Sets the `custom_config` output to the
# file's full path when it exists and to an empty string otherwise; the scan
# step uses that output to decide whether to pass `--config`.
- name: Check for custom detectors config file
  id: check-config
  continue-on-error: true
  if: ${{ vars.CUSTOM_DETECTORS_CONFIG_FILE != '' }}
  run: |
    custom_detectors_config_file_full_path="${GITHUB_WORKSPACE}/scanning-repository/${{ vars.CUSTOM_DETECTORS_CONFIG_FILE }}"
    if [ -f "$custom_detectors_config_file_full_path" ]; then
      echo "Custom detectors config file found: $custom_detectors_config_file_full_path"
      echo "custom_config=$custom_detectors_config_file_full_path" >> "$GITHUB_OUTPUT"
    else
      echo "Custom detectors config file not found. Using default configuration."
      echo "custom_config=" >> "$GITHUB_OUTPUT"
    fi
+ else + docker run --name secrets-finder -v "$(pwd):/repository" -i ghcr.io/trufflesecurity/trufflehog@sha256:62d6e889cc2f647321617dcd9142b23f5ee7a577754c9dce3f453263e333de01 git file:///repository --branch "$CURRENT_BRANCH" --since-commit "$FIRST_COMMIT_OF_CURRENT_BRANCH" --fail --json --no-update $(if [ "${{ vars.REPORT_ONLY_VERIFIED_SECRETS }}" = "true" ]; then echo "--only-verified"; fi); exit_code=$? + fi + end=$(date -u +"%Y-%m-%dT%H:%M:%S.%6N") + + echo "exit_code=$exit_code" >> $GITHUB_OUTPUT + echo "start=$start" >> $GITHUB_OUTPUT + echo "end=$end" >> $GITHUB_OUTPUT + + exit $exit_code + + - name: Retrieve logs from container + id: retrieve-logs-container + continue-on-error: true + if: ${{ always() && steps.scan.outcome == 'failure' }} + run: | + docker logs secrets-finder | jq -s '[.[] | select(has("SourceMetadata"))] | unique' > $SCAN_REPORT_FILE + FOUND_SECRETS=$(jq 'length > 0' $SCAN_REPORT_FILE) + echo "found_secrets=$FOUND_SECRETS" >> $GITHUB_OUTPUT + + - name: Remove container + id: remove-container + continue-on-error: true + run: | + docker rm secrets-finder + docker images | grep ghcr.io/trufflesecurity/trufflehog | awk '{print $3}' | xargs docker rmi + + - name: Change repository visibility (if needed) + id: change-repo-visibility + if: ${{ always() && steps.scan.outcome == 'failure' && steps.retrieve-logs-container.outputs.found_secrets == 'true' && vars.HIDE_PUBLIC_REPOSITORIES_IF_SECRETS_FOUND == 'true' && github.event.client_payload.event.repository.visibility == 'public' }} + continue-on-error: true + run: | + echo "Findings found. Changing repository visibility to private." 
+ curl -X PATCH -H "Authorization: token ${{ secrets.ORG_TOKEN }}" -H "Accept: application/vnd.github.nebula-preview+json" https://api.github.com/repos/${{ github.event.client_payload.event.repository.full_name }} -d '{"visibility": "private"}' &>/dev/null + + - name: Push a review in the pull request if secrets have been detected + id: ask-for-review-if-secrets-detected + if: ${{ always() && steps.scan.outcome == 'failure' && steps.retrieve-logs-container.outputs.found_secrets == 'true' && ((steps.change-repo-visibility.outcome == 'success') || (github.event.client_payload.event.repository.visibility != 'public')) }} + continue-on-error: true + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.ORG_TOKEN }} + script: | + const fs = require('fs'); + const path = require('path'); + const call_api_with_retry_logic = require(path.join(process.env.GITHUB_WORKSPACE, 'scanning-repository', '.github', 'workflows', 'api.js')); + + const SCAN_REPORT_FILE_CONTENT = fs.readFileSync('${{ env.SCAN_REPORT_FILE }}'); + const SECRETS = JSON.parse(SCAN_REPORT_FILE_CONTENT); + const DEFAULT_VALUE = "N/A"; + + const LIST_OF_FINDINGS = SECRETS.map((entry, index) => { + const detector = entry.DetectorName || DEFAULT_VALUE; + const { email, commit, file, line } = entry.SourceMetadata?.Data?.Git ?? {}; + const commitLink = commit ? `[${commit.substring(0, 7)}](${{ github.server_url }}/${{ github.event.client_payload.event.repository.full_name }}/commit/${commit})` : DEFAULT_VALUE; + const fileLink = file ? `[${file}](${{ github.server_url }}/${{ github.event.client_payload.event.repository.full_name }}/blob/${commit}/${ encodeURI(file) }?plain=1#L${ line })` : DEFAULT_VALUE; + + return `**Finding ${index + 1}**\n` + + `>    **Type**\n   ${detector}\n` + + `>   \n` + + `>    **Author**\n   ${email ? 
email : DEFAULT_VALUE}\n` + + `>   \n` + + `>    **Commit**\n   ${commitLink}\n` + + `>   \n` + + `>    **File**\n   ${fileLink}\n\n`; + }); + + const issue = `# ⚠️ WARNING: SECRET${SECRETS.length > 1 ? 'S' : ''} FOUND IN PULL REQUEST + ### ${SECRETS.length > 1 ? 'S' : 'A s'}ecret${SECRETS.length > 1 ? 's have' : ' has'} been found in this pull request. + +

+ + ## FINDINGS + ${LIST_OF_FINDINGS.join('
\n\n')} + + You can find more information in the workflow run that generated this report:\\ + ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + +

+ + ## REMEDIATION PROCEDURE + - You **MUST** rotate the credentials that were stored in plain text. Assume they have already been compromised. + - You **MUST** move the new credentials to an approved secrets management service and pattern. + - You **SHOULD** clear the plaintext secrets from Git history. + +

+ + To clean-up your Git history, you can use the following guidance: + - [Removing Sensitive Data - GitHub](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/removing-sensitive-data-from-a-repository) + +

+ + You can also find more information about how to rotate your secrets here: + https://howtorotate.com/docs/introduction/getting-started/ + +

# Builds the JSON report for this run and stores it in $SCAN_REPORT_FILE.
# Four cases are handled:
#   1. scan skipped, or container logs could not be retrieved -> "failure", no findings
#   2. scan step failed with exit code 183                    -> "success", findings taken from the report file
#   3. scan step failed with any other exit code              -> "failure", no findings
#   4. scan step succeeded                                    -> "success", no findings
- name: Generate final report
  id: generate-final-report
  if: ${{ always() }}
  run: |
    TMP_FILE=$(mktemp)

    if [[ "${{ steps.scan.outcome }}" == "skipped" ]] || [[ "${{ steps.retrieve-logs-container.outcome }}" == "failure" ]]; then
      jq -n '{ "scan_type": "prevention", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "status": "failure", "scan_context": "pull_request", "scan_mode": "${{ vars.REPORT_ONLY_VERIFIED_SECRETS == 'true' && 'verified' || 'all' }}", "scan_uuid": "${{ env.SCAN_UUID }}", "scan_identifier": "github_secrets_finder", "scm": "github", "results": [ { "scan_uuid": "${{ env.REPOSITORY_SCAN_UUID }}", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "organization": "${{ github.event.client_payload.event.repository.owner }}", "repository": "${{ github.event.client_payload.event.repository.name }}", "status": "failure", "metadata": { "identifier": "${{ github.event.client_payload.event.pull_request.number }}", "created_at": "${{ github.event.client_payload.event.pull_request.created_at }}" }, "findings": [] } ] }' > "$TMP_FILE"
    else
      if [[ "${{ steps.scan.outcome }}" == "failure" ]]; then
        if [[ "${{ steps.scan.outputs.exit_code }}" == "183" ]]; then
          # The report file holds the findings extracted from the container logs; they become "findings".
          jq -c '{ "scan_type": "prevention", "start": "${{ steps.scan.outputs.start }}", "end": "${{ steps.scan.outputs.end }}", "status": "success", "scan_context": "pull_request", "scan_mode": "${{ vars.REPORT_ONLY_VERIFIED_SECRETS == 'true' && 'verified' || 'all' }}", "scan_uuid": "${{ env.SCAN_UUID }}", "scan_identifier": "github_secrets_finder", "scm": "github", "results": [{ "scan_uuid": "${{ env.REPOSITORY_SCAN_UUID }}", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "organization": "${{ github.event.client_payload.event.repository.owner }}", "repository": "${{ github.event.client_payload.event.repository.name }}", "status": "success", "metadata": { "identifier": "${{ github.event.client_payload.event.pull_request.number }}", "created_at": "${{ github.event.client_payload.event.pull_request.created_at }}" }, "findings": . }] }' "$SCAN_REPORT_FILE" > "$TMP_FILE"
        else
          jq -n '{ "scan_type": "prevention", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "status": "failure", "scan_mode": "${{ vars.REPORT_ONLY_VERIFIED_SECRETS == 'true' && 'verified' || 'all' }}", "scan_context": "pull_request", "scan_uuid": "${{ env.SCAN_UUID }}", "scan_identifier": "github_secrets_finder", "scm": "github", "results": [ { "scan_uuid": "${{ env.REPOSITORY_SCAN_UUID }}", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "organization": "${{ github.event.client_payload.event.repository.owner }}", "repository": "${{ github.event.client_payload.event.repository.name }}", "status": "failure", "metadata": { "identifier": "${{ github.event.client_payload.event.pull_request.number }}", "created_at": "${{ github.event.client_payload.event.pull_request.created_at }}" }, "findings": [] } ] }' > "$TMP_FILE"
        fi
      else
        # Clean scan: the findings list is empty. It previously held a placeholder object
        # (literal "$REPOSITORY_SCAN_UUID", status "failure") that is not a finding.
        jq -n '{ "scan_type": "prevention", "start": "${{ steps.scan.outputs.start }}", "end": "${{ steps.scan.outputs.end }}", "status": "success", "scan_mode": "${{ vars.REPORT_ONLY_VERIFIED_SECRETS == 'true' && 'verified' || 'all' }}", "scan_context": "pull_request", "scan_uuid": "${{ env.SCAN_UUID }}", "scan_identifier": "github_secrets_finder", "scm": "github", "results": [{ "scan_uuid": "${{ env.REPOSITORY_SCAN_UUID }}", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "organization": "${{ github.event.client_payload.event.repository.owner }}", "repository": "${{ github.event.client_payload.event.repository.name }}", "status": "success", "metadata": { "identifier": "${{ github.event.client_payload.event.pull_request.number }}", "created_at": "${{ github.event.client_payload.event.pull_request.created_at }}" }, "findings": [] }] }' > "$TMP_FILE"
      fi
    fi

    mv "$TMP_FILE" "$SCAN_REPORT_FILE"
github.event.client_payload.event.repository.name }}" + echo "Pusher: ${{ github.event.client_payload.event.pusher.name }}" + - name: Get commits from the event + id: get-all-commits + env: + ORG_TOKEN: ${{ secrets.ORG_TOKEN }} + run: | + NUMBER_OF_COMMITS_IN_PUSH_EVENT=$(jq -c -r '.commits | length' <<< '${{ toJson(github.event.client_payload.event) }}') + + all_commits_in_push=$(jq -c -r '.commits' <<< '${{ toJson(github.event.client_payload.event) }}') + echo "all_commits_in_push=$all_commits_in_push" >> $GITHUB_OUTPUT + + echo "Commits found in the push event:" + echo "$NUMBER_OF_COMMITS_IN_PUSH_EVENT" + + # This logic is commented out because GitHub said that there should be no limit on the number of commits in a push event + # if [ $NUMBER_OF_COMMITS_IN_PUSH_EVENT -lt 20 ]; then + # IMPORTANT: REMOVE ESCAPE CHARACTERS IF UNCOMMENTING THIS CODE + # all_commits_in_push=$(jq -c -r '.commits' <<< '$\{\{ toJson(github.event.client_payload.event) \}\}') + # echo "all_commits_in_push=$all_commits_in_push" >> $GITHUB_OUTPUT + + # echo "Commits found in the push event:" + # echo "$NUMBER_OF_COMMITS_IN_PUSH_EVENT" + # else + # IMPORTANT: REMOVE ESCAPE CHARACTERS IF UNCOMMENTING THIS CODE + # COMPARE_URL="https://api.github.com/repos/$\{\{ github.event.client_payload.event.repository.owner \}\}/$\{\{ github.event.client_payload.event.repository.name \}\}/compare/$\{\{ github.event.client_payload.event.before \}\}...$\{\{ github.event.client_payload.event.after \}\}" + # echo "Compare URL: $COMPARE_URL" + # PAGE=1 + # ALL_COMMITS="[]" + # while true + # do + # RESPONSE=$(curl -H "Authorization: token $ORG_TOKEN" -H "Accept: application/vnd.github+json" -s "$COMPARE_URL?per_page=100&page=$PAGE") + # COMMITS_ON_PAGE=$(echo "$RESPONSE" | jq -c -r '.commits') + # NB_COMMITS_ON_PAGE=$(echo "$COMMITS_ON_PAGE" | jq -c -r 'length') + # if [ $NB_COMMITS_ON_PAGE -eq 0 ]; then + # break + # fi + # ALL_COMMITS=$(jq -c -s '.[0] + .[1]' <<< "$ALL_COMMITS $COMMITS_ON_PAGE") + # if [ 
$NB_COMMITS_ON_PAGE -lt 100 ]; then + # break + # fi + # PAGE=$((PAGE+1)) + # done + # ALL_COMMITS=$(jq -c '[.[] | .id = .sha | del(.sha) | .author = .author.login | .timestamp = .commit.author.date | { id, url, timestamp, author } ]' <<< "$ALL_COMMITS") + # echo "all_commits_in_push=$ALL_COMMITS" >> $GITHUB_OUTPUT + # fi + - name: Display information about commits + id: display-information-about-commits + run: | + NUMBER_OF_COMMITS=$(echo '${{ steps.get-all-commits.outputs.all_commits_in_push || '[]' }}' | jq '. | length') + COMMITS=$(echo '${{ steps.get-all-commits.outputs.all_commits_in_push || '[]' }}' | jq -c '[.[] | .id]') + echo "Number of commits to scan: $NUMBER_OF_COMMITS" + echo "Commits to scan: $COMMITS" + + secrets-detection: + needs: review-received-event + if: ${{ github.event.client_payload.event.commits && (github.event.client_payload.event.ref == format('{0}{1}', 'refs/heads/', github.event.client_payload.event.repository.default_branch)) }} + continue-on-error: true + runs-on: ubuntu-latest + strategy: + max-parallel: 3 + matrix: + commit: ${{ fromJson(needs.review-received-event.outputs.ALL_COMMITS) }} + fail-fast: false + steps: + - name: Set environment variables + id: set-environment-variables + run: | + SCAN_UUID=$(uuidgen -r | tr '[:upper:]' '[:lower:]') + REPOSITORY_SCAN_UUID=$(uuidgen -r | tr '[:upper:]' '[:lower:]') + echo "SCAN_UUID=$SCAN_UUID" >> "$GITHUB_ENV" + echo "REPOSITORY_SCAN_UUID=$REPOSITORY_SCAN_UUID" >> "$GITHUB_ENV" + echo "SCAN_REPORT_FILE=$SCAN_UUID.json" >> "$GITHUB_ENV" + + - name: Checkout scanning repository + id: checkout-scanning-repository + uses: actions/checkout@v4 + with: + path: scanning-repository + + - name: Checkout repository + id: checkout-repository + uses: actions/checkout@v4 + with: + repository: ${{ github.event.client_payload.event.repository.full_name }} + token: ${{ secrets.ORG_TOKEN }} + ref: ${{ matrix.commit.id }} + path: repository + + - name: Check for custom detectors config file + id: 
check-config + continue-on-error: true + if: ${{ vars.CUSTOM_DETECTORS_CONFIG_FILE != '' }} + run: | + custom_detectors_config_file_full_path="${GITHUB_WORKSPACE}/scanning-repository/${{ vars.CUSTOM_DETECTORS_CONFIG_FILE }}" + if [ -f "$custom_detectors_config_file_full_path" ]; then + echo "Custom detectors config file found: $custom_detectors_config_file_full_path" + echo "custom_config=$custom_detectors_config_file_full_path" >> $GITHUB_OUTPUT + else + echo "Custom detectors config file not found. Using default configuration." + echo "custom_config=" >> $GITHUB_OUTPUT + fi + + - name: Perform secrets detection + id: scan + timeout-minutes: ${{ vars.SCAN_TIMEOUT_MINUTES || 15 }} + continue-on-error: true + working-directory: "${{ github.workspace }}/repository" + run: | + set +e + start=$(date -u +"%Y-%m-%dT%H:%M:%S.%6N") + if [ -n "${{ steps.check-config.outputs.custom_config }}" ]; then + docker run --name secrets-finder -v "$(pwd):/repository" -v "${{ steps.check-config.outputs.custom_config }}:/configuration.yaml" -i ghcr.io/trufflesecurity/trufflehog@sha256:62d6e889cc2f647321617dcd9142b23f5ee7a577754c9dce3f453263e333de01 git file:///repository --fail --json --no-update --config=/configuration.yaml $(if [ "${{ vars.REPORT_ONLY_VERIFIED_SECRETS }}" = "true" ]; then echo "--only-verified"; fi); exit_code=$? + else + docker run --name secrets-finder -v "$(pwd):/repository" -i ghcr.io/trufflesecurity/trufflehog@sha256:62d6e889cc2f647321617dcd9142b23f5ee7a577754c9dce3f453263e333de01 git file:///repository --fail --json --no-update $(if [ "${{ vars.REPORT_ONLY_VERIFIED_SECRETS }}" = "true" ]; then echo "--only-verified"; fi); exit_code=$? 
+ fi + end=$(date -u +"%Y-%m-%dT%H:%M:%S.%6N") + + echo "exit_code=$exit_code" >> $GITHUB_OUTPUT + echo "start=$start" >> $GITHUB_OUTPUT + echo "end=$end" >> $GITHUB_OUTPUT + + exit $exit_code + + - name: Retrieve logs from container + id: retrieve-logs-container + continue-on-error: true + if: ${{ always() && steps.scan.outcome == 'failure' }} + run: | + docker logs secrets-finder | jq -s '[.[] | select(has("SourceMetadata"))] | unique' > $SCAN_REPORT_FILE + FOUND_SECRETS=$(jq 'length > 0' $SCAN_REPORT_FILE) + echo "found_secrets=$FOUND_SECRETS" >> $GITHUB_OUTPUT + + - name: Remove container + id: remove-container + continue-on-error: true + run: | + docker rm secrets-finder + docker images | grep ghcr.io/trufflesecurity/trufflehog | awk '{print $3}' | xargs docker rmi + + - name: Display findings + id: display-findings + if: ${{ always() && steps.scan.outcome == 'failure' && steps.retrieve-logs-container.outputs.found_secrets == 'true' && vars.DEBUG == 'true' }} + continue-on-error: true + run: cat $SCAN_REPORT_FILE + + - name: Generate list of findings per committer + id: generate-findings-per-committer + if: ${{ always() && steps.scan.outcome == 'failure' && steps.retrieve-logs-container.outputs.found_secrets == 'true' }} + continue-on-error: true + run: | + LIST_OF_FINDINGS=$(cat $SCAN_REPORT_FILE | jq '.') + echo '{}' | jq -c --argjson list_of_findings "$LIST_OF_FINDINGS" '{"${{ matrix.commit.author }}": { "${{ matrix.commit.id }}": $list_of_findings }}' > "findings-${{ matrix.commit.id }}.json" + + - name: Upload updated list of findings per committer + if: ${{ always() && steps.scan.outcome == 'failure' && steps.generate-findings-per-committer.outcome == 'success' && steps.retrieve-logs-container.outputs.found_secrets == 'true' }} + continue-on-error: true + uses: actions/upload-artifact@v4 + with: + name: findings-${{ matrix.commit.id }} + path: findings-${{ matrix.commit.id }}.json + + - name: Generate final report + id: generate-final-report + if: ${{ 
always() }} + run: | + TMP_FILE=$(mktemp) + + if [[ "${{ steps.scan.outcome }}" == "skipped" ]] || [[ "${{ steps.retrieve-logs-container.outcome }}" == "failure" ]]; then + jq -n '{ "scan_type": "prevention", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "status": "failure", "scan_mode": "${{ vars.REPORT_ONLY_VERIFIED_SECRETS == 'true' && 'verified' || 'all' }}", "scan_context": "commit", "scan_uuid": "${{ env.SCAN_UUID }}", "scan_identifier": "github_secrets_finder", "scm": "github", "results": [{ "scan_uuid": "${{ env.REPOSITORY_SCAN_UUID }}", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "organization": "${{ github.event.client_payload.event.repository.owner }}", "repository": "${{ github.event.client_payload.event.repository.name }}", "status": "failure", "metadata": { "identifier": "${{ matrix.commit.id }}", "created_at": "${{ matrix.commit.timestamp }}" }, "findings": [] }] }' > "$TMP_FILE" + else + if [[ "${{ steps.scan.outcome }}" == "failure" ]]; then + if [[ "${{ steps.scan.outputs.exit_code }}" == "183" ]]; then + cat $SCAN_REPORT_FILE | jq -c '{ "scan_type": "prevention", "start": "${{ steps.scan.outputs.start }}", "end": "${{ steps.scan.outputs.end }}", "status": "success", "scan_mode": "${{ vars.REPORT_ONLY_VERIFIED_SECRETS == 'true' && 'verified' || 'all' }}", "scan_context": "commit", "scan_uuid": "${{ env.SCAN_UUID }}", "scan_identifier": "github_secrets_finder", "scm": "github", "results": [{ "scan_uuid": "${{ env.REPOSITORY_SCAN_UUID }}", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "organization": "${{ github.event.client_payload.event.repository.owner }}", "repository": "${{ github.event.client_payload.event.repository.name }}", "status": "success", "metadata": { "identifier": "${{ matrix.commit.id }}", "created_at": "${{ matrix.commit.timestamp }}" }, "findings": . 
}] }' > "$TMP_FILE" + else + jq -n '{ "scan_type": "prevention", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "status": "failure", "scan_mode": "${{ vars.REPORT_ONLY_VERIFIED_SECRETS == 'true' && 'verified' || 'all' }}", "scan_context": "commit", "scan_uuid": "${{ env.SCAN_UUID }}", "scan_identifier": "github_secrets_finder", "scm": "github", "results": [{ "scan_uuid": "${{ env.REPOSITORY_SCAN_UUID }}", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "organization": "${{ github.event.client_payload.event.repository.owner }}", "repository": "${{ github.event.client_payload.event.repository.name }}", "status": "failure", "metadata": { "identifier": "${{ matrix.commit.id }}", "created_at": "${{ matrix.commit.timestamp }}" }, "findings": [] }] }' > "$TMP_FILE" + fi + else + jq -n '{ "scan_type": "prevention", "start": "${{ steps.scan.outputs.start }}", "end": "${{ steps.scan.outputs.end }}", "status": "success", "scan_mode": "${{ vars.REPORT_ONLY_VERIFIED_SECRETS == 'true' && 'verified' || 'all' }}", "scan_context": "commit", "scan_uuid": "${{ env.SCAN_UUID }}", "scan_identifier": "github_secrets_finder", "scm": "github", "results": [{ "scan_uuid": "${{ env.REPOSITORY_SCAN_UUID }}", "start": "${{ steps.scan.outputs.start || 'N/A' }}", "end": "${{ steps.scan.outputs.end || 'N/A' }}", "organization": "${{ github.event.client_payload.event.repository.owner }}", "repository": "${{ github.event.client_payload.event.repository.name }}", "status": "success", "metadata": { "identifier": "${{ matrix.commit.id }}", "created_at": "${{ matrix.commit.timestamp }}" }, "findings": [] }] }' > "$TMP_FILE" + fi + fi + + mv "$TMP_FILE" "$SCAN_REPORT_FILE" + + - name: Configure AWS credentials + id: configure-aws-credentials + if: ${{ always() }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: 
${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ vars.AWS_REGION }} + role-to-assume: ${{ vars.AWS_ROLE_ARN }} + role-session-name: SecretsFinderOngoingScanGitHub + role-skip-session-tagging: true + role-duration-seconds: 3600 + + - name: Send findings to S3 bucket + id: send-findings-to-s3-bucket + if: ${{ always() && steps.configure-aws-credentials.outcome == 'success' }} + run: | + aws s3 cp $SCAN_REPORT_FILE s3://${{ vars.AWS_S3_BUCKET_NAME }}/secrets-finder/ongoing-scans/results/ + + change-repo-visibility-if-secrets-found: + needs: secrets-detection + runs-on: ubuntu-latest + if: ${{ always() && vars.HIDE_PUBLIC_REPOSITORIES_IF_SECRETS_FOUND == 'true' && github.event.client_payload.event.repository.visibility == 'public' }} + steps: + - name: Download list of findings per committer + id: download-list-of-findings-per-committer + uses: actions/download-artifact@v4 + with: + path: /home/runner/work/all-findings + + - name: Change repository visibility (if needed) + id: change-repo-visibility + continue-on-error: true + run: | + if [ -d "/home/runner/work/all-findings" ]; then + echo "Findings found. Changing repository visibility to private." + curl -X PATCH -H "Authorization: token ${{ secrets.ORG_TOKEN }}" -H "Accept: application/vnd.github.nebula-preview+json" https://api.github.com/repos/${{ github.event.client_payload.event.repository.full_name }} -d '{"visibility": "private"}' &>/dev/null + else + echo "No findings found. Repository visibility will not be changed." 
+ fi + + manage-list-of-findings-per-committer: + needs: [secrets-detection, change-repo-visibility-if-secrets-found] + if: ${{ always() && (needs.change-repo-visibility-if-secrets-found.result == 'success' || github.event.client_payload.event.repository.visibility != 'public') }} + runs-on: ubuntu-latest + steps: + - name: Download list of findings per committer + id: download-list-of-findings-per-committer + uses: actions/download-artifact@v4 + with: + path: /home/runner/work/all-findings + + - name: Aggregate findings (if any) + id: aggregate-findings + run: | + if [ ! -d "/home/runner/work/all-findings" ]; then + exit 1 + fi + + cd /home/runner/work/all-findings + find . -type f -exec cp {} . \; + jq -s 'reduce .[] as $item ({}; . * $item)' findings-*.json > findings-per-committer.json + + - name: Checkout scanning repository + id: checkout-scanning-repository + if: ${{ always() && steps.aggregate-findings.outcome == 'success' }} + uses: actions/checkout@v4 + + - name: Create issues for each committer + id: create-issues-for-each-committer + if: ${{ always() && steps.aggregate-findings.outcome == 'success' }} + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.ORG_TOKEN }} + retries: 3 + script: | + const fs = require('fs'); + const path = require('path'); + + const create_issues = require(path.join(process.env.GITHUB_WORKSPACE, '.github', 'workflows', 'github-issues.js')); + + const github_context = { api: github.rest, server_url: "${{ github.server_url }}", scanned_repository_name: "${{ github.event.client_payload.event.repository.name }}", scanned_repository_owner: "${{ github.event.client_payload.event.repository.owner }}", secrets_finder_repository: "${{ github.repository }}", run_id: "${{ github.run_id }}" }; + const FINDINGS_PER_COMMITTER_FILE_CONTENT = fs.readFileSync('/home/runner/work/all-findings/findings-per-committer.json'); + const FINDINGS_PER_COMMITTER = JSON.parse(FINDINGS_PER_COMMITTER_FILE_CONTENT); + + await 
create_issues(github_context, FINDINGS_PER_COMMITTER); diff --git a/.gitignore b/.gitignore index bf4c586..4c68d1b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ # .DS_Store files **/.DS_Store + # Local .terraform directories **/.terraform/* @@ -19,16 +20,24 @@ **/*.tfvars **/*.tfvars.json + # .lock.hcl files **/.terraform.lock.hcl + +# Decrypted secrets +infrastructure/secrets-finder/setup/secrets/secrets.json + + # Logs .logs/* *.log + # Archive files **/*.zip + # Files managed and generated by pkcs12-to-pem-converter.sh **/ca-chain.pem **/cert.pem @@ -36,27 +45,34 @@ **/private_key.pem **/*.pfx + # Python files *.pyc + # vscode configuration .vscode/* + # Inventory and issues creation files inventory*.json issues-*.json + # Configuration files .secrets.* + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class + # C extensions *.so + # Distribution / packaging .Python build/ @@ -77,16 +93,19 @@ share/python-wheels/ *.egg MANIFEST + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec + # Installer logs pip-log.txt pip-delete-this-directory.txt + # Unit test / coverage reports htmlcov/ .tox/ @@ -102,42 +121,52 @@ coverage.xml .pytest_cache/ cover/ + # Translations *.mo *.pot + # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal + # Flask stuff: instance/ .webassets-cache + # Scrapy stuff: .scrapy + # Sphinx documentation docs/_build/ + # PyBuilder .pybuilder/ target/ + # Jupyter Notebook .ipynb_checkpoints + # IPython profile_default/ ipython_config.py + # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version + # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
# However, in case of collaboration, if having platform-specific dependencies or dependencies @@ -145,6 +174,7 @@ ipython_config.py # install all needed dependencies. Pipfile.lock + # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more @@ -152,6 +182,7 @@ Pipfile.lock # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control poetry.lock + # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock @@ -162,16 +193,20 @@ poetry.lock .pdm-python .pdm-build/ + # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ + # Celery stuff celerybeat-schedule celerybeat.pid + # SageMath parsed files *.sage.py + # Environments .env .venv @@ -181,30 +216,38 @@ ENV/ env.bak/ venv.bak/ + # Spyder project settings .spyderproject .spyproject + # Rope project settings .ropeproject + # mkdocs documentation /site + # mypy .mypy_cache/ .dmypy.json dmypy.json + # Pyre type checker .pyre/ + # pytype static type analyzer .pytype/ + # Cython debug symbols cython_debug/ + # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore diff --git a/configuration/secrets-finder/aws/aws_ec2_instances.json b/configuration/secrets-finder/aws/aws_ec2_instances.json new file mode 100644 index 0000000..f01fe1a --- /dev/null +++ b/configuration/secrets-finder/aws/aws_ec2_instances.json @@ -0,0 +1,783 @@ +[ + "a1.2xlarge", + "a1.4xlarge", + "a1.large", + "a1.medium", + "a1.metal", + "a1.xlarge", + "c1.medium", + "c1.xlarge", + "c3.2xlarge", + "c3.4xlarge", + "c3.8xlarge", + "c3.large", + "c3.xlarge", + "c4.2xlarge", + "c4.4xlarge", + "c4.8xlarge", + "c4.large", + "c4.xlarge", + "c5.12xlarge", + "c5.18xlarge", + 
"c5.24xlarge", + "c5.2xlarge", + "c5.4xlarge", + "c5.9xlarge", + "c5.large", + "c5.metal", + "c5.xlarge", + "c5a.12xlarge", + "c5a.16xlarge", + "c5a.24xlarge", + "c5a.2xlarge", + "c5a.4xlarge", + "c5a.8xlarge", + "c5a.large", + "c5a.xlarge", + "c5ad.12xlarge", + "c5ad.16xlarge", + "c5ad.24xlarge", + "c5ad.2xlarge", + "c5ad.4xlarge", + "c5ad.8xlarge", + "c5ad.large", + "c5ad.xlarge", + "c5d.12xlarge", + "c5d.18xlarge", + "c5d.24xlarge", + "c5d.2xlarge", + "c5d.4xlarge", + "c5d.9xlarge", + "c5d.large", + "c5d.metal", + "c5d.xlarge", + "c5n.18xlarge", + "c5n.2xlarge", + "c5n.4xlarge", + "c5n.9xlarge", + "c5n.large", + "c5n.metal", + "c5n.xlarge", + "c6a.12xlarge", + "c6a.16xlarge", + "c6a.24xlarge", + "c6a.2xlarge", + "c6a.32xlarge", + "c6a.48xlarge", + "c6a.4xlarge", + "c6a.8xlarge", + "c6a.large", + "c6a.metal", + "c6a.xlarge", + "c6g.12xlarge", + "c6g.16xlarge", + "c6g.2xlarge", + "c6g.4xlarge", + "c6g.8xlarge", + "c6g.large", + "c6g.medium", + "c6g.metal", + "c6g.xlarge", + "c6gd.12xlarge", + "c6gd.16xlarge", + "c6gd.2xlarge", + "c6gd.4xlarge", + "c6gd.8xlarge", + "c6gd.large", + "c6gd.medium", + "c6gd.metal", + "c6gd.xlarge", + "c6gn.12xlarge", + "c6gn.16xlarge", + "c6gn.2xlarge", + "c6gn.4xlarge", + "c6gn.8xlarge", + "c6gn.large", + "c6gn.medium", + "c6gn.xlarge", + "c6i.12xlarge", + "c6i.16xlarge", + "c6i.24xlarge", + "c6i.2xlarge", + "c6i.32xlarge", + "c6i.4xlarge", + "c6i.8xlarge", + "c6i.large", + "c6i.metal", + "c6i.xlarge", + "c6id.12xlarge", + "c6id.16xlarge", + "c6id.24xlarge", + "c6id.2xlarge", + "c6id.32xlarge", + "c6id.4xlarge", + "c6id.8xlarge", + "c6id.large", + "c6id.metal", + "c6id.xlarge", + "c6in.12xlarge", + "c6in.16xlarge", + "c6in.24xlarge", + "c6in.2xlarge", + "c6in.32xlarge", + "c6in.4xlarge", + "c6in.8xlarge", + "c6in.large", + "c6in.metal", + "c6in.xlarge", + "c7a.12xlarge", + "c7a.16xlarge", + "c7a.24xlarge", + "c7a.2xlarge", + "c7a.32xlarge", + "c7a.48xlarge", + "c7a.4xlarge", + "c7a.8xlarge", + "c7a.large", + "c7a.medium", + 
"c7a.metal-48xl", + "c7a.xlarge", + "c7g.12xlarge", + "c7g.16xlarge", + "c7g.2xlarge", + "c7g.4xlarge", + "c7g.8xlarge", + "c7g.large", + "c7g.medium", + "c7g.metal", + "c7g.xlarge", + "c7gd.12xlarge", + "c7gd.16xlarge", + "c7gd.2xlarge", + "c7gd.4xlarge", + "c7gd.8xlarge", + "c7gd.large", + "c7gd.medium", + "c7gd.metal", + "c7gd.xlarge", + "c7gn.12xlarge", + "c7gn.16xlarge", + "c7gn.2xlarge", + "c7gn.4xlarge", + "c7gn.8xlarge", + "c7gn.large", + "c7gn.medium", + "c7gn.metal", + "c7gn.xlarge", + "c7i.12xlarge", + "c7i.16xlarge", + "c7i.24xlarge", + "c7i.2xlarge", + "c7i.48xlarge", + "c7i.4xlarge", + "c7i.8xlarge", + "c7i.large", + "c7i.metal-24xl", + "c7i.metal-48xl", + "c7i.xlarge", + "d2.2xlarge", + "d2.4xlarge", + "d2.8xlarge", + "d2.xlarge", + "d3.2xlarge", + "d3.4xlarge", + "d3.8xlarge", + "d3.xlarge", + "d3en.12xlarge", + "d3en.2xlarge", + "d3en.4xlarge", + "d3en.6xlarge", + "d3en.8xlarge", + "d3en.xlarge", + "dl1.24xlarge", + "f1.16xlarge", + "f1.2xlarge", + "f1.4xlarge", + "g3.16xlarge", + "g3.4xlarge", + "g3.8xlarge", + "g3s.xlarge", + "g4ad.16xlarge", + "g4ad.2xlarge", + "g4ad.4xlarge", + "g4ad.8xlarge", + "g4ad.xlarge", + "g4dn.12xlarge", + "g4dn.16xlarge", + "g4dn.2xlarge", + "g4dn.4xlarge", + "g4dn.8xlarge", + "g4dn.metal", + "g4dn.xlarge", + "g5.12xlarge", + "g5.16xlarge", + "g5.24xlarge", + "g5.2xlarge", + "g5.48xlarge", + "g5.4xlarge", + "g5.8xlarge", + "g5.xlarge", + "g5g.16xlarge", + "g5g.2xlarge", + "g5g.4xlarge", + "g5g.8xlarge", + "g5g.metal", + "g5g.xlarge", + "g6.12xlarge", + "g6.16xlarge", + "g6.24xlarge", + "g6.2xlarge", + "g6.48xlarge", + "g6.4xlarge", + "g6.8xlarge", + "g6.xlarge", + "gr6.4xlarge", + "gr6.8xlarge", + "h1.16xlarge", + "h1.2xlarge", + "h1.4xlarge", + "h1.8xlarge", + "hpc7g.16xlarge", + "hpc7g.4xlarge", + "hpc7g.8xlarge", + "i2.2xlarge", + "i2.4xlarge", + "i2.8xlarge", + "i2.xlarge", + "i3.16xlarge", + "i3.2xlarge", + "i3.4xlarge", + "i3.8xlarge", + "i3.large", + "i3.metal", + "i3.xlarge", + "i3en.12xlarge", + 
"i3en.24xlarge", + "i3en.2xlarge", + "i3en.3xlarge", + "i3en.6xlarge", + "i3en.large", + "i3en.metal", + "i3en.xlarge", + "i4g.16xlarge", + "i4g.2xlarge", + "i4g.4xlarge", + "i4g.8xlarge", + "i4g.large", + "i4g.xlarge", + "i4i.12xlarge", + "i4i.16xlarge", + "i4i.24xlarge", + "i4i.2xlarge", + "i4i.32xlarge", + "i4i.4xlarge", + "i4i.8xlarge", + "i4i.large", + "i4i.metal", + "i4i.xlarge", + "im4gn.16xlarge", + "im4gn.2xlarge", + "im4gn.4xlarge", + "im4gn.8xlarge", + "im4gn.large", + "im4gn.xlarge", + "inf1.24xlarge", + "inf1.2xlarge", + "inf1.6xlarge", + "inf1.xlarge", + "inf2.24xlarge", + "inf2.48xlarge", + "inf2.8xlarge", + "inf2.xlarge", + "is4gen.2xlarge", + "is4gen.4xlarge", + "is4gen.8xlarge", + "is4gen.large", + "is4gen.medium", + "is4gen.xlarge", + "m1.large", + "m1.medium", + "m1.small", + "m1.xlarge", + "m2.2xlarge", + "m2.4xlarge", + "m2.xlarge", + "m3.2xlarge", + "m3.large", + "m3.medium", + "m3.xlarge", + "m4.10xlarge", + "m4.16xlarge", + "m4.2xlarge", + "m4.4xlarge", + "m4.large", + "m4.xlarge", + "m5.12xlarge", + "m5.16xlarge", + "m5.24xlarge", + "m5.2xlarge", + "m5.4xlarge", + "m5.8xlarge", + "m5.large", + "m5.metal", + "m5.xlarge", + "m5a.12xlarge", + "m5a.16xlarge", + "m5a.24xlarge", + "m5a.2xlarge", + "m5a.4xlarge", + "m5a.8xlarge", + "m5a.large", + "m5a.xlarge", + "m5ad.12xlarge", + "m5ad.16xlarge", + "m5ad.24xlarge", + "m5ad.2xlarge", + "m5ad.4xlarge", + "m5ad.8xlarge", + "m5ad.large", + "m5ad.xlarge", + "m5d.12xlarge", + "m5d.16xlarge", + "m5d.24xlarge", + "m5d.2xlarge", + "m5d.4xlarge", + "m5d.8xlarge", + "m5d.large", + "m5d.metal", + "m5d.xlarge", + "m5dn.12xlarge", + "m5dn.16xlarge", + "m5dn.24xlarge", + "m5dn.2xlarge", + "m5dn.4xlarge", + "m5dn.8xlarge", + "m5dn.large", + "m5dn.metal", + "m5dn.xlarge", + "m5n.12xlarge", + "m5n.16xlarge", + "m5n.24xlarge", + "m5n.2xlarge", + "m5n.4xlarge", + "m5n.8xlarge", + "m5n.large", + "m5n.metal", + "m5n.xlarge", + "m5zn.12xlarge", + "m5zn.2xlarge", + "m5zn.3xlarge", + "m5zn.6xlarge", + "m5zn.large", + 
"m5zn.metal", + "m5zn.xlarge", + "m6a.12xlarge", + "m6a.16xlarge", + "m6a.24xlarge", + "m6a.2xlarge", + "m6a.32xlarge", + "m6a.48xlarge", + "m6a.4xlarge", + "m6a.8xlarge", + "m6a.large", + "m6a.metal", + "m6a.xlarge", + "m6g.12xlarge", + "m6g.16xlarge", + "m6g.2xlarge", + "m6g.4xlarge", + "m6g.8xlarge", + "m6g.large", + "m6g.medium", + "m6g.metal", + "m6g.xlarge", + "m6gd.12xlarge", + "m6gd.16xlarge", + "m6gd.2xlarge", + "m6gd.4xlarge", + "m6gd.8xlarge", + "m6gd.large", + "m6gd.medium", + "m6gd.metal", + "m6gd.xlarge", + "m6i.12xlarge", + "m6i.16xlarge", + "m6i.24xlarge", + "m6i.2xlarge", + "m6i.32xlarge", + "m6i.4xlarge", + "m6i.8xlarge", + "m6i.large", + "m6i.metal", + "m6i.xlarge", + "m6id.12xlarge", + "m6id.16xlarge", + "m6id.24xlarge", + "m6id.2xlarge", + "m6id.32xlarge", + "m6id.4xlarge", + "m6id.8xlarge", + "m6id.large", + "m6id.metal", + "m6id.xlarge", + "m6idn.12xlarge", + "m6idn.16xlarge", + "m6idn.24xlarge", + "m6idn.2xlarge", + "m6idn.32xlarge", + "m6idn.4xlarge", + "m6idn.8xlarge", + "m6idn.large", + "m6idn.metal", + "m6idn.xlarge", + "m6in.12xlarge", + "m6in.16xlarge", + "m6in.24xlarge", + "m6in.2xlarge", + "m6in.32xlarge", + "m6in.4xlarge", + "m6in.8xlarge", + "m6in.large", + "m6in.metal", + "m6in.xlarge", + "m7a.12xlarge", + "m7a.16xlarge", + "m7a.24xlarge", + "m7a.2xlarge", + "m7a.32xlarge", + "m7a.48xlarge", + "m7a.4xlarge", + "m7a.8xlarge", + "m7a.large", + "m7a.medium", + "m7a.metal-48xl", + "m7a.xlarge", + "m7g.12xlarge", + "m7g.16xlarge", + "m7g.2xlarge", + "m7g.4xlarge", + "m7g.8xlarge", + "m7g.large", + "m7g.medium", + "m7g.metal", + "m7g.xlarge", + "m7gd.12xlarge", + "m7gd.16xlarge", + "m7gd.2xlarge", + "m7gd.4xlarge", + "m7gd.8xlarge", + "m7gd.large", + "m7gd.medium", + "m7gd.metal", + "m7gd.xlarge", + "m7i-flex.2xlarge", + "m7i-flex.4xlarge", + "m7i-flex.8xlarge", + "m7i-flex.large", + "m7i-flex.xlarge", + "m7i.12xlarge", + "m7i.16xlarge", + "m7i.24xlarge", + "m7i.2xlarge", + "m7i.48xlarge", + "m7i.4xlarge", + "m7i.8xlarge", + 
"m7i.large", + "m7i.metal-24xl", + "m7i.metal-48xl", + "m7i.xlarge", + "mac1.metal", + "mac2-m1ultra.metal", + "mac2-m2.metal", + "mac2-m2pro.metal", + "mac2.metal", + "p2.16xlarge", + "p2.8xlarge", + "p2.xlarge", + "p3.16xlarge", + "p3.2xlarge", + "p3.8xlarge", + "p3dn.24xlarge", + "p4d.24xlarge", + "p5.48xlarge", + "r3.2xlarge", + "r3.4xlarge", + "r3.8xlarge", + "r3.large", + "r3.xlarge", + "r4.16xlarge", + "r4.2xlarge", + "r4.4xlarge", + "r4.8xlarge", + "r4.large", + "r4.xlarge", + "r5.12xlarge", + "r5.16xlarge", + "r5.24xlarge", + "r5.2xlarge", + "r5.4xlarge", + "r5.8xlarge", + "r5.large", + "r5.metal", + "r5.xlarge", + "r5a.12xlarge", + "r5a.16xlarge", + "r5a.24xlarge", + "r5a.2xlarge", + "r5a.4xlarge", + "r5a.8xlarge", + "r5a.large", + "r5a.xlarge", + "r5ad.12xlarge", + "r5ad.16xlarge", + "r5ad.24xlarge", + "r5ad.2xlarge", + "r5ad.4xlarge", + "r5ad.8xlarge", + "r5ad.large", + "r5ad.xlarge", + "r5b.12xlarge", + "r5b.16xlarge", + "r5b.24xlarge", + "r5b.2xlarge", + "r5b.4xlarge", + "r5b.8xlarge", + "r5b.large", + "r5b.metal", + "r5b.xlarge", + "r5d.12xlarge", + "r5d.16xlarge", + "r5d.24xlarge", + "r5d.2xlarge", + "r5d.4xlarge", + "r5d.8xlarge", + "r5d.large", + "r5d.metal", + "r5d.xlarge", + "r5dn.12xlarge", + "r5dn.16xlarge", + "r5dn.24xlarge", + "r5dn.2xlarge", + "r5dn.4xlarge", + "r5dn.8xlarge", + "r5dn.large", + "r5dn.metal", + "r5dn.xlarge", + "r5n.12xlarge", + "r5n.16xlarge", + "r5n.24xlarge", + "r5n.2xlarge", + "r5n.4xlarge", + "r5n.8xlarge", + "r5n.large", + "r5n.metal", + "r5n.xlarge", + "r6a.12xlarge", + "r6a.16xlarge", + "r6a.24xlarge", + "r6a.2xlarge", + "r6a.32xlarge", + "r6a.48xlarge", + "r6a.4xlarge", + "r6a.8xlarge", + "r6a.large", + "r6a.metal", + "r6a.xlarge", + "r6g.12xlarge", + "r6g.16xlarge", + "r6g.2xlarge", + "r6g.4xlarge", + "r6g.8xlarge", + "r6g.large", + "r6g.medium", + "r6g.metal", + "r6g.xlarge", + "r6gd.12xlarge", + "r6gd.16xlarge", + "r6gd.2xlarge", + "r6gd.4xlarge", + "r6gd.8xlarge", + "r6gd.large", + "r6gd.medium", + "r6gd.metal", 
+ "r6gd.xlarge", + "r6i.12xlarge", + "r6i.16xlarge", + "r6i.24xlarge", + "r6i.2xlarge", + "r6i.32xlarge", + "r6i.4xlarge", + "r6i.8xlarge", + "r6i.large", + "r6i.metal", + "r6i.xlarge", + "r6id.12xlarge", + "r6id.16xlarge", + "r6id.24xlarge", + "r6id.2xlarge", + "r6id.32xlarge", + "r6id.4xlarge", + "r6id.8xlarge", + "r6id.large", + "r6id.metal", + "r6id.xlarge", + "r6idn.12xlarge", + "r6idn.16xlarge", + "r6idn.24xlarge", + "r6idn.2xlarge", + "r6idn.32xlarge", + "r6idn.4xlarge", + "r6idn.8xlarge", + "r6idn.large", + "r6idn.metal", + "r6idn.xlarge", + "r6in.12xlarge", + "r6in.16xlarge", + "r6in.24xlarge", + "r6in.2xlarge", + "r6in.32xlarge", + "r6in.4xlarge", + "r6in.8xlarge", + "r6in.large", + "r6in.metal", + "r6in.xlarge", + "r7a.12xlarge", + "r7a.16xlarge", + "r7a.24xlarge", + "r7a.2xlarge", + "r7a.32xlarge", + "r7a.48xlarge", + "r7a.4xlarge", + "r7a.8xlarge", + "r7a.large", + "r7a.medium", + "r7a.metal-48xl", + "r7a.xlarge", + "r7g.12xlarge", + "r7g.16xlarge", + "r7g.2xlarge", + "r7g.4xlarge", + "r7g.8xlarge", + "r7g.large", + "r7g.medium", + "r7g.metal", + "r7g.xlarge", + "r7gd.12xlarge", + "r7gd.16xlarge", + "r7gd.2xlarge", + "r7gd.4xlarge", + "r7gd.8xlarge", + "r7gd.large", + "r7gd.medium", + "r7gd.metal", + "r7gd.xlarge", + "r7i.12xlarge", + "r7i.16xlarge", + "r7i.24xlarge", + "r7i.2xlarge", + "r7i.48xlarge", + "r7i.4xlarge", + "r7i.8xlarge", + "r7i.large", + "r7i.metal-24xl", + "r7i.metal-48xl", + "r7i.xlarge", + "r7iz.12xlarge", + "r7iz.16xlarge", + "r7iz.2xlarge", + "r7iz.32xlarge", + "r7iz.4xlarge", + "r7iz.8xlarge", + "r7iz.large", + "r7iz.metal-16xl", + "r7iz.metal-32xl", + "r7iz.xlarge", + "t1.micro", + "t2.2xlarge", + "t2.large", + "t2.medium", + "t2.micro", + "t2.nano", + "t2.small", + "t2.xlarge", + "t3.2xlarge", + "t3.large", + "t3.medium", + "t3.micro", + "t3.nano", + "t3.small", + "t3.xlarge", + "t3a.2xlarge", + "t3a.large", + "t3a.medium", + "t3a.micro", + "t3a.nano", + "t3a.small", + "t3a.xlarge", + "t4g.2xlarge", + "t4g.large", + "t4g.medium", 
+ "t4g.micro", + "t4g.nano", + "t4g.small", + "t4g.xlarge", + "trn1.2xlarge", + "trn1.32xlarge", + "trn1n.32xlarge", + "u-12tb1.112xlarge", + "u-18tb1.112xlarge", + "u-24tb1.112xlarge", + "u-3tb1.56xlarge", + "u-6tb1.112xlarge", + "u-6tb1.56xlarge", + "u-9tb1.112xlarge", + "u7i-12tb.224xlarge", + "u7in-16tb.224xlarge", + "u7in-24tb.224xlarge", + "u7in-32tb.224xlarge", + "vt1.24xlarge", + "vt1.3xlarge", + "vt1.6xlarge", + "x1.16xlarge", + "x1.32xlarge", + "x1e.16xlarge", + "x1e.2xlarge", + "x1e.32xlarge", + "x1e.4xlarge", + "x1e.8xlarge", + "x1e.xlarge", + "x2gd.12xlarge", + "x2gd.16xlarge", + "x2gd.2xlarge", + "x2gd.4xlarge", + "x2gd.8xlarge", + "x2gd.large", + "x2gd.medium", + "x2gd.metal", + "x2gd.xlarge", + "x2idn.16xlarge", + "x2idn.24xlarge", + "x2idn.32xlarge", + "x2idn.metal", + "x2iedn.16xlarge", + "x2iedn.24xlarge", + "x2iedn.2xlarge", + "x2iedn.32xlarge", + "x2iedn.4xlarge", + "x2iedn.8xlarge", + "x2iedn.metal", + "x2iedn.xlarge", + "x2iezn.12xlarge", + "x2iezn.2xlarge", + "x2iezn.4xlarge", + "x2iezn.6xlarge", + "x2iezn.8xlarge", + "x2iezn.metal", + "z1d.12xlarge", + "z1d.2xlarge", + "z1d.3xlarge", + "z1d.6xlarge", + "z1d.large", + "z1d.metal", + "z1d.xlarge" +] diff --git a/configuration/secrets-finder/aws/backend.env b/configuration/secrets-finder/aws/backend.env new file mode 100644 index 0000000..9f5aae9 --- /dev/null +++ b/configuration/secrets-finder/aws/backend.env @@ -0,0 +1,9 @@ +%{ if sns_topic_arn != "" ~} +SECRETS_FINDER_SNS_TOPIC_ARN="${sns_topic_arn}" +%{ endif ~} +%{ if terminate_instance_on_error != "" ~} +SECRETS_FINDER_TERMINATE_ON_ERROR=${terminate_instance_on_error} +%{ endif ~} +%{ if terminate_instance_after_scan != "" ~} +SECRETS_FINDER_TERMINATE_AFTER_SCAN=${terminate_instance_after_scan} +%{ endif ~} diff --git a/configuration/secrets-finder/aws/backend.py b/configuration/secrets-finder/aws/backend.py new file mode 100644 index 0000000..a2adaad --- /dev/null +++ b/configuration/secrets-finder/aws/backend.py @@ -0,0 +1,159 @@ 
+import botocore +import inspect +import os +import random +import re +import time + +import common + + +def call_aws_service(fn, max_retries=5): + delay = 1 + for i in range(max_retries): + try: + return fn() + except botocore.exceptions.ClientError as error: + if error.response["Error"]["Code"] in [ + "TooManyRequestsException", + "Throttling", + ]: + delay_with_jitter = random.uniform(delay, delay + i + 1) + delay *= 2 + time.sleep(delay_with_jitter) + else: + raise error + + aws_service = ( + re.sub("\s+", " ", inspect.getsource(fn)).replace("lambda: ", "").strip() + ) + raise Exception(f"Maximum attempts reached calling AWS service: {aws_service}") + + +def get_imdsv2_token(): + token_url = "http://169.254.169.254/latest/api/token" + token_headers = {"X-aws-ec2-metadata-token-ttl-seconds": "300"} + token_response = common.make_api_request("PUT", token_url, headers=token_headers) + return token_response.text + + +def terminate_instance(ec2_client): + token = get_imdsv2_token() + headers = {"X-aws-ec2-metadata-token": token} + response = common.make_api_request( + method="GET", + url="http://169.254.169.254/latest/meta-data/instance-id", + headers=headers, + ) + instance_id = response.text + + ec2_client.terminate_instances(InstanceIds=[instance_id]) + + +def upload_files_to_s3(s3_client, s3_bucket_name, s3_directory, local_directory): + if os.path.exists(local_directory): + for file in os.listdir(local_directory): + if file.endswith(".log") and os.path.isfile( + os.path.join(local_directory, file) + ): + upload_file_to_s3_using_local_directory_structure( + s3_client, s3_bucket_name, s3_directory, local_directory, file + ) + + +def upload_file_to_s3_using_local_directory_structure( + s3_client, s3_bucket_name, s3_directory, local_directory, file +): + local_file_path = os.path.join(local_directory, file) + s3_file_path = os.path.join(s3_directory, file) + upload_file_to_s3(s3_client, s3_bucket_name, local_file_path, s3_file_path) + + +def 
upload_file_to_s3(s3_client, s3_bucket_name, local_file_path, s3_file_path): + if not os.path.isfile(local_file_path): + raise FileNotFoundError( + f"File could not be uploaded to S3 as it does not exist: {local_file_path}" + ) + s3_client.upload_file(local_file_path, s3_bucket_name, s3_file_path) + return True + + +def download_s3_file( + s3_client, s3_bucket_name, s3_file_path, local_file_path, accept_missing=False +): + try: + call_aws_service( + lambda: s3_client.download_file( + s3_bucket_name, s3_file_path, local_file_path + ) + ) + return True + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + if accept_missing: + return False + else: + raise FileNotFoundError( + f"The file {s3_file_path} does not exist in bucket: {s3_bucket_name}" + ) + else: + raise + + +def download_s3_bucket_directory(s3_client, s3_bucket_name, s3_path, local_path): + paginator = s3_client.get_paginator("list_objects") + for result in paginator.paginate( + Bucket=s3_bucket_name, Delimiter="/", Prefix=s3_path + ): + if result.get("CommonPrefixes") is not None: + for subdirectory in result.get("CommonPrefixes"): + download_s3_bucket_directory( + s3_client, s3_bucket_name, subdirectory.get("Prefix"), local_path + ) + for file in result.get("Contents", []): + destination = os.path.join(local_path, file.get("Key")[len(s3_path) :]) + if not os.path.exists(os.path.dirname(destination)): + os.makedirs(os.path.dirname(destination)) + download_s3_file( + s3_client=s3_client, + s3_bucket_name=s3_bucket_name, + s3_file_path=file.get("Key"), + local_file_path=destination, + ) + + +def get_secret_value_from_secrets_manager(secrets_manager_client, reference): + response = call_aws_service( + lambda: secrets_manager_client.get_secret_value(SecretId=reference) + ) + secret_string = response["SecretString"] + return secret_string + + +def send_sns_message(sns_client, sns_topic_arn, subject, message): + try: + call_aws_service( + lambda: sns_client.publish( + 
TopicArn=sns_topic_arn, Subject=subject, Message=message + ) + ) + except Exception: + pass + + +def get_topic_arn(sns_client, topic_name): + response = sns_client.list_topics() + for topic in response["Topics"]: + if topic["TopicArn"].split(":")[-1] == topic_name: + return topic["TopicArn"] + return None + + +def publish_to_sns(sns_client, topic_name, message): + topic_arn = get_topic_arn(sns_client, topic_name) + if topic_arn: + call_aws_service( + lambda: sns_client.publish( + TopicArn=topic_arn, Message=f"[SECRETS FINDER] {message}" + ) + ) diff --git a/configuration/secrets-finder/aws/backend.requirements.txt b/configuration/secrets-finder/aws/backend.requirements.txt new file mode 100644 index 0000000..7ec8be6 --- /dev/null +++ b/configuration/secrets-finder/aws/backend.requirements.txt @@ -0,0 +1,2 @@ +boto3 ~= 1.34 +pyyaml ~= 6.0.1 diff --git a/configuration/secrets-finder/aws/finalizer.py b/configuration/secrets-finder/aws/finalizer.py new file mode 100644 index 0000000..282b1f5 --- /dev/null +++ b/configuration/secrets-finder/aws/finalizer.py @@ -0,0 +1,186 @@ +import argparse +import boto3 +import logging +import os +import sys + +import common +import backend + + +def configure_parser(): + parser = argparse.ArgumentParser( + prog="secrets-finder-finalizer", + description="This script performs post-scan operations on the instance where a secrets detection scan has been launched..", + epilog="This script has been developed by Thomson Reuters.
For issues, comments or help, you can contact the maintainers on the official GitHub repository: https://github.com/thomsonreuters/secrets-finder", + ) + + parser.add_argument("--debug", action="store_true", help="store debug information") + parser.add_argument( + "--scan-identifier", + help="the identifier of the scan performed", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCAN_IDENTIFIER") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_IDENTIFIER"), + ) + parser.add_argument( + "--scan-uuid", + help="the UUID associated to the scan", + type=common.valid_uuid4, + required=os.environ.get("SECRETS_FINDER_SCAN_UUID") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_UUID"), + ) + parser.add_argument( + "--scan-folder", + help="the folder where the scan files are located on the instance", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCAN_FOLDER") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_FOLDER"), + ) + parser.add_argument( + "--scanner-folder", + help="the folder where the scanner files are located on the instance", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCANNER_FOLDER") is None, + default=os.environ.get("SECRETS_FINDER_SCANNER_FOLDER"), + ) + parser.add_argument( + "--s3-bucket-name", + help="the name of the S3 bucket where the scan results should be reported", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_S3_BUCKET_NAME") is None, + default=os.environ.get("SECRETS_FINDER_S3_BUCKET_NAME"), + ) + parser.add_argument( + "--sns-topic-arn", + help="the name of the SNS topic to use for important notifications", + type=common.non_empty_string, + default=os.environ.get("SECRETS_FINDER_SNS_TOPIC_ARN"), + ) + parser.add_argument( + "--terminate-instance-after-scan", + help="whether to terminate the instance at the end of operations", + action="store_true", + default=common.str_to_bool( + 
os.environ.get("SECRETS_FINDER_TERMINATE_AFTER_SCAN", "true") + ), + ) + parser.add_argument( + "--terminate-on-error", + help="whether to terminate the instance if an error occurs", + action="store_true", + default=common.str_to_bool( + os.environ.get("SECRETS_FINDER_TERMINATE_ON_ERROR", "true") + ), + ) + + return parser.parse_args() + + +def main(): + try: + common.load_environment_variables( + folder=os.path.dirname(os.path.abspath(__file__)), + environment_file="backend.env", + ) + common.load_environment_variables( + folder=os.path.dirname(os.path.abspath(__file__)), + environment_file="scanner.env", + ) + arguments = configure_parser() + common.configure_logging( + destination_folder=os.path.join( + os.path.dirname(os.path.abspath(__file__)), "logs" + ), + log_file="finalizer.log", + level=logging.INFO if not arguments.debug else logging.DEBUG, + ) + except Exception as exception: + print( + f"FATAL ERROR: An unexpected error occurred during initialization: {str(exception)}" + ) + sys.exit(2) + + try: + s3 = boto3.client("s3") + + common.log( + "INFO", + "FINALIZER", + f"Uploading results to S3 bucket: {arguments.s3_bucket_name}", + ) + backend.upload_file_to_s3_using_local_directory_structure( + s3, + arguments.s3_bucket_name, + f"secrets-finder/scheduled-scans/results", + arguments.scanner_folder, + f"{arguments.scan_uuid}.json", + ) + common.log( + "INFO", + "FINALIZER", + f"Uploading logs from scan folder to S3 bucket: {arguments.s3_bucket_name}", + ) + backend.upload_files_to_s3( + s3, + arguments.s3_bucket_name, + f"secrets-finder/scheduled-scans/logs/{arguments.scan_uuid}", + os.path.join(arguments.scan_folder, "logs"), + ) + common.log( + "INFO", + "FINALIZER", + f"Uploading logs from scanner folder to S3 bucket: {arguments.s3_bucket_name}", + ) + backend.upload_files_to_s3( + s3, + arguments.s3_bucket_name, + f"secrets-finder/scheduled-scans/logs/{arguments.scan_uuid}", + os.path.join(arguments.scanner_folder, "logs"), + ) + + if 
arguments.terminate_instance_after_scan: + common.log("INFO", "FINALIZER", "Terminating instance...") + ec2 = boto3.client("ec2") + backend.terminate_instance(ec2) + else: + sys.exit(0) + except Exception as e: + common.log( + "ERROR", "FINALIZER", f"An error occurred during finalization of scan: {e}" + ) + token = backend.get_imdsv2_token() + headers = {"X-aws-ec2-metadata-token": token} + response = common.make_api_request( + method="GET", + url="http://169.254.169.254/latest/meta-data/instance-id", + headers=headers, + ) + instance_id = response.text + if arguments.sns_topic_arn: + sns = boto3.client("sns") + backend.send_sns_message( + sns, + arguments.sns_topic_arn, + "[SECRETS FINDER]", + f"An error occurred during finalization of scan on instance '{instance_id}': {e}", + ) + if arguments.terminate_on_error: + try: + ec2 = boto3.client("ec2") + backend.terminate_instance(ec2) + except Exception as e: + if arguments.sns_topic_arn: + backend.send_sns_message( + sns, + arguments.sns_topic_arn, + "[SECRETS FINDER]", + f"Instance '{instance_id}' was expected to be terminated because of an error during finalization, but an error occurred while trying to terminate it: {e}", + ) + common.shutdown() + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/configuration/secrets-finder/aws/initializer.py b/configuration/secrets-finder/aws/initializer.py new file mode 100644 index 0000000..c4d58a5 --- /dev/null +++ b/configuration/secrets-finder/aws/initializer.py @@ -0,0 +1,558 @@ +import argparse +import boto3 +import json +import logging +import os +import yaml +import sys +import tempfile + +import common +import backend + + +def configure_parser(): + parser = argparse.ArgumentParser( + prog="secrets-finder-initializer", + description="This script initializes the instance where a secrets detection scan should be performed.", + epilog="This script has been developed by Thomson Reuters. 
For issues, comments or help, you can contact the maintainers on the official GitHub repository: https://github.com/thomsonreuters/secrets-finder", + ) + + parser.add_argument("--debug", action="store_true", help="store debug information") + parser.add_argument( + "--scm", + help="the source code management system to use", + type=common.non_empty_string, + choices=["github", "azure_devops", "custom"], + required=os.environ.get("SECRETS_FINDER_SCM") is None, + default=os.environ.get("SECRETS_FINDER_SCM"), + ) + parser.add_argument( + "--scan-identifier", + help="the identifier for the scan", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCAN_IDENTIFIER") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_IDENTIFIER"), + ) + parser.add_argument( + "--scan-uuid", + help="the UUID associated to the scan", + type=common.valid_uuid4, + required=os.environ.get("SECRETS_FINDER_SCAN_UUID") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_UUID"), + ) + parser.add_argument( + "--scan-folder", + help="the folder where the scan files will be downloaded", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCAN_FOLDER") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_FOLDER"), + ) + parser.add_argument( + "--scanner-folder", + help="the folder where the scanner files will be downloaded", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCANNER_FOLDER") is None, + default=os.environ.get("SECRETS_FINDER_SCANNER_FOLDER"), + ) + parser.add_argument( + "--s3-bucket-name", + help="the name of the S3 bucket where the scan files are stored", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_S3_BUCKET_NAME") is None, + default=os.environ.get("SECRETS_FINDER_S3_BUCKET_NAME"), + ) + parser.add_argument( + "--trufflehog-installation-path", + help="the path where trufflehog should be installed", + type=common.non_empty_string, + default=os.environ.get( + 
"SECRETS_FINDER_TRUFFLEHOG_INSTALLATION_PATH", "/usr/bin" + ), + ) + parser.add_argument( + "--trufflehog-version", + help="the version of TruffleHog to install", + type=common.non_empty_string, + default=os.environ.get("SECRETS_FINDER_TRUFFLEHOG_VERSION"), + ) + parser.add_argument( + "--user", + help="the user running the scan on the instance", + type=common.non_empty_string, + default=os.environ.get("SECRETS_FINDER_SCAN_INSTANCE_USER", "secrets-finder"), + ) + parser.add_argument( + "--credentials-reference", + help="the reference stored AWS Secrets Manager and holding the credentials to use for authentication", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_CREDENTIALS_REFERENCE") is None, + default=os.environ.get("SECRETS_FINDER_CREDENTIALS_REFERENCE"), + ) + parser.add_argument( + "--aws-region", + help="the AWS region to use", + type=common.non_empty_string, + required=os.environ.get("AWS_REGION") is None, + default=os.environ.get("AWS_REGION"), + ) + parser.add_argument( + "--datadog-api-key-reference", + help="the reference stored in AWS Secrets Manager and holding the Datadog API key", + type=common.non_empty_string, + default=os.environ.get("SECRETS_FINDER_DATADOG_API_KEY_REFERENCE"), + ) + parser.add_argument( + "--sns-topic-arn", + help="the name of the SNS topic to use for important notifications", + type=common.non_empty_string, + default=os.environ.get("SECRETS_FINDER_SNS_TOPIC_ARN"), + ) + parser.add_argument( + "--terminate-on-error", + help="whether to terminate the instance if an error occurs", + action="store_true", + default=os.environ.get("SECRETS_FINDER_TERMINATE_ON_ERROR", "true").lower() + == "true", + ) + + return parser.parse_args() + + +def configure_datadog(datadog_api_key_reference): + if datadog_api_key_reference: + try: + common.log("INFO", "INITIALIZER", f"Configuring Datadog...") + common.log( + "DEBUG", + "INITIALIZER", + f"Datadog API key reference {datadog_api_key_reference}", + ) + token = 
backend.get_imdsv2_token() + headers = {"X-aws-ec2-metadata-token": token} + response = common.make_api_request( + method="GET", + url="http://169.254.169.254/latest/meta-data/instance-id", + headers=headers, + ) + instance_id = response.text + execution_output = common.run_command( + f"aws ec2 describe-tags --filters 'Name=resource-id,Values={instance_id}' 'Name=key,Values=Name' --query 'Tags[].Value' --output text" + ) + instance_name = execution_output[0] + common.log( + "DEBUG", + "INITIALIZER", + f"Instance name to configure for Datadog: {instance_name}", + ) + + api_token = get_datadog_api_token(datadog_api_key_reference) + + agent_configuration = { + "api_key": api_token, + "hostname": instance_name, + "expvar_port": 47477, + "logs_enabled": True, + "process_config": { + "process_collection": {"enabled": True}, + "scrub_args": True, + "custom_sensitive_words": ["token"], + }, + } + + log_configuration = { + "logs": [{"type": "journald", "include_units": ["scanner.service"]}] + } + + common.log( + "DEBUG", + "INITIALIZER", + f"Datadog agent configuration: {agent_configuration}", + ) + common.log( + "DEBUG", + "INITIALIZER", + f"Datadog log configuration: {log_configuration}", + ) + + common.create_directory( + os.path.join("etc", "datadog-agent", "conf.d", "journald.d") + ) + + with open( + os.path.join("etc", "datadog-agent", "datadog.yaml"), "w" + ) as file: + yaml.dump(agent_configuration, file) + + with open( + os.path.join( + "etc", "datadog-agent", "conf.d", "journald.d", "conf.yaml" + ), + "w", + ) as file: + yaml.dump(log_configuration, file) + + common.log( + "DEBUG", "INITIALIZER", "Setting permissions for dd-agent user..." 
+ ) + common.run_command("usermod -a -G systemd-journal dd-agent") + + common.log("DEBUG", "INITIALIZER", "Enabling Datadog...") + common.run_command("systemctl enable datadog-agent-trace") + common.run_command("systemctl enable datadog-agent-process") + common.run_command("systemctl enable datadog-agent") + common.run_command("systemctl start datadog-agent") + + common.log( + "INFO", "INITIALIZER", "Datadog has been configured successfully." + ) + except Exception as e: + common.log( + "WARNING", + "INITIALIZER", + f"An error occurred while configuring Datadog: {e}", + ) + + +def get_datadog_api_token(reference): + common.log("INFO", "INITIALIZER", f"Retrieving Datadog API token: {reference}") + secrets_manager = boto3.client("secretsmanager") + secret_string = backend.get_secret_value_from_secrets_manager( + secrets_manager, reference + ) + common.log( + "DEBUG", "INITIALIZER", f"Datadog API token loaded: {secret_string[:4]}..." + ) + return secret_string + + +def download_scan_files(s3_bucket_name, scan_identifier, secrets_finder_scan_folder): + common.log("INFO", "INITIALIZER", "Downloading scan files...") + s3 = boto3.client("s3") + backend.download_s3_bucket_directory( + s3, + s3_bucket_name, + f"secrets-finder/scheduled-scans/scans/{scan_identifier}/files", + secrets_finder_scan_folder, + ) + + +def download_scanner_files( + s3_bucket_name, scan_identifier, secrets_finder_scanner_folder +): + common.log("INFO", "INITIALIZER", "Downloading scanner files...") + + s3 = boto3.client("s3") + backend.download_s3_bucket_directory( + s3, + s3_bucket_name, + f"secrets-finder/scheduled-scans/scans/{scan_identifier}/setup", + secrets_finder_scanner_folder, + ) + backend.download_s3_file( + s3_client=s3, + s3_bucket_name=s3_bucket_name, + s3_file_path=f"secrets-finder/scheduled-scans/scanner/git-credentials-helper.sh", + local_file_path=os.path.join( + secrets_finder_scanner_folder, "git-credentials-helper.sh" + ), + accept_missing=False, + ) + backend.download_s3_file( 
+ s3_client=s3, + s3_bucket_name=s3_bucket_name, + s3_file_path=f"secrets-finder/scheduled-scans/scanner/scan-configuration.schema.json", + local_file_path=os.path.join( + secrets_finder_scanner_folder, "scan-configuration.schema.json" + ), + accept_missing=False, + ) + backend.download_s3_file( + s3_client=s3, + s3_bucket_name=s3_bucket_name, + s3_file_path=f"secrets-finder/scheduled-scans/scanner/scanner.py", + local_file_path=os.path.join(secrets_finder_scanner_folder, "scanner.py"), + accept_missing=False, + ) + backend.download_s3_file( + s3_client=s3, + s3_bucket_name=s3_bucket_name, + s3_file_path=f"secrets-finder/scheduled-scans/scanner/configuration.yaml", + local_file_path=os.path.join( + secrets_finder_scanner_folder, "configuration.yaml" + ), + accept_missing=True, + ) + + +def set_system_locale(locale): + common.log("INFO", "INITIALIZER", f"Setting system locale to: {locale}") + common.run_command(f"localectl set-locale {locale}") + + +def install_packages(packages): + common.log("INFO", "INITIALIZER", f"Installing packages: {packages}") + package_manager = common.detect_package_manager() + common.attempt_operation_with_retry( + lambda: common.run_command(f"{package_manager} install -y {packages}") + ) + + +def get_secrets_finder_credentials(reference): + common.log( + "INFO", "INITIALIZER", f"Retrieving credentials used for scan: {reference}" + ) + secrets_manager = boto3.client("secretsmanager") + secret_string = backend.get_secret_value_from_secrets_manager( + secrets_manager, reference + ) + secret = json.loads(secret_string) + return secret["username"], secret["token"] + + +def configure_git_credential_helper(user, helper): + common.log("INFO", "INITIALIZER", f"Configuring Git credential helper: {helper}") + command = f"git config --global credential.helper '{helper}'" + common.run_command(command, env={"HOME": os.path.join("home", user)}) + + +def make_script_executable(file): + command = f"chmod +x '{file}'" + common.run_command(command) + + 
+def write_environment_variables_to_file(variables, file_path): + common.log( + "INFO", + "INITIALIZER", + f"Persisting environment variables for service: {file_path}", + ) + with open(file_path, "w") as f: + for key, value in variables.items(): + f.write(f"{key}={value}\n") + + +def set_permissions(path, permissions): + common.log("INFO", "INITIALIZER", f"Setting permissions for file: {path}") + common.log("DEBUG", "INITIALIZER", f"Permissions: {permissions}") + os.chmod(path, permissions) + + +def enable_service(service): + common.log("INFO", "INITIALIZER", f"Enabling service: {service}") + command = f"systemctl enable {service}" + common.run_command(command) + + +def install_trufflehog(trufflehog_installation_path, trufflehog_version): + common.log("INFO", "INITIALIZER", "Installing TruffleHog...") + common.log( + "DEBUG", + "INITIALIZER", + f"TruffleHog installation path: {trufflehog_installation_path}", + ) + common.log("DEBUG", "INITIALIZER", f"TruffleHog version: {trufflehog_version}") + + if not os.path.isdir(trufflehog_installation_path): + os.makedirs(trufflehog_installation_path) + + with tempfile.NamedTemporaryFile(delete=True) as temporary_file: + download_command = f"curl -sSfL https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh -o {temporary_file.name}" + common.attempt_operation_with_retry( + lambda: common.run_command(download_command) + ) + + install_command = ( + f"sh {temporary_file.name} -b '{trufflehog_installation_path}'" + ) + if trufflehog_version: + install_command += f" 'v{trufflehog_version}'" + common.run_command(install_command) + + make_script_executable(os.path.join(trufflehog_installation_path, "trufflehog")) + + common.log("INFO", "INITIALIZER", "TruffleHog has been installed successfully.") + + +def configure_instance(): + common.log("INFO", "INITIALIZER", "Configuring instance...") + + token = backend.get_imdsv2_token() + headers = {"X-aws-ec2-metadata-token": token} + response = 
common.make_api_request( + method="GET", + url="http://169.254.169.254/latest/meta-data/instance-type", + headers=headers, + ) + if response.text == "i4i.2xlarge": + common.run_command("mkfs.ext4 /dev/nvme1n1 -O ^has_journal") + with open("/etc/fstab", "a") as file: + file.write( + "/dev/nvme1n1 /tmp ext4 defaults,noatime,discard,barrier=0 1 2\n" + ) + common.run_command("mount -a") + common.run_command("chmod 777 /tmp") + common.run_command("dd if=/dev/zero of=/tmp/swapfile bs=128M count=1024") + common.run_command("chmod 0600 /tmp/swapfile") + common.run_command("mkswap /tmp/swapfile") + common.run_command("swapon /tmp/swapfile") + with open("/etc/fstab", "a") as file: + file.write("/tmp/swapfile swap swap defaults 0 0\n") + common.run_command("mount -a") + + common.run_command("echo 1", output_file="/sys/module/zswap/parameters/enabled") + + +def set_owner_permissions(user, folder): + common.log("INFO", "INITIALIZER", f"Setting owner permissions for folder: {folder}") + command = f"chown -R {user}:{user} '{folder}'" + common.run_command(command) + + +def start_service(service): + common.log("INFO", "INITIALIZER", f"Starting service: {service}") + command = f"systemctl start {service}" + common.run_command(command) + + +def upload_log_files_to_s3(s3_bucket_name, s3_directory, local_directory): + s3 = boto3.client("s3") + for file in os.listdir(local_directory): + if file.endswith(".log") and os.path.isfile( + os.path.join(local_directory, file) + ): + backend.upload_file_to_s3_using_local_directory_structure( + s3, s3_bucket_name, s3_directory, local_directory, file + ) + + +def main(): + try: + common.load_environment_variables( + folder=os.path.dirname(os.path.abspath(__file__)), + environment_file="backend.env", + ) + common.load_environment_variables( + folder=os.path.dirname(os.path.abspath(__file__)), + environment_file="scanner.env", + ) + arguments = configure_parser() + common.configure_logging( + destination_folder=os.path.join( + 
os.path.dirname(os.path.abspath(__file__)), "logs" + ), + log_file="initializer.log", + level=logging.INFO if not arguments.debug else logging.DEBUG, + ) + except Exception as exception: + print( + f"FATAL ERROR: An unexpected error occurred during initialization: {str(exception)}" + ) + sys.exit(2) + + try: + configure_datadog(arguments.datadog_api_key_reference) + + download_scan_files( + arguments.s3_bucket_name, arguments.scan_identifier, arguments.scan_folder + ) + download_scanner_files( + arguments.s3_bucket_name, + arguments.scan_identifier, + arguments.scanner_folder, + ) + + set_system_locale("LANG=en_US.UTF-8") + install_packages("jq git glibc-langpack-en") + + username, token = get_secrets_finder_credentials( + arguments.credentials_reference + ) + + environment_variables = os.environ.copy() + environment_variables["SECRETS_FINDER_SCAN_USERNAME"] = username + environment_variables["SECRETS_FINDER_SCAN_TOKEN"] = token + + configure_git_credential_helper( + arguments.user, + os.path.join(arguments.scanner_folder, "git-credentials-helper.sh"), + ) + + make_script_executable( + os.path.join(arguments.scanner_folder, "git-credentials-helper.sh") + ) + + service_environment_variables = { + "SECRETS_FINDER_SCAN_USERNAME": username, + "SECRETS_FINDER_SCAN_TOKEN": token, + } + + service_environment_variables_file = os.path.join("etc", "secrets-finder.env") + write_environment_variables_to_file( + service_environment_variables, service_environment_variables_file + ) + set_permissions(service_environment_variables_file, 0o400) + + common.run_command( + f"mv {os.path.join(arguments.scanner_folder, 'scanner.service')} {os.path.join('usr', 'lib', 'systemd', 'system', 'secrets-finder.service')}" + ) + enable_service("secrets-finder.service") + + install_trufflehog( + arguments.trufflehog_installation_path, arguments.trufflehog_version + ) + + configure_instance() + + set_owner_permissions(arguments.user, arguments.scan_folder) + set_owner_permissions(arguments.user, 
arguments.scanner_folder) + + start_service("secrets-finder.service") + except Exception as e: + try: + common.log( + "ERROR", "INITIALIZER", f"An error occurred during initialization: {e}" + ) + upload_log_files_to_s3( + arguments.s3_bucket_name, + os.path.join( + "secrets-finder", "scheduled-scans", "logs", arguments.scan_uuid + ), + os.path.join(arguments.scanner_folder, "logs"), + ) + finally: + token = backend.get_imdsv2_token() + headers = {"X-aws-ec2-metadata-token": token} + response = common.make_api_request( + method="GET", + url="http://169.254.169.254/latest/meta-data/instance-id", + headers=headers, + ) + instance_id = response.text + if arguments.sns_topic_arn: + sns = boto3.client("sns") + backend.send_sns_message( + sns, + arguments.sns_topic_arn, + "[SECRETS FINDER]", + f"An error occurred during initializationfor instance '{instance_id}': {e}", + ) + if arguments.terminate_on_error: + try: + ec2 = boto3.client("ec2") + backend.terminate_instance(ec2) + except Exception as e: + if arguments.sns_topic_arn: + backend.send_sns_message( + sns, + arguments.sns_topic_arn, + "[SECRETS FINDER]", + f"Instance '{instance_id}' was expected to be terminated because of an error during initialization, but an error occurred while trying to terminate it: {e}", + ) + common.shutdown() + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/configuration/secrets-finder/aws/setup.sh b/configuration/secrets-finder/aws/setup.sh new file mode 100644 index 0000000..3e72e0d --- /dev/null +++ b/configuration/secrets-finder/aws/setup.sh @@ -0,0 +1,85 @@ +#!/bin/bash +set -e + +function write { + CURRENT_TIME=$(date +'%y-%m-%d %T.%6N') + echo "[$CURRENT_TIME][SECRETS-FINDER][$1] $2" +} + +check_exit_code() { + exit_code=$? + if [ "$exit_code" -ne 0 ]; then + handle_error + fi +} + +function handle_error { + write "ERROR" "Unexpected error during initialization. Operation aborted." 
+ + terminate_instance="%{ if terminate_instance_on_error != "" }${terminate_instance_on_error}%{else}false%{ endif }" + if [[ "$terminate_instance" == "true" ]]; then + (TOKEN=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 300" -s "http://169.254.169.254/latest/api/token") && INSTANCE_ID="$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/instance-id)" && aws ec2 terminate-instances --instance-ids "$INSTANCE_ID") || shutdown -h now; + else + exit 1 + fi +} + +trap 'handle_error' ERR + +write "INFO" "Creation of user: ${instance_user}" +adduser "${instance_user}" +echo "${instance_user} ALL=(ALL) NOPASSWD:ALL" >> "/etc/sudoers.d/${instance_user}" + +write "INFO" "Creation of folders" +SECRETS_FINDER_SCAN_FOLDER="${scan_folder}" +SECRETS_FINDER_SCANNER_FOLDER="${scanner_folder}" +mkdir -p "$SECRETS_FINDER_SCAN_FOLDER" +mkdir -p "$SECRETS_FINDER_SCANNER_FOLDER" +write "INFO" "Scan folder created successfully: $SECRETS_FINDER_SCAN_FOLDER" +write "INFO" "Scanner folder created successfully: $SECRETS_FINDER_SCANNER_FOLDER" + +write "INFO" "Configuration of AWS CLI" +aws configure set region "${aws_region}" +sudo -u ${instance_user} aws configure set region "${aws_region}" + +S3_BUCKET="${s3_bucket}" +write "INFO" "Download of mainfiles from bucket: $S3_BUCKET" + +write "INFO" "Download of utils files" +aws s3 cp "s3://$S3_BUCKET/secrets-finder/scheduled-scans/scanner/common.py" "$SECRETS_FINDER_SCANNER_FOLDER" +aws s3 cp "s3://$S3_BUCKET/secrets-finder/scheduled-scans/scanner/backend.py" "$SECRETS_FINDER_SCANNER_FOLDER" + +write "INFO" "Download of backend initializer and finalizer" +aws s3 cp "s3://$S3_BUCKET/secrets-finder/scheduled-scans/scanner/initializer.py" "$SECRETS_FINDER_SCANNER_FOLDER" +aws s3 cp "s3://$S3_BUCKET/secrets-finder/scheduled-scans/scanner/finalizer.py" "$SECRETS_FINDER_SCANNER_FOLDER" + +write "INFO" "Download of requirements" +aws s3 cp 
"s3://$S3_BUCKET/secrets-finder/scheduled-scans/scanner/common.requirements.txt" "$SECRETS_FINDER_SCANNER_FOLDER" +aws s3 cp "s3://$S3_BUCKET/secrets-finder/scheduled-scans/scanner/backend.requirements.txt" "$SECRETS_FINDER_SCANNER_FOLDER" +aws s3 cp "s3://$S3_BUCKET/secrets-finder/scheduled-scans/scanner/scanner.requirements.txt" "$SECRETS_FINDER_SCANNER_FOLDER" + +write "INFO" "Download of environment files" +aws s3 cp "s3://$S3_BUCKET/secrets-finder/scheduled-scans/scans/${scan_identifier}/setup/backend.env" "$SECRETS_FINDER_SCANNER_FOLDER" +aws s3 cp "s3://$S3_BUCKET/secrets-finder/scheduled-scans/scans/${scan_identifier}/setup/scanner.env" "$SECRETS_FINDER_SCANNER_FOLDER" + +write "INFO" "Generation of scan UUID" +uuidgen -r > "$SECRETS_FINDER_SCANNER_FOLDER/uuid.txt" +export SECRETS_FINDER_SCAN_UUID="$(cat "$SECRETS_FINDER_SCANNER_FOLDER/uuid.txt")" +write "INFO" "Scan UUID generated: $SECRETS_FINDER_SCAN_UUID" + +write "INFO" "Initialization of environment" +python3 -m venv "$SECRETS_FINDER_SCANNER_FOLDER/venv" +source "$SECRETS_FINDER_SCANNER_FOLDER/venv/bin/activate" + +write "INFO" "Installation of scanner dependencies" +python3 -m pip install --upgrade pip && pip install -r "$SECRETS_FINDER_SCANNER_FOLDER/common.requirements.txt" && pip install -r "$SECRETS_FINDER_SCANNER_FOLDER/backend.requirements.txt" && pip install -r "$SECRETS_FINDER_SCANNER_FOLDER/scanner.requirements.txt" +check_exit_code + +write "INFO" "Initialization of scanner" +python3 "$SECRETS_FINDER_SCANNER_FOLDER/initializer.py" +check_exit_code + +deactivate + +write "INFO" "Configuration of scheduled scan done. Exiting..." 
+exit 0 diff --git a/configuration/secrets-finder/common.py b/configuration/secrets-finder/common.py new file mode 100644 index 0000000..dbc65dc --- /dev/null +++ b/configuration/secrets-finder/common.py @@ -0,0 +1,266 @@ +import argparse +import datetime +import dotenv +import glob +import json +import logging +import logging.config +import os +import requests +import shlex +import subprocess +import time +import uuid + + +def valid_uuid4(value): + try: + uuid_obj = uuid.UUID(value, version=4) + except ValueError: + raise argparse.ArgumentTypeError(f"{value} is not a valid UUID v4") + + if uuid_obj.version != 4: + raise argparse.ArgumentTypeError(f"{value} is not a UUID v4") + + return str(uuid_obj) + + +def str_to_bool(s): + return s.lower() in ["true", "1", "t", "y", "yes"] + + +def non_empty_string(value): + svalue = str(value) + if svalue == "": + raise argparse.ArgumentTypeError("value cannot be an empty string") + return svalue + + +def generate_unique_identifier(): + return str(uuid.uuid4()) + + +def configure_logging(destination_folder, log_file, level=logging.INFO): + create_directory(destination_folder) + logging.config.dictConfig({"version": 1, "disable_existing_loggers": True}) + logging.basicConfig( + format="%(message)s", + filename=os.path.join(destination_folder, log_file), + level=level, + ) + + +def log( + level, + context, + message, + levels={ + "INFO": logging.info, + "WARNING": logging.warning, + "ERROR": logging.error, + "DEBUG": logging.debug, + }, +): + current_time = str(datetime.datetime.now()) + + log_string = json.dumps( + {"time": current_time, "level": level, "context": context, "message": message}, + separators=(",", ":"), + ) + + return levels[level]("%s", log_string) + + +def load_environment_variables(folder, environment_file): + environment_file_path = os.path.join(folder, environment_file) + if os.path.isfile(environment_file_path): + dotenv.load_dotenv(dotenv_path=environment_file_path, override=True) + + +def 
attempt_operation_with_retry(operation, max_retries=3, backoff_factor=1): + for i in range(max_retries): + try: + return operation() + except Exception: + if i == max_retries - 1: + raise + else: + time.sleep(backoff_factor * (2**i)) + + +def shutdown(): + os.system("shutdown -h now") + + +def run_command( + command, + accepted_nonzero_return_codes=None, + env=None, + output_file=None, + error_file=None, + append_output=False, + append_error=False, + working_directory=None, +): + if env is None: + env = os.environ.copy() + else: + env = {**os.environ.copy(), **env} + + args = shlex.split(command) + + output_mode = "a" if append_output else "w" + error_mode = "a" if append_error else "w" + + out = open(output_file, output_mode) if output_file else subprocess.PIPE + err = open(error_file, error_mode) if error_file else subprocess.PIPE + + process = subprocess.Popen( + args, stdout=out, stderr=err, env=env, cwd=working_directory + ) + stdout, stderr = process.communicate() + + if output_file: + out.close() + if error_file: + err.close() + + if process.returncode != 0 and ( + accepted_nonzero_return_codes is None + or process.returncode not in accepted_nonzero_return_codes + ): + error_message = f"Command '{command}' failed" + if stderr: + error_message += f" with error: {stderr.decode()}" + raise Exception(error_message) + + return stdout.decode().rstrip() if stdout else None, ( + stderr.decode().rstrip() if stderr else None + ) + + +def create_virtual_environment(folder, virtual_environment="virtual_environment"): + command = f"python3 -m venv '{os.path.join(folder, virtual_environment)}'" + run_command(command) + + +def install_requirements_using_file(folder, file, virtual_environment=None): + virtual_environment_path = ( + os.path.join(folder, virtual_environment) if virtual_environment else None + ) + if virtual_environment_path and os.path.isdir(virtual_environment_path): + pip_bin = os.path.join(virtual_environment_path, "bin", "pip") + pip_command = f"{pip_bin} 
install -r '{file}'" + else: + pip_command = f"pip3 install -r '{file}'" + + attempt_operation_with_retry( + lambda: run_command(pip_command, env=os.environ.copy()) + ) + + +def get_python_files(folder, pattern): + files = glob.glob(os.path.join(folder, pattern)) + files.sort() + return [file for file in files if file.endswith(".py") and os.path.isfile(file)] + + +def get_requirements_file(folder, filename): + requirements_file = os.path.join(folder, filename) + return requirements_file if os.path.isfile(requirements_file) else None + + +def get_env_file(folder, filename): + env_file = os.path.join(folder, filename) + return env_file if os.path.isfile(env_file) else None + + +def run_python_script(file, requirements_file, env_file, folder): + python_command = f"python3 '{file}'" + environment_variables_to_pass = os.environ.copy() + + if requirements_file: + virtual_environment_folder_name = ( + f"{os.path.splitext(os.path.basename(file))[0]}-venv" + ) + if not os.path.isdir(os.path.join(folder, virtual_environment_folder_name)): + create_virtual_environment(folder, virtual_environment_folder_name) + install_requirements_using_file( + folder, requirements_file, virtual_environment_folder_name + ) + python_bin = os.path.join(folder, virtual_environment_folder_name, "bin/python") + python_command = f"{python_bin} '{file}'" + + if env_file: + script_environment_variables = dotenv.dotenv_values(env_file) + environment_variables_to_pass.update(script_environment_variables) + + run_command( + python_command, env=environment_variables_to_pass, working_directory=folder + ) + + +def run_python_scripts(folder, pattern): + files = get_python_files(folder, pattern) + + for file in files: + filename_without_extension = os.path.splitext(os.path.basename(file))[0] + requirements_file = get_requirements_file( + folder, f"{filename_without_extension}.requirements.txt" + ) + env_file = get_env_file(folder, f"{filename_without_extension}.env") + run_python_script(file, 
requirements_file, env_file, folder) + + +def make_api_request(method, url, max_retries=3, backoff_factor=1, **kwargs): + valid_methods = ["GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE"] + if method not in valid_methods: + raise ValueError( + f"Invalid HTTP method: {method}. Must be one of {valid_methods}." + ) + + for attempt in range(max_retries): + try: + response = requests.request(method, url, **kwargs) + response.raise_for_status() + return response + except ( + requests.exceptions.HTTPError, + requests.exceptions.Timeout, + requests.exceptions.ConnectionError, + ) as e: + if attempt == max_retries - 1: + raise + else: + time.sleep(backoff_factor * (2**attempt)) + + +def create_directory(path): + os.makedirs(path, exist_ok=True) + + +def detect_package_manager(): + try: + run_command("dnf --version") + return "dnf" + except: + pass + + try: + run_command("yum --version") + return "yum" + except: + pass + + try: + run_command("apt-get --version") + return "apt-get" + except: + pass + try: + run_command("dpkg --version") + return "dpkg" + except: + pass + + raise Exception("No supported package manager found") diff --git a/configuration/secrets-finder/common.requirements.txt b/configuration/secrets-finder/common.requirements.txt new file mode 100644 index 0000000..a6e92aa --- /dev/null +++ b/configuration/secrets-finder/common.requirements.txt @@ -0,0 +1,2 @@ +python-dotenv ~= 0.21.1 +requests ~= 2.32 diff --git a/configuration/secrets-finder/scanner/git-credentials-helper.sh b/configuration/secrets-finder/scanner/git-credentials-helper.sh new file mode 100644 index 0000000..9f9988c --- /dev/null +++ b/configuration/secrets-finder/scanner/git-credentials-helper.sh @@ -0,0 +1,6 @@ +#!/bin/bash +if echo "$1" | grep -iq "username"; then + echo "$SECRETS_FINDER_SCAN_USERNAME" +elif echo "$1" | grep -iq "password"; then + echo "$SECRETS_FINDER_SCAN_TOKEN" +fi diff --git a/configuration/secrets-finder/scanner/scan-configuration.schema.json 
b/configuration/secrets-finder/scanner/scan-configuration.schema.json new file mode 100644 index 0000000..e1fd155 --- /dev/null +++ b/configuration/secrets-finder/scanner/scan-configuration.schema.json @@ -0,0 +1,42 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://www.thomsonreuters.com/secrets-finder/scheduled-scans/scan-configuration.schema.json", + "title": "Secrets Finder: Scan configuration", + "description": "A configuration file specifying the repositories to scan for secrets.", + "properties": { + "scm": {"type": "string", "enum": ["github", "azure_devops", "custom"]}, + "endpoint": {"type": "string", "format": "uri"}, + "repositories": { + "type": "array", + "items": { + "type": "object", + "properties": { + "organization": {"type": "string"}, + "name": { + "type": "string", + "description": "The name of the repository", + "minLength": 1 + }, + "since-commit": { + "type": "string", + "description": "The commit since which to scan" + }, + "branch": { + "type": "string", + "description": "The branch to scan" + }, + "max-depth": { + "type": "integer", + "description": "The maximum depth to scan" + }, + "metadata": { + "type": "object", + "additionalProperties": true + } + }, + "required": ["organization", "name"] + } + } + }, + "required": ["scm", "repositories", "endpoint"] +} diff --git a/configuration/secrets-finder/scanner/scanner.env b/configuration/secrets-finder/scanner/scanner.env new file mode 100644 index 0000000..847c38e --- /dev/null +++ b/configuration/secrets-finder/scanner/scanner.env @@ -0,0 +1,12 @@ +AWS_REGION="${aws_region}" +SECRETS_FINDER_SCM="${scm}" +SECRETS_FINDER_SCAN_IDENTIFIER="${scan_identifier}" +SECRETS_FINDER_SCAN_FOLDER="${scan_folder}" +SECRETS_FINDER_SCANNER_FOLDER="${scanner_folder}" +SECRETS_FINDER_S3_BUCKET_NAME="${s3_bucket}" +SECRETS_FINDER_TRUFFLEHOG_INSTALLATION_PATH="/usr/bin" +SECRETS_FINDER_TRUFFLEHOG_VERSION="3.71.1" +SECRETS_FINDER_SCAN_INSTANCE_USER="${instance_user}" 
+SECRETS_FINDER_CREDENTIALS_REFERENCE="${credentials_reference}" +SECRETS_FINDER_DATADOG_API_KEY_REFERENCE="${datadog_api_key_reference}" +SECRETS_FINDER_REPORT_ONLY_VERIFIED=${try(report_only_verified, false)} diff --git a/configuration/secrets-finder/scanner/scanner.py b/configuration/secrets-finder/scanner/scanner.py new file mode 100644 index 0000000..b45f6be --- /dev/null +++ b/configuration/secrets-finder/scanner/scanner.py @@ -0,0 +1,590 @@ +import argparse +import concurrent.futures +import datetime +import json +import jsonschema +import logging +import logging.config +import os +import sys +import tempfile +import threading +import uuid + +import common + + +def configure_parser(): + parser = argparse.ArgumentParser( + prog="secrets-finder-scanner", + description="This script performs secrets detection scanning on source code repositories managed by git.", + epilog="This script has been developed by Thomson Reuters. For issues, comments or help, you can contact the maintainers on the official GitHub repository: https://github.com/thomsonreuters/secrets-finder", + ) + + parser.add_argument("--debug", action="store_true", help="store debug information") + parser.add_argument( + "--scm", + help="the source code management system to use", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCM") is None, + choices=["github", "azure_devops", "custom"], + default=os.environ.get("SECRETS_FINDER_SCM"), + ) + parser.add_argument( + "--scan-identifier", + help="the identifier for the scan", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCAN_IDENTIFIER") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_IDENTIFIER"), + ) + parser.add_argument( + "--scan-uuid", + help="the UUID associated to the scan", + type=common.valid_uuid4, + required=os.environ.get("SECRETS_FINDER_SCAN_UUID") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_UUID"), + ) + parser.add_argument( + "--scan-folder", + help="the folder 
dedicated to the scan", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCAN_FOLDER") is None, + default=os.environ.get("SECRETS_FINDER_SCAN_FOLDER"), + ) + parser.add_argument( + "--scanner-folder", + help="the folder dedicated to the scanner", + type=common.non_empty_string, + required=os.environ.get("SECRETS_FINDER_SCANNER_FOLDER") is None, + default=os.environ.get("SECRETS_FINDER_SCANNER_FOLDER"), + ) + parser.add_argument( + "--trufflehog-installation-path", + help="the path where trufflehog is installed", + type=common.non_empty_string, + default=os.environ.get( + "SECRETS_FINDER_TRUFFLEHOG_INSTALLATION_PATH", "/usr/bin" + ), + ) + parser.add_argument( + "--trufflehog-executable-name", + help="the name of the trufflehog executable", + type=common.non_empty_string, + default=os.environ.get( + "SECRETS_FINDER_TRUFFLEHOG_EXECUTABLE_NAME", "trufflehog" + ), + ) + parser.add_argument( + "--report-only-verified", + action="store_true", + help="report only verified secrets", + default=common.str_to_bool( + os.environ.get("SECRETS_FINDER_REPORT_ONLY_VERIFIED", "false") + ), + ) + parser.add_argument( + "--exit-on-error-pre", + action="store_true", + help="exit on error in pre-scan scripts", + default=os.environ.get("SECRETS_FINDER_EXIT_ON_ERROR_PRE"), + ) + parser.add_argument( + "--exit-on-error-post", + action="store_true", + help="exit on error in post-scan scripts", + default=os.environ.get("SECRETS_FINDER_EXIT_ON_ERROR_POST"), + ) + + return parser.parse_args() + + +class SecretsFinder: + def __init__( + self, + scanner_folder, + scan_uuid, + scan_identifier, + scm, + scan_configuration_schema_filename="scan-configuration.schema.json", + trufflehog_installation_path=os.path.join("usr", "bin"), + trufflehog_executable_name="trufflehog", + report_only_verified=False, + concurrency=20, + ): + self.scanner_folder = scanner_folder + self.scan_identifier = scan_identifier + self.scan_uuid = scan_uuid + self.scm = scm + self.scan_results = [] 
+ self.scan_results_lock = threading.Lock() + self.trufflehog_installation_path = trufflehog_installation_path + self.trufflehog_executable_name = trufflehog_executable_name + self.report_only_verified = report_only_verified + self.concurrency = concurrency + self.status = "ready" + + scan_configuration_schema_path = os.path.join( + self.scanner_folder, scan_configuration_schema_filename + ) + if not os.path.isfile(scan_configuration_schema_path): + raise FileNotFoundError( + f"Scan configuration schema not found: {scan_configuration_schema_path}" + ) + + with open(scan_configuration_schema_path, "r") as file: + self.scan_configuration_schema = json.load(file) + + common.log( + "INFO", "SECRETS-FINDER (main)", f"Scanner initialized: {scan_identifier}" + ) + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"Source code management system to scan: {scm}", + ) + common.log( + "DEBUG", "SECRETS-FINDER (main)", f"Concurrency level: {concurrency}" + ) + common.log( + "DEBUG", + "SECRETS-FINDER (main)", + f"Scan configuration schema: {scan_configuration_schema_path}", + ) + + def scan(self): + try: + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"Starting scan: {self.scan_identifier}", + ) + self.local_data = threading.local() + self.status = "running" + self.start = datetime.datetime.now().isoformat() + self._check_for_credentials() + configuration = self._load_and_validate_configuration( + configuration_file="repositories_to_scan.json", + location=self.scanner_folder, + ) + self._scan_repositories(configuration) + self.status = "success" + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"Scan completed: {self.scan_identifier}", + ) + except Exception as e: + self.status = "failure" + common.log( + "ERROR", + "SECRETS-FINDER (main)", + f"Scan failed: {self.scan_identifier}. 
Error: {str(e)}", + ) + finally: + self.end = datetime.datetime.now().isoformat() + self._save_all_results_to_file(location=self.scanner_folder) + + return self.status + + def _check_for_credentials(self): + if not os.environ.get("SECRETS_FINDER_SCAN_USERNAME") or not os.environ.get( + "SECRETS_FINDER_SCAN_TOKEN" + ): + raise ValueError( + "Credentials not found in environment variables: SECRETS_FINDER_SCAN_USERNAME, SECRETS_FINDER_SCAN_TOKEN" + ) + + def _load_and_validate_configuration(self, configuration_file, location): + try: + configuration_file_path = os.path.join(location, configuration_file) + + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"Loading and validating configuration file: {configuration_file_path}", + ) + if not os.path.isfile(configuration_file_path): + raise FileNotFoundError( + f"Configuration file not found: {configuration_file_path}" + ) + + with open(f"{configuration_file_path}", "r") as file: + configuration = json.load(file) + + jsonschema.validate( + instance=configuration, schema=self.scan_configuration_schema + ) + + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"Configuration file loaded and validated successfully: {configuration_file_path}", + ) + return configuration + except jsonschema.exceptions.ValidationError as validation_error: + raise ValueError( + f"Configuration file does not strictly conform to the schema: {str(validation_error)}" + ) + except Exception: + raise + + def _scan_repositories(self, configuration): + endpoint = configuration.get("endpoint") + repositories = configuration.get("repositories") + + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"Scanning {len(repositories)} repositor{'ies' if len(repositories) > 1 else 'y'} with {self.concurrency} worker{'s' if self.concurrency > 1 else ''}...", + ) + with concurrent.futures.ThreadPoolExecutor( + max_workers=self.concurrency + ) as executor: + for repository in repositories: + executor.submit(self._scan_repository, endpoint, repository) + 
executor.shutdown(wait=True) + common.log( + "INFO", "SECRETS-FINDER (main)", "All repositories scanned successfully." + ) + + def _scan_repository(self, endpoint, repository): + try: + self.local_data.execution_id = str(uuid.uuid4())[:8] + self.local_data.start = datetime.datetime.now().isoformat() + repository_scan_identifier = common.generate_unique_identifier() + repository_organization = repository.get("organization") + repository_name = repository.get("name") + + common.log( + "INFO", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Scanning repository: {repository_name} (organization: {repository_organization})", + ) + + local_directory = self._clone_repository(endpoint, repository) + + trufflehog_results = self._scan_local_git_repository( + local_directory, repository + ) + + self.local_data.end = datetime.datetime.now().isoformat() + self._save_scan_results( + repository_scan_identifier, repository, "findings", trufflehog_results + ) + + common.log( + "INFO", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Repository scanned successfully: {repository_name} (organization: {repository_organization})", + ) + except Exception as exception: + common.log( + "ERROR", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"An error occurred while processing repository {repository_name}: {str(exception)}", + ) + self.local_data.end = datetime.datetime.now().isoformat() + self._save_error(repository_scan_identifier, repository) + + def _clone_repository(self, endpoint, repository): + repository_organization = repository.get("organization") + repository_name = repository.get("name") + git_repository_url = endpoint.format( + organization=repository_organization, repository=repository_name + ) + + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Cloning repository: {git_repository_url}", + ) + + env = os.environ.copy() + env["GIT_TERMINAL_PROMPT"] = "0" + env["GIT_ASKPASS"] = os.path.join( + self.scanner_folder, 
"git-credentials-helper.sh" + ) + env["SECRETS_FINDER_SCAN_USERNAME"] = os.environ.get( + "SECRETS_FINDER_SCAN_USERNAME" + ) + env["SECRETS_FINDER_SCAN_TOKEN"] = os.environ.get("SECRETS_FINDER_SCAN_TOKEN") + + temporary_directory = tempfile.mkdtemp() + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Temporary directory created: {temporary_directory}", + ) + + try: + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Cloning repository: {git_repository_url}", + ) + common.run_command( + f"git clone '{git_repository_url}' '{temporary_directory}'", env=env + ) + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Repository cloned: {git_repository_url}", + ) + return temporary_directory + except Exception: + self._delete_local_git_repository(temporary_directory) + raise + + def _scan_local_git_repository(self, local_directory, repository): + try: + repository_since_commit = repository.get("since-commit") + repository_branch = repository.get("branch") + repository_max_depth = repository.get("max-depth") + + trufflehog_command = f"{os.path.join(self.trufflehog_installation_path, self.trufflehog_executable_name)} git --no-update --json" + if self.report_only_verified: + trufflehog_command += " --only-verified" + if repository_since_commit: + trufflehog_command += f" --since-commit={repository_since_commit}" + if repository_branch: + trufflehog_command += f" --branch={repository_branch}" + if repository_max_depth: + trufflehog_command += f" --max-depth={repository_max_depth}" + if os.path.isfile(os.path.join(self.scanner_folder, "configuration.yaml")): + trufflehog_command += f" --config={os.path.join(self.scanner_folder, 'configuration.yaml')}" + trufflehog_command += f" file://{local_directory}" + + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Scanning command to execute: {trufflehog_command}", + ) + + try: + execution_output = 
common.run_command(trufflehog_command) + except Exception as e: + raise Exception(str(e)) + + trufflehog_logs = ( + execution_output[1].splitlines() if execution_output[1] else [] + ) + for line in trufflehog_logs: + try: + if line: + common.log( + "DEBUG", + f"TRUFFLEHOG ({self.local_data.execution_id})", + line, + ) + except Exception as e: + continue + + trufflehog_results = ( + execution_output[0].splitlines() if execution_output[0] else [] + ) + return trufflehog_results + finally: + self._delete_local_git_repository(local_directory) + + def _delete_local_git_repository(self, local_directory): + try: + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Deleting local repository: {local_directory}", + ) + delete_command = f"rm -rf '{local_directory}'" + common.run_command(delete_command) + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Local repository deleted: {local_directory}", + ) + except Exception as exception: + common.log( + "ERROR", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"An error occurred while deleting local repository {local_directory}: {str(exception)}", + ) + + def _save_scan_results( + self, repository_scan_identifier, repository, results_key, results_to_process + ): + repository_organization = repository.get("organization") + repository_name = repository.get("name") + + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Saving scan results for repository: {repository_name} (organization: {repository_organization})", + ) + + context = { + "scan_uuid": repository_scan_identifier, + "organization": repository_organization, + "repository": repository_name, + "start": self.local_data.start, + "end": self.local_data.end, + } + if "metadata" in repository: + context.update({"metadata": repository.get("metadata")}) + + scan_results = [] + nb_secrets_found = 0 + for line in results_to_process: + try: + if line: + nb_secrets_found += 1 + 
scan_results.append(json.loads(line)) + except Exception: + continue + + with self.scan_results_lock: + self.scan_results.append( + {**context, **{results_key: scan_results, "status": "success"}} + ) + + common.log( + "INFO", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Number of secrets found in repository {repository_name} (organization: {repository_organization}): {nb_secrets_found}", + ) + + def _save_error(self, repository_scan_identifier, repository): + repository_organization = repository.get("organization") + repository_name = repository.get("name") + + common.log( + "DEBUG", + f"SECRETS-FINDER ({self.local_data.execution_id})", + f"Saving error for repository: {repository_name} (organization: {repository_organization})", + ) + + context = { + "scan_uuid": repository_scan_identifier, + "organization": repository_organization, + "repository": repository_name, + "start": self.local_data.start, + "end": self.local_data.end, + } + if "metadata" in repository: + context.update({"metadata": repository.get("metadata")}) + + with self.scan_results_lock: + self.scan_results.append( + {**context, **{"findings": [], "status": "failure"}} + ) + + def _save_all_results_to_file(self, location): + filename = f"{self.scan_uuid}.json" + file_path = os.path.join(location, filename) + + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"Saving all scan results to file: {file_path}", + ) + + with open(file_path, "w") as file: + json.dump( + { + "scan_type": "detection", + "scan_mode": "verified" if self.report_only_verified else "all", + "scan_uuid": self.scan_uuid, + "scan_identifier": self.scan_identifier, + "scm": self.scm, + "start": self.start, + "end": self.end, + "status": self.status, + "scan_context": "repository", + "results": self.scan_results if self.status == "success" else [], + }, + file, + ) + file.write("\n") + + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"All scan results saved to file:{file_path}", + ) + + +def 
run_python_scripts_provided_by_user(lifecycle, folder, raise_on_error): + try: + accepted_lifefycles = ["pre", "post"] + if lifecycle not in accepted_lifefycles: + raise ValueError( + f"Invalid lifecycle found while executing python scripts provided by user: {lifecycle} (should be one of: {str(accepted_lifefycles)})" + ) + + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"Executing {lifecycle}-scan scripts provided by user...", + ) + common.run_python_scripts(folder, f"{lifecycle}_*.py") + common.log( + "INFO", + "SECRETS-FINDER (main)", + f"{lifecycle}-scan scripts provided by user executed successfully.", + ) + except Exception: + common.log( + "ERROR", + "SECRETS-FINDER (main)", + f"An error occurred while executing {lifecycle}-scan scripts. {'Operation aborted.' if raise_on_error else 'User asked to continue operation.'}", + ) + if raise_on_error: + raise + else: + pass + + +def main(): + try: + common.load_environment_variables( + folder=os.path.dirname(os.path.abspath(__file__)), + environment_file="scanner.env", + ) + arguments = configure_parser() + common.configure_logging( + destination_folder=os.path.join(arguments.scanner_folder, "logs"), + log_file="secrets-finder.log", + level=logging.INFO if not arguments.debug else logging.DEBUG, + ) + except Exception as exception: + print( + f"FATAL ERROR: An unexpected error occurred during initialization: {str(exception)}" + ) + sys.exit(2) + + try: + run_python_scripts_provided_by_user( + "pre", arguments.scan_folder, arguments.exit_on_error_pre + ) + finder = SecretsFinder( + scanner_folder=arguments.scanner_folder, + scan_uuid=arguments.scan_uuid, + scan_identifier=arguments.scan_identifier, + scm=arguments.scm, + trufflehog_installation_path=arguments.trufflehog_installation_path, + trufflehog_executable_name=arguments.trufflehog_executable_name, + report_only_verified=arguments.report_only_verified, + ) + finder.scan() + run_python_scripts_provided_by_user( + "post", arguments.scan_folder, 
arguments.exit_on_error_post + ) + sys.exit(0) + except Exception as exception: + common.log( + "ERROR", + "SECRETS-FINDER (main)", + f"A fatal error occurred during scan: {str(exception)}. Operation aborted.", + ) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/configuration/secrets-finder/scanner/scanner.requirements.txt b/configuration/secrets-finder/scanner/scanner.requirements.txt new file mode 100644 index 0000000..664d5f2 --- /dev/null +++ b/configuration/secrets-finder/scanner/scanner.requirements.txt @@ -0,0 +1 @@ +jsonschema ~= 4.17 diff --git a/configuration/secrets-finder/scanner/scanner.service b/configuration/secrets-finder/scanner/scanner.service new file mode 100644 index 0000000..c244187 --- /dev/null +++ b/configuration/secrets-finder/scanner/scanner.service @@ -0,0 +1,14 @@ +[Unit] +Description=Execute a secrets detection scan +After=network.target + +[Service] +Type=oneshot +User=${instance_user} +EnvironmentFile=/etc/secrets-finder.env +ExecStart=/bin/bash -c 'export SECRETS_FINDER_SCAN_UUID=$(cat "${scanner_folder}/uuid.txt") && source "${scanner_folder}/venv/bin/activate" && python3 "${scanner_folder}/scanner.py" && deactivate' +ExecStartPost=/bin/bash -c 'export SECRETS_FINDER_SCAN_UUID=$(cat "${scanner_folder}/uuid.txt") && source "${scanner_folder}/venv/bin/activate" && python3 "${scanner_folder}/finalizer.py" && deactivate' +RemainAfterExit=true + +[Install] +WantedBy=multi-user.target diff --git a/infrastructure/secrets-finder/ongoing-scans/README.md b/infrastructure/secrets-finder/ongoing-scans/README.md new file mode 100644 index 0000000..4cc944e --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/README.md @@ -0,0 +1,217 @@ +# Ongoing scans of GitHub repositories + +

+ +## Table of Contents +1. [Introduction](#introduction) +2. [Infrastructure](#infrastructure) +3. [Scanning operations](#scanning-operations) +4. [Generated reports](#generated-reports) +5. [Configuration of GitHub Workflows](#configuration-of-github-workflows) +6. [Deployment instructions](#deployment-instructions) + +

+ +## Introduction +This folder holds the infrastructure for the ongoing secrets scanning solution. The system is designed for use with GitHub repositories belonging to one or more organizations. The open-source tool [TruffleHog](https://github.com/trufflesecurity/trufflehog) carries out the scanning of repositories for secrets. The tool is executed as part of GitHub workflows triggered by events received from a GitHub App. + +> **NOTE:**\ +> As part of the first and current release of secrets-finder, the solution relies on GitHub Enterprise Cloud (or alternatively GitHub Enterprise Server) as well as AWS. As such, the documentation below is tailored to these platforms. The maintainers of the project aim to provide support for other Source Code Management platforms and cloud providers in the next releases. + +

+ +## Infrastructure +The infrastructure consists of four key components: a GitHub App that receives events from the GitHub organizations where it is deployed and forwards them to an API Gateway REST API, which in turn sends those events to an AWS Lambda Function for processing. Once cryptographically verified, the events that are deemed in scope for scanning are passed to the GitHub workflows to carry out the secrets detection process accordingly. + +### GitHub App +The GitHub App, deployed at the level of the organization, captures all pushes to repositories and actions on pull requests. It forwards these events to the API Gateway set up in AWS. + +### CloudFront Distribution (and Route53 Record) +A CloudFront Distribution is used to front the API Gateway being deployed. With the chosen setup, direct calls made to the API Gateway are not allowed. Both the distribution and the REST API are protected by AWS WAF. Finally, a Route53 record can be set up to point the domain name to the CloudFront distribution. + +### API Gateway REST API +The API Gateway REST API features a single endpoint (`/production/secrets-finder`) that receives POST requests from the GitHub App and forwards them to the AWS Lambda Function set up. + +> **IMPORTANT:**\ +> Due to known limitations from API Gateway and GitHub Apps, requests are not authenticated at this level. Instead, we perform the verification of cryptographic signatures attached to the events within the AWS Lambda Function. While this guarantees the security of the system, DoS and cost-incurring attacks are in theory possible. The use of AWS WAF coupled with a shared secret between the CloudFront Distribution and the API Gateway aims to deter those types of attacks. + +### AWS Lambda Function +The code deployed on AWS Lambda can be found in the [`lambda/secret_detection.py`](/infrastructure/secrets-finder/ongoing-scans/aws/lambda/secrets-finder.py) file. The `handler` function serves as the entry point for the Lambda.
+ +As part of its operations, the function first verifies incoming requests transferred to it by checking the `X-Hub-Signature-256` header for a valid signature. If the signature is missing or invalid, the function returns a 401 or 403 error, respectively. + +Assuming the signature is valid, the function then checks that the event is in scope for secrets scanning (i.e., a push to the default branch of the concerned repository or a creation/update of a pull request in the reference implementation). If the event falls into the defined categories, it is forwarded to the appropriate GitHub workflow using the GitHub Actions API. + +The payload of the request sent to the GitHub workflow includes the event type (`event_type`), which is used to determine which workflow to trigger. The type can be either `secret_detection_in_default_branch` or `secret_detection_in_pull_request`. The payload also includes the original information sent by GitHub in the `client_payload.event` key. This information will be used by the workflow when performing the scan. + +### GitHub Workflows +Two GitHub workflows are deployed as part of the infrastructure: +- [`secret-detection-push.yaml`](/.github/workflows/secret-detection-push.yaml) +- [`secret-detection-pull-request.yaml`](/.github/workflows/secret-detection-pull-request.yaml) + +Those workflows are responsible for the scanning of one or several commits pushed to the default branch of a repository – as part of a push event, or added to pull requests, respectively. They are triggered using a `repository_dispatch` event sent by the AWS Lambda Function set up. + +While the AWS Lambda Function verifies the nature of the events received, the GitHub Workflows themselves check that the events fall into the expected categories before running secrets detection jobs.
+ +The following GitHub Actions are used within the workflows: +- [`actions/checkout@v4`](https://github.com/actions/checkout) +- [`actions/github-script@v7`](https://github.com/actions/github-script) +- [`actions/upload-artifact@v4`](https://github.com/actions/upload-artifact) +- [`actions/download-artifact@v4`](https://github.com/actions/download-artifact) +- [`aws-actions/configure-aws-credentials@v4`](https://github.com/aws-actions/configure-aws-credentials) + +

+ +## Scanning operations +Each workflow carries out operations that are specific to the context being considered, while the main logic remains the same across all workflows. + +Contextual information is logged in the workflow run and secrets detection is performed. In the context of a push to the default branch of a repository, all commits belonging to that push are reviewed. For a pull request, all commits since the creation of the pull request are deemed in scope for scanning, each time a scan is performed. + +If any findings are found, the logs are parsed and displayed (if debug is enabled). The final report is then generated and sent to the storage location specified (i.e., an S3 bucket in the reference implementation). Note that even when no findings are found, a report is generated, to allow users to better track how their secrets detection program behaves. + +A list of findings is also properly formatted for direct reporting to developers. In the context of a commit pushed to a default branch, issues are created and assigned to the people that have committed one or more secrets. When secrets are found in a pull request, a request for comment is added and lists all the commits belonging to the pull request that contain hardcoded secrets. Note that a `leaked-secrets` tag is added automatically to concerned pull requests and issues. + +When repositories are public, issues and requests for comments are only added if repositories are first made private (cf. options below). This conditional alerting mechanism aligns with "secure by default" best practices. + +

+ +## Generated reports +Each time a scan is performed, a report is generated and persisted. This report contains a JSON object made of the following elements: +- `scan_type`: always `prevention` with ongoing scans +- `start`: date (in ISO format) indicating when the scan started +- `end`: date (in ISO format) indicating when the scan finished +- `status`: either `success` if the scan could be performed, or `failure` otherwise +- `scan_mode`: `verified` if only verified secrets are reported, `all` otherwise (the number of findings reported does not influence this value) +- `scan_context`: `commit` when scanning push events made to default branches, or `pull_request` when scanning a pull request +- `scan_uuid`: unique identifier representing the scan performed +- `scan_identifier`: always `github_secrets_finder` in this context +- `scm`: always `github` in this context +- `results`: an array containing exactly one entry, as specified below + +The `results` key holds an array where the single object being reported exposes the following elements: +- `scan_uuid`: the identifier representing the scan of the repository (different from the top-level `scan_uuid` field) +- `start`: same as the top-level key of the same name +- `end`: same as the top-level key of the same name +- `organization`: the name of the GitHub organization the repository belongs to +- `repository`: the name of the repository scanned +- `status`: either `success` if the scan could be performed, or `failure` otherwise (the number of findings reported does not influence this value) +- `metadata`: object containing an `identifier` key (the commit hash or pull request number, depending on the scan context), and a `created_at` key (when the commit or the pull request was created, respectively) +- `findings`: an array of findings as returned by TruffleHog, if any found + +

+ +## Configuration of GitHub Workflows +The following GitHub variables are used within the workflows:\ +(see [Deployment instructions](#deployment-instructions) for more information) +- `AWS_REGION`: the region to use when configuring the AWS client +- `AWS_ROLE_ARN`: the ARN of the role to assume when pushing results to the S3 bucket (should have put permissions) +- `AWS_S3_BUCKET_NAME`: the name of the S3 bucket where results are stored +- `SCAN_TIMEOUT_MINUTES`: how long to wait for the scanner to report results before failing the job (default is 15 minutes) +- `CUSTOM_DETECTORS_CONFIG_FILE`: the path to a custom detectors file as supported by TruffleHog (e.g., `configuration/custom_detectors.yaml`, assuming configuration is located at the root of the repository) +- `REPORT_ONLY_VERIFIED_SECRETS`: if `true`, only secrets that are verified by TruffleHog are reported, otherwise all secrets found are reported (default is all) +- `HIDE_PUBLIC_REPOSITORIES_IF_SECRETS_FOUND`: if `true`, any public repository where secrets are found has its visibility changed; if the operation fails, creation of issues/requests for comments is aborted (default is false) + +The following GitHub secrets are used within the workflows:\ +(see [Deployment instructions](#deployment-instructions) for more information) +- `ORG_TOKEN`: the token used to fetch the repository to scan, and if needed, take actions (change of repository visibility, creation of issues/requests for comments, addition of labels, and assignment of issues to selected users) +- `AWS_ACCESS_KEY_ID`: the access key ID of the principal to use for authentication, when assuming the role specified in `AWS_ROLE_ARN` +- `AWS_SECRET_ACCESS_KEY`: the access key secret of the principal to use for authentication, when assuming the role specified in `AWS_ROLE_ARN` + +

+ +## Deployment instructions +To set up the infrastructure, please proceed with the following steps. It is assumed that the workflows have already been deployed within the repository responsible for performing the scans, in the `.github/workflows` directory. This repository can be referenced using the `${var.github_secret_prevention_workflow_org}` and `${var.github_secret_prevention_workflow_repository}` variables when deploying the infrastructure with Terraform. + +### Preliminary setup for CloudFront and Route53 +The API Gateway REST API is fronted by an AWS CloudFront Distribution and a Route53 Record can be requested for creation. + +While the resources are deployed alongside the rest of the infrastructure, the following steps must be performed manually to configure the CloudFront Distribution properly, in case you want to use a custom SSL certificate (`${var.use_custom_certificate}` variable set to true). + +When using a custom SSL certificate, the Common Name of the certificate should be `${var.endpoint}.${var.hosted_zone}`, where `${var.endpoint}` is the name of the endpoint specified and `${var.hosted_zone}` is the public hosted zone selected available in Route53, as specified in the `terraform.tfvars` file of the Terraform module. + +Custom certificates should be registered in AWS Certificate Manager. Users are provided with a shell script helper (see [`pkcs12-to-pem-converter.sh`](/infrastructure/secrets-finder/ongoing-scans/aws/certificate/pkcs12-to-pem-converter.sh)) to export the required information from a certificate stored in PKCS#12 format. You can learn more about how to use the script by running the `./pkcs12-to-pem-converter.sh --help` command. + +> **Warning:**\ +> When using `--decrypt-private-key` option, the script will generate a `private_key_insecure.pem` file containing the private key in plain text. This file should be deleted after the certificate is registered in AWS Certificate Manager. 
+ +> **Note:**\ +> Once the certificate is registered, the ARN of the certificate should be specified in the following variable of the Terraform module: `${var.certificate_arn}` + +### Registration of required secrets in AWS Secrets Manager +Using the [`secrets` module](/infrastructure/secrets-finder/setup/aws/secrets), you must store: +- the GitHub token to use when forwarding events to the GitHub workflows; +- the GitHub App secret used when configuring the GitHub App; and +- the Web ACL secret to authenticate requests received by the API Gateway REST API. + +The references to those secrets (i.e., the secret names) should then be specified in the respective variables listed below: +- `${var.github_token_reference}` +- `${var.github_app_secret_reference}` +- `${var.api_gateway_web_acl_secret_reference}` + +### Registration of Datadog API token in AWS Secrets Manager +You can optionally use Datadog for reporting on AWS Lambda activity. For this, you need to store the API key in Secrets Manager, e.g., by using the [`secrets` module](/infrastructure/secrets-finder/setup/aws/secrets) provided. Then, you should specify the `${var.datadog_api_key_reference}` variable, which represents the name of the secret stored in Secrets Manager and holding the API key to use. You should also specify the service name for Datadog (`${var.datadog_service_name}` variable). + +### Deployment of the AWS infrastructure +> **Note:**\ +> It is assumed that you have already [installed Terraform](https://developer.hashicorp.com/terraform/downloads) and configured your AWS credentials accordingly for the profile you want to use. + +From the `lambda` folder, create a ZIP archive of the AWS Lambda Function: +```bash +chmod +x package.sh && ./package.sh -o secrets-finder.zip +``` + +> **Note:**\ +> If using the `-o` (or `--output`) option to provide a name, ensure that the name ends with `.zip`.
If no name is provided, the resulting archive will be named in the following format:\ +> `secrets-finder-$${DDMMYYYY}-$${SHORT_SHA256_LAMBDA}-$${SHORT_SHA256_REQUIREMENTS}.zip`\ +> The short SHA256 values are the first 8 characters of the full SHA256 digests. + +Then, navigate to the [`infrastructure/secrets-finder/ongoing-scans/aws`](/infrastructure/secrets-finder/ongoing-scans/aws) directory. + +To configure the S3 backend for Terraform, modify the `s3.tfbackend` file by setting the appropriate values. Be sure to reference the correct AWS profile in the `profile` key. + +Then, initialize Terraform: +```bash +terraform init -backend-config=s3.tfbackend +``` + +> **IMPORTANT:**\ +> To successfully deploy the infrastructure, it is assumed that the S3 Bucket holding the remote states already exists. This also holds for the DynamoDB Table listing the locks. You are responsible for the creation of such resources. We recommend reusing the same bucket and table across all modules of secrets-finder. In such case, make sure to specify a different path for each module. + +Next, create a `terraform.tfvars` file and set the required variables. This file as well as the [README.md](/infrastructure/secrets-finder/ongoing-scans/aws/README.md) file provided alongside the module provide valuable information about the purpose of each variable. + +Lastly, review the changes to be made and, if satisfactory, proceed with deploying the infrastructure by following the steps below: +```bash +# Review changes +terraform plan + +# Deploy changes +terraform apply -auto-approve +``` + +Upon successful completion, the following outputs should be available: +- `api_gateway_url` +- `cloudwatch_logs` +- `lambda_execution_role` +- `lambda_function` +- `cloudfront_distribution` +- `route53_record` (if requested) + +### Deployment and installation of the GitHub App +A GitHub App should be created and then installed at the level of the organization(s) you want to scan.
The GitHub App should be configured as follows: + +In the *General* tab: +- **Webhook URL**\ + `${route53_record}/secrets-finder` where `${route53_record}` is the value of the output of same name from the Terraform module. If no record should be created, then the webhook URL should be `${cloudfront_distribution_endpoint}/secrets-finder`. +- **Webhook secret**\ + The name of the secret stored in Secrets Manager and referenced in the `${var.github_app_secret_reference}` variable of the Terraform module. + +In the *Permissions & events* tab, set the following permissions: +- `Contents`: Read and write +- `Metadata`: Read-only +- `Pull requests`: Read-only + +In the *Permissions & events* tab, subscribe to the following events: +- `Push` +- `Pull request` + +During installation in an organization, you are expected to define which repositories the GitHub App should have access to, i.e., the repositories subject to scanning. diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/README.md b/infrastructure/secrets-finder/ongoing-scans/aws/README.md new file mode 100644 index 0000000..3adf33a --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/README.md @@ -0,0 +1,96 @@ +# aws + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >=1.7 | +| [aws](#requirement\_aws) | ~> 5.0 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | ~> 5.0 | + +## Modules + +No modules.
+ +## Resources + +| Name | Type | +|------|------| +| [aws_api_gateway_deployment.production](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/api_gateway_deployment) | resource | +| [aws_api_gateway_integration.github_app_event_handler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/api_gateway_integration) | resource | +| [aws_api_gateway_method.post](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/api_gateway_method) | resource | +| [aws_api_gateway_method_settings.log_setting](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/api_gateway_method_settings) | resource | +| [aws_api_gateway_resource.secrets_finder](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/api_gateway_resource) | resource | +| [aws_api_gateway_rest_api.gateway](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/api_gateway_rest_api) | resource | +| [aws_api_gateway_stage.production](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/api_gateway_stage) | resource | +| [aws_cloudfront_distribution.distribution](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudfront_distribution) | resource | +| [aws_cloudwatch_log_group.logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | +| [aws_cloudwatch_log_group.waf_log_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | +| [aws_iam_policy.policy_for_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.lambda_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| 
[aws_iam_role_policy_attachment.lambda_execution_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_lambda_function.github_app_event_handler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | +| [aws_lambda_permission.api_gateway_permission](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | +| [aws_route53_record.record](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/route53_record) | resource | +| [aws_wafv2_web_acl.api_gateway_web_acl](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/wafv2_web_acl) | resource | +| [aws_wafv2_web_acl.cloudfront_web_acl](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/wafv2_web_acl) | resource | +| [aws_wafv2_web_acl_association.api_gateway_web_acl_association](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/wafv2_web_acl_association) | resource | +| [aws_wafv2_web_acl_logging_configuration.api_gateway_web_acl_logging](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/wafv2_web_acl_logging_configuration) | resource | +| [aws_wafv2_web_acl_logging_configuration.cloudfront_web_acl_logging](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/wafv2_web_acl_logging_configuration) | resource | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_cloudwatch_log_group.existing_waf_log_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/cloudwatch_log_group) | data source | +| [aws_iam_policy_document.lambda_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| 
[aws_iam_policy_document.permissions_for_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_route53_zone.hosted_zone](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/route53_zone) | data source | +| [aws_secretsmanager_secret.api_gateway_web_acl_secret](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_secretsmanager_secret.datadog_api_token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_secretsmanager_secret.github_app_secret](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_secretsmanager_secret.github_token](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_secretsmanager_secret_version.api_gateway_web_acl_secret](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret_version) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [api\_gateway\_web\_acl\_secret\_reference](#input\_api\_gateway\_web\_acl\_secret\_reference) | Name of the secret stored in Secrets Manager and containing the secret to use for the configuration of the web ACL of the API Gateway | `string` | n/a | yes | +| [aws\_profile](#input\_aws\_profile) | AWS profile to use for authentication | `string` | `"default"` | no | +| [aws\_region](#input\_aws\_region) | AWS region where to deploy resources | `string` | `"us-east-1"` | no | +| [certificate\_arn](#input\_certificate\_arn) | ARN of the ACM certificate to use for the CloudFront distribution when 'use\_custom\_certificate' variable is true | `string` | n/a | yes | +| 
[create\_route53\_record](#input\_create\_route53\_record) | Wether to create a Route53 record for the CloudFront distribution | `bool` | `true` | no | +| [create\_waf\_log\_group](#input\_create\_waf\_log\_group) | Whether to create a log group for the WAF logs | `bool` | `true` | no | +| [datadog\_api\_key\_reference](#input\_datadog\_api\_key\_reference) | Name of the secret stored in Secrets Manager and containing the Datadog API token. Leave empty if Datadog should not be used. | `string` | `null` | no | +| [datadog\_service\_name](#input\_datadog\_service\_name) | Name of the service to use for Datadog monitoring. Leave empty if Datadog should not be used. | `string` | `null` | no | +| [endpoint](#input\_endpoint) | Endpoint to use for the CloudFront distribution and Route53 record (if created) (note: 'hosted\_zone' variable will be appended to the endpoint to create the full domain name) | `string` | n/a | yes | +| [environment\_type](#input\_environment\_type) | Environment type | `string` | `"PRODUCTION"` | no | +| [github\_app\_secret\_reference](#input\_github\_app\_secret\_reference) | Name of the secret stored in Secrets Manager and containing the secret configured for the GitHub App and used for validating signature of incoming requests | `string` | n/a | yes | +| [github\_secret\_prevention\_workflow\_org](#input\_github\_secret\_prevention\_workflow\_org) | Name of the GitHub organization where the secret prevention workflows will be triggered | `string` | n/a | yes | +| [github\_secret\_prevention\_workflow\_repository](#input\_github\_secret\_prevention\_workflow\_repository) | Name of the GitHub repository where the secret prevention workflow will be triggered | `string` | n/a | yes | +| [github\_token\_reference](#input\_github\_token\_reference) | Name of the secret stored in Secrets Manager and containing the GitHub token to use for triggering the GitHub workflow | `string` | n/a | yes | +| [hosted\_zone](#input\_hosted\_zone) | The hosted 
zone to use for the CloudFront distribution | `string` | n/a | yes | +| [iam\_role\_path](#input\_iam\_role\_path) | The path to use when creating IAM roles | `string` | `"/"` | no | +| [kms\_key\_arn](#input\_kms\_key\_arn) | KMS key ARN used to encrypt the log groups. Leave empty if logs should not be encrypted. | `string` | `null` | no | +| [lambda\_archive\_file\_path](#input\_lambda\_archive\_file\_path) | Path to the archive file containing the Lambda function code | `string` | n/a | yes | +| [permissions\_boundary\_arn](#input\_permissions\_boundary\_arn) | The name of the IAM permissions boundary to attach to the IAM role created by the module | `string` | `null` | no | +| [project\_name](#input\_project\_name) | Name of the project (should be the same across all modules of secrets-finder to ensure consistency) | `string` | `"secrets-finder"` | no | +| [route53\_record\_name](#input\_route53\_record\_name) | Name of the Route53 record to create when 'create\_route53\_record' is true | `string` | `null` | no | +| [tags](#input\_tags) | A map of tags to add to the resources | `map(string)` | `{}` | no | +| [use\_custom\_certificate](#input\_use\_custom\_certificate) | Whether to use a custom certificate for the CloudFront distribution | `bool` | `true` | no | +| [waf\_log\_group\_name](#input\_waf\_log\_group\_name) | Name of the log group to use for the WAF logs (if 'create\_waf\_log\_group' is true, name is used to create the log group) | `string` | `null` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [api\_gateway\_url](#output\_api\_gateway\_url) | n/a | +| [cloudfront\_distribution](#output\_cloudfront\_distribution) | n/a | +| [cloudwatch\_logs](#output\_cloudwatch\_logs) | n/a | +| [lambda\_execution\_role](#output\_lambda\_execution\_role) | n/a | +| [lambda\_function](#output\_lambda\_function) | n/a | +| [route53\_record](#output\_route53\_record) | n/a | + diff --git 
a/infrastructure/secrets-finder/ongoing-scans/aws/api-gateway.tf b/infrastructure/secrets-finder/ongoing-scans/aws/api-gateway.tf new file mode 100644 index 0000000..95c7ec4 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/api-gateway.tf @@ -0,0 +1,62 @@ +resource "aws_api_gateway_rest_api" "gateway" { + name = var.project_name + description = "This API Gateway receives events relating to the GitHub organization and forwards them to a Lambda function for secrets detection scanning." +} + +resource "aws_api_gateway_resource" "secrets_finder" { + rest_api_id = aws_api_gateway_rest_api.gateway.id + parent_id = aws_api_gateway_rest_api.gateway.root_resource_id + path_part = "secrets-finder" +} + +resource "aws_api_gateway_method" "post" { + rest_api_id = aws_api_gateway_rest_api.gateway.id + resource_id = aws_api_gateway_resource.secrets_finder.id + http_method = "POST" + authorization = "NONE" +} + +resource "aws_api_gateway_method_settings" "log_setting" { + rest_api_id = aws_api_gateway_rest_api.gateway.id + stage_name = aws_api_gateway_stage.production.stage_name + method_path = "*/*" + settings { + logging_level = "INFO" + data_trace_enabled = true + metrics_enabled = true + } +} + +resource "aws_api_gateway_integration" "github_app_event_handler" { + rest_api_id = aws_api_gateway_rest_api.gateway.id + resource_id = aws_api_gateway_resource.secrets_finder.id + http_method = aws_api_gateway_method.post.http_method + integration_http_method = "POST" + type = "AWS_PROXY" + uri = aws_lambda_function.github_app_event_handler.invoke_arn + passthrough_behavior = "WHEN_NO_MATCH" +} + +resource "aws_api_gateway_deployment" "production" { + rest_api_id = aws_api_gateway_rest_api.gateway.id + + triggers = { + redeployment = sha1(jsonencode([ + aws_api_gateway_resource.secrets_finder.id, + aws_api_gateway_method.post.id, + aws_api_gateway_integration.github_app_event_handler.id, + aws_wafv2_web_acl.api_gateway_web_acl.id, + 
aws_lambda_function.github_app_event_handler.id, + aws_iam_role.lambda_execution_role.id + ])) + } + + depends_on = [aws_api_gateway_integration.github_app_event_handler] +} + +resource "aws_api_gateway_stage" "production" { + deployment_id = aws_api_gateway_deployment.production.id + rest_api_id = aws_api_gateway_rest_api.gateway.id + stage_name = "production" + xray_tracing_enabled = true +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/cloudfront.tf b/infrastructure/secrets-finder/ongoing-scans/aws/cloudfront.tf new file mode 100644 index 0000000..ceebfcc --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/cloudfront.tf @@ -0,0 +1,63 @@ +resource "aws_cloudfront_distribution" "distribution" { + enabled = true + comment = "CloudFront distribution serving the API Gateway of Secrets Finder" + + origin { + origin_id = var.endpoint + domain_name = "${aws_api_gateway_deployment.production.rest_api_id}.execute-api.${var.aws_region}.amazonaws.com" + origin_path = "/production" + + custom_origin_config { + http_port = 80 + https_port = 443 + origin_protocol_policy = "https-only" + origin_ssl_protocols = ["TLSv1.2"] + } + + custom_header { + name = "x-waf-secret" + value = data.aws_secretsmanager_secret_version.api_gateway_web_acl_secret.secret_string + } + } + + aliases = ["${var.endpoint}.${var.hosted_zone}"] + + is_ipv6_enabled = true + http_version = "http2" + + default_cache_behavior { + target_origin_id = var.endpoint + viewer_protocol_policy = "https-only" + allowed_methods = ["GET", "HEAD", "OPTIONS", "POST", "PUT", "PATCH", "DELETE"] + cached_methods = ["GET", "HEAD", "OPTIONS"] + smooth_streaming = false + compress = true + min_ttl = 0 + default_ttl = 0 + max_ttl = 0 + + # deprecated + forwarded_values { + query_string = false + cookies { + forward = "none" + } + headers = ["Authorization"] + } + } + + restrictions { + geo_restriction { + restriction_type = "none" + } + } + + viewer_certificate { + cloudfront_default_certificate = 
var.use_custom_certificate ? false : true + acm_certificate_arn = var.use_custom_certificate ? var.certificate_arn : null + ssl_support_method = "sni-only" + minimum_protocol_version = "TLSv1.2_2021" + } + + web_acl_id = aws_wafv2_web_acl.cloudfront_web_acl.arn +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/cloudwatch.tf b/infrastructure/secrets-finder/ongoing-scans/aws/cloudwatch.tf new file mode 100644 index 0000000..97e4907 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/cloudwatch.tf @@ -0,0 +1,26 @@ +resource "aws_cloudwatch_log_group" "logs" { + name = "/aws/lambda/${aws_lambda_function.github_app_event_handler.function_name}" + retention_in_days = 30 + + kms_key_id = var.kms_key_arn +} + +resource "aws_cloudwatch_log_group" "waf_log_group" { + count = var.create_waf_log_group ? 1 : 0 + name = var.waf_log_group_name + retention_in_days = 30 + + kms_key_id = var.kms_key_arn + + lifecycle { + precondition { + condition = (var.create_waf_log_group == true) && (var.waf_log_group_name != null) + error_message = "The WAF log group name to create is missing" + } + } +} + +data "aws_cloudwatch_log_group" "existing_waf_log_group" { + count = var.create_waf_log_group || var.waf_log_group_name == null ? 
0 : 1 + name = var.waf_log_group_name +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/iam.tf b/infrastructure/secrets-finder/ongoing-scans/aws/iam.tf new file mode 100644 index 0000000..9f0608f --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/iam.tf @@ -0,0 +1,70 @@ +data "aws_iam_policy_document" "lambda_assume_role" { + statement { + effect = "Allow" + principals { + identifiers = ["lambda.amazonaws.com"] + type = "Service" + } + actions = ["sts:AssumeRole"] + } +} + +resource "aws_iam_role" "lambda_execution_role" { + name = "${var.project_name}-execution-role" + assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json + path = var.iam_role_path + permissions_boundary = var.permissions_boundary_arn +} + +data "aws_iam_policy_document" "permissions_for_execution_role" { + dynamic "statement" { + for_each = (var.datadog_api_key_reference != null) ? [var.datadog_api_key_reference] : [] + content { + sid = "FetchDatadogAPIToken" + effect = "Allow" + actions = [ + "secretsmanager:GetSecretValue" + ] + resources = [data.aws_secretsmanager_secret.datadog_api_token[0].arn] + } + } + + statement { + sid = "FetchGitHubToken" + effect = "Allow" + actions = [ + "secretsmanager:GetSecretValue" + ] + resources = [data.aws_secretsmanager_secret.github_token.arn] + } + + statement { + sid = "FetchGitHubAppSecret" + effect = "Allow" + actions = [ + "secretsmanager:GetSecretValue" + ] + resources = [data.aws_secretsmanager_secret.github_app_secret.arn] + } + + statement { + sid = "WriteToCloudWatchLogGroup" + effect = "Allow" + actions = [ + "logs:CreateLogStream", + "logs:PutLogEvents", + ] + resources = ["arn:aws:logs:*:*:*"] + } +} + +resource "aws_iam_policy" "policy_for_execution_role" { + name = "${var.project_name}-execution-role-permissions" + description = "Policy granting necessary permissions to Lambda execution instance" + policy = data.aws_iam_policy_document.permissions_for_execution_role.json +} + +resource 
"aws_iam_role_policy_attachment" "lambda_execution_policy" { + policy_arn = aws_iam_policy.policy_for_execution_role.arn + role = aws_iam_role.lambda_execution_role.name +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/lambda.tf b/infrastructure/secrets-finder/ongoing-scans/aws/lambda.tf new file mode 100644 index 0000000..5a0d5b6 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/lambda.tf @@ -0,0 +1,48 @@ +resource "aws_lambda_function" "github_app_event_handler" { + function_name = var.project_name + role = aws_iam_role.lambda_execution_role.arn + architectures = ["arm64"] + runtime = "python3.11" + handler = "secrets_finder.handler" + + filename = var.lambda_archive_file_path + source_code_hash = filebase64sha256(var.lambda_archive_file_path) + + layers = var.datadog_api_key_reference != null ? [ + "arn:aws:lambda:us-east-1:464622532012:layer:Datadog-Python311-ARM:78", + "arn:aws:lambda:us-east-1:464622532012:layer:Datadog-Extension-ARM:47" + ] : null + + environment { + variables = { + SECRETS_FINDER_GITHUB_TOKEN_REFERENCE = var.github_token_reference + SECRETS_FINDER_GITHUB_APP_SECRET_REFERENCE = var.github_app_secret_reference + GITHUB_ORGANIZATION = var.github_secret_prevention_workflow_org + GITHUB_REPOSITORY = var.github_secret_prevention_workflow_repository + + DD_SITE = var.datadog_api_key_reference != null ? "datadoghq.com" : null + DD_API_KEY_SECRET_ARN = var.datadog_api_key_reference != null ? data.aws_secretsmanager_secret.datadog_api_token[0].arn : null + DD_ENHANCED_METRICS = var.datadog_api_key_reference != null ? true : null + DD_TRACE_ENABLED = var.datadog_api_key_reference != null ? true : null + DD_LOGS_INJECTION = var.datadog_api_key_reference != null ? true : null + DD_ENV = "prod" + DD_SERVICE = var.datadog_api_key_reference != null ? 
var.datadog_service_name : null + } + } + + lifecycle { + precondition { + condition = (var.datadog_api_key_reference == null && var.datadog_service_name == null) || (var.datadog_api_key_reference != null && var.datadog_service_name != null) + error_message = "Either both or none of the Datadog parameters must be set" + } + } +} + +resource "aws_lambda_permission" "api_gateway_permission" { + statement_id = "AllowInvocationOfLambdaByAPIGateway" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.github_app_event_handler.function_name + principal = "apigateway.amazonaws.com" + + source_arn = "${aws_api_gateway_rest_api.gateway.execution_arn}/*" +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/lambda/package.sh b/infrastructure/secrets-finder/ongoing-scans/aws/lambda/package.sh new file mode 100755 index 0000000..5a5a0d4 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/lambda/package.sh @@ -0,0 +1,85 @@ +#!/bin/bash +set -e + +function write { + printf "%-10s %s\n" "[$1]" "$2" +} + +function show_help() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --output Output filename (default: secrets-finder--.zip)" + echo " --help Display this help and exit" + echo "" +} + +echo "****************************************" +echo "* Packaging of AWS Lambda function *" +echo "****************************************" +if ! grep -E -i "(debian|ubuntu)" /etc/*-release &>/dev/null && ! [[ "$OSTYPE" =~ ^darwin ]]; then + write "ERROR" "Unsupported operating system ($OSTYPE). Only macOS and Linux operating systems are supported." + exit 1 +fi + +if ! command -v python3 &>/dev/null; then + write "ERROR" "Python 3 is not installed. Operation aborted." >&2 + exit 1 +fi + +if ! command -v pip &>/dev/null; then + write "ERROR" "pip is not installed. Operation aborted." 
>&2 + exit 1 +fi + +LAMBDA_DIRECTORY=$(pwd) + +while [[ $# -gt 0 ]]; do + key="$1" + + case $key in + -o | --output) + OUTPUT="$2" + shift + shift + ;; + --help) + show_help + exit 0 + ;; + *) + echo "Invalid option: $1" + exit 1 + ;; + esac +done + +if [ -z "$OUTPUT" ]; then + if [[ "$OSTYPE" =~ ^darwin ]]; then + SHORT_SHA256_LAMBDA_FUNCTION=$(shasum -a 256 secrets-finder.py | cut -c1-8) + SHORT_SHA256_REQUIREMENTS=$(shasum -a 256 requirements.txt | cut -c1-8) + fi + if [[ "$OSTYPE" =~ ^linux ]]; then + SHORT_SHA256_LAMBDA_FUNCTION=$(sha256sum secrets-finder.py | cut -c1-8) + SHORT_SHA256_REQUIREMENTS=$(sha256sum requirements.txt | cut -c1-8) + fi + OUTPUT="secrets-finder-$(date +%d%m%Y)-$SHORT_SHA256_LAMBDA_FUNCTION-$SHORT_SHA256_REQUIREMENTS.zip" +fi + +if ! [[ "$OUTPUT" =~ ^[a-zA-Z0-9_./-]+$ ]]; then + write "ERROR" "Invalid output filename: $OUTPUT. Operation aborted." >&2 + exit 1 +fi + +BUILD_FOLDER=$(mktemp -d) + +write "INFO" "Fetching requirements..." +python3 -m pip install -r requirements.txt -t "$BUILD_FOLDER" 1>/dev/null + +cd "$BUILD_FOLDER" && zip -r -q -X -9 "$LAMBDA_DIRECTORY/$OUTPUT" . && rm -rf "$BUILD_FOLDER" + +cd "$LAMBDA_DIRECTORY" +write "INFO" "Building archive..." 
+zip -q -X -9 "$OUTPUT" secrets-finder.py + +write "INFO" "Archive built: $OUTPUT" diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/lambda/requirements.txt b/infrastructure/secrets-finder/ongoing-scans/aws/lambda/requirements.txt new file mode 100644 index 0000000..daeb2ff --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/lambda/requirements.txt @@ -0,0 +1 @@ +requests ~= 2.32 diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/lambda/secrets-finder.py b/infrastructure/secrets-finder/ongoing-scans/aws/lambda/secrets-finder.py new file mode 100644 index 0000000..e3b926c --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/lambda/secrets-finder.py @@ -0,0 +1,142 @@ +import boto3 +import hashlib +import hmac +import json +import os +import requests + + +HTTP_MAX_RETRIES = 5 +HTTP_BACKOFF_FACTOR = 0.1 +HTTP_STATUSES_ELIGIBLE_FOR_RETRY = [429, 500, 502, 503, 504] + + +secrets_manager = boto3.client("secretsmanager") + + +def handler(event, _): + validate_request(event) + status_code = forward_request(event) + return {"statusCode": status_code} + + +def validate_request(event): + if "body" not in event: + raise Exception("Error: Invalid request") + + signature = event["headers"]["X-Hub-Signature-256"] + if not signature: + raise Exception("Missing X-Hub-Signature-256 header") + + github_app_secret_reference = os.environ[ + "SECRETS_FINDER_GITHUB_APP_SECRET_REFERENCE" + ] + github_app_secret = secrets_manager.get_secret_value( + SecretId=github_app_secret_reference + )["SecretString"] + + if not verify_signature(github_app_secret, signature, event["body"]): + raise Exception("Unauthorized") + + +def verify_signature(secret, signature, payload): + mac = hmac.new( + secret.encode("utf-8"), msg=payload.encode("utf-8"), digestmod=hashlib.sha256 + ) + expected_signature = f"sha256={mac.hexdigest()}" + return hmac.compare_digest(expected_signature, signature) + + +def forward_request(event): + payload = 
json.loads(event["body"]) + + if "commits" in payload and "ref" in payload: + if ( + payload.get("ref") + == f"refs/heads/{(repository := payload.get('repository')) and (default_branch := repository.get('default_branch'))}" + ): + event_type = "secrets_detection_in_default_branch" + formatted_event = { + "ref": payload.get("ref"), + "commits": list( + map( + lambda c: { + "id": c.get("id"), + "author": c.get("author").get("username"), + "url": c.get("url"), + "timestamp": c.get("timestamp"), + }, + payload["commits"], + ) + ), + "before": payload.get("before"), + "after": payload.get("after"), + "pusher": payload.get("pusher").get("name"), + "repository": { + "default_branch": default_branch, + "name": repository.get("name"), + "full_name": repository.get("full_name"), + "owner": repository.get("owner").get("login"), + "visibility": repository.get("visibility"), + }, + } + else: + return {"statusCode": 204} + + elif "pull_request" in payload and "action" in payload: + if payload.get("action") in ["opened", "synchronize", "reopened"]: + event_type = "secrets_detection_in_pull_request" + pull_request = payload.get("pull_request") + repository = payload.get("repository") + formatted_event = { + "action": payload.get("action"), + "pull_request": { + "number": pull_request.get("number"), + "head": pull_request.get("head").get("ref"), + "base": pull_request.get("base").get("ref"), + "created_at": pull_request.get("created_at"), + }, + "repository": { + "name": repository.get("name"), + "full_name": repository.get("full_name"), + "owner": repository.get("owner").get("login"), + "visibility": repository.get("visibility"), + }, + } + else: + return {"statusCode": 204} + else: + raise ValueError("Unrecognized request. 
Operation canceled.") + + github_token_reference = os.environ["SECRETS_FINDER_GITHUB_TOKEN_REFERENCE"] + github_token = secrets_manager.get_secret_value(SecretId=github_token_reference)[ + "SecretString" + ] + + organization = os.getenv("GITHUB_ORGANIZATION") + repository = os.getenv("GITHUB_REPOSITORY") + url = f"https://api.github.com/repos/{organization}/{repository}/dispatches" + + formatted_payload = { + "event_type": event_type, + "client_payload": {"event": formatted_event}, + } + + headers = { + "Accept": "application/vnd.github.everest-preview+json", + "Authorization": f"Bearer {github_token}", + } + + requests_session = requests.Session() + requests_retry_strategy = requests.adapters.Retry( + total=HTTP_MAX_RETRIES, + backoff_factor=HTTP_BACKOFF_FACTOR, + status_forcelist=HTTP_STATUSES_ELIGIBLE_FOR_RETRY, + ) + requests_session.mount( + "https://", requests.adapters.HTTPAdapter(max_retries=requests_retry_strategy) + ) + response = requests_session.post( + url, data=json.dumps(formatted_payload).encode("utf-8"), headers=headers + ) + return response.status_code diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/locals.tf b/infrastructure/secrets-finder/ongoing-scans/aws/locals.tf new file mode 100644 index 0000000..34652b7 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/locals.tf @@ -0,0 +1,4 @@ +locals { + environment = replace(lower(var.environment_type), " ", "-") + tags = merge(try(var.tags, {}), { environment = local.environment }) +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/outputs.tf b/infrastructure/secrets-finder/ongoing-scans/aws/outputs.tf new file mode 100644 index 0000000..cb93571 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/outputs.tf @@ -0,0 +1,23 @@ +output "api_gateway_url" { + value = aws_api_gateway_deployment.production.invoke_url +} + +output "cloudwatch_logs" { + value = aws_cloudwatch_log_group.logs.arn +} + +output "lambda_execution_role" { + value = 
aws_iam_role.lambda_execution_role.arn +} + +output "lambda_function" { + value = aws_lambda_function.github_app_event_handler.arn +} + +output "cloudfront_distribution" { + value = aws_cloudfront_distribution.distribution.domain_name +} + +output "route53_record" { + value = var.create_route53_record != null ? aws_route53_record.record[*].fqdn : null +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/providers.tf b/infrastructure/secrets-finder/ongoing-scans/aws/providers.tf new file mode 100644 index 0000000..0bf2c61 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/providers.tf @@ -0,0 +1,21 @@ +terraform { + required_version = ">=1.7" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } + + backend "s3" { + encrypt = true + } +} + +provider "aws" { + region = var.aws_region + profile = var.aws_profile + + default_tags { tags = local.tags } +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/route53.tf b/infrastructure/secrets-finder/ongoing-scans/aws/route53.tf new file mode 100644 index 0000000..9bcf69b --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/route53.tf @@ -0,0 +1,13 @@ +data "aws_route53_zone" "hosted_zone" { + name = var.hosted_zone + private_zone = false +} + +resource "aws_route53_record" "record" { + count = var.create_route53_record ? 
1 : 0 + zone_id = data.aws_route53_zone.hosted_zone.zone_id + name = var.endpoint + type = "CNAME" + ttl = 300 + records = [aws_cloudfront_distribution.distribution.domain_name] +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/s3.tfbackend b/infrastructure/secrets-finder/ongoing-scans/aws/s3.tfbackend new file mode 100644 index 0000000..6fa4016 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/s3.tfbackend @@ -0,0 +1,5 @@ +bucket = "" +key = "" +region = "" +dynamodb_table = "" +profile = "" diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/secrets-manager.tf b/infrastructure/secrets-finder/ongoing-scans/aws/secrets-manager.tf new file mode 100644 index 0000000..9b680f6 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/secrets-manager.tf @@ -0,0 +1,20 @@ +data "aws_secretsmanager_secret" "api_gateway_web_acl_secret" { + name = var.api_gateway_web_acl_secret_reference +} + +data "aws_secretsmanager_secret_version" "api_gateway_web_acl_secret" { + secret_id = data.aws_secretsmanager_secret.api_gateway_web_acl_secret.id +} + +data "aws_secretsmanager_secret" "datadog_api_token" { + count = var.datadog_api_key_reference != null ? 
1 : 0 + name = var.datadog_api_key_reference +} + +data "aws_secretsmanager_secret" "github_token" { + name = var.github_token_reference +} + +data "aws_secretsmanager_secret" "github_app_secret" { + name = var.github_app_secret_reference +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/sts.tf b/infrastructure/secrets-finder/ongoing-scans/aws/sts.tf new file mode 100644 index 0000000..8fc4b38 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/sts.tf @@ -0,0 +1 @@ +data "aws_caller_identity" "current" {} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/variables.tf b/infrastructure/secrets-finder/ongoing-scans/aws/variables.tf new file mode 100644 index 0000000..471c206 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/variables.tf @@ -0,0 +1,230 @@ +variable "aws_region" { + type = string + default = "us-east-1" + description = "AWS region where to deploy resources" + + validation { + condition = can(regex("^(af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+$", var.aws_region)) + error_message = "You should enter a valid AWS region (https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html)" + } +} + +variable "aws_profile" { + type = string + default = "default" + description = "AWS profile to use for authentication" +} + +variable "environment_type" { + type = string + default = "PRODUCTION" + description = "Environment type" + + validation { + condition = contains(["PRODUCTION", "PRE-PRODUCTION", "QUALITY ASSURANCE", "INTEGRATION TESTING", "DEVELOPMENT", "LAB"], var.environment_type) + error_message = "The environment type should be one of the following values: PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB (case sensitive)" + } +} + +variable "tags" { + type = map(string) + description = "A map of tags to add to the resources" + default = {} + + validation { + condition = 
alltrue([for v in values(var.tags) : v != ""]) + error_message = "Tag values must not be empty." + } +} + +variable "permissions_boundary_arn" { + type = string + default = null + description = "The name of the IAM permissions boundary to attach to the IAM role created by the module" + + validation { + condition = var.permissions_boundary_arn == null || can(regex("^arn:aws:iam::[0-9]{12}:policy\\/([a-zA-Z0-9-_.]+)$", var.permissions_boundary_arn)) # null (the default) must pass validation, as for the other nullable variables in this file + error_message = "The provided ARN is not a valid ARN for a policy" + } +} + +variable "iam_role_path" { + type = string + default = "/" + description = "The path to use when creating IAM roles" + + validation { + condition = can(regex("^\\/([a-zA-Z0-9]+([-a-zA-Z0-9]*[a-zA-Z0-9]+)?\\/)*$", var.iam_role_path)) + error_message = "The provided path is invalid" + } +} + +variable "project_name" { + type = string + default = "secrets-finder" + description = "Name of the project (should be the same across all modules of secrets-finder to ensure consistency)" +} + +variable "github_secret_prevention_workflow_org" { + type = string + description = "Name of the GitHub organization where the secret prevention workflows will be triggered" + + validation { + condition = can(regex("^[a-zA-Z0-9]([a-zA-Z0-9-_]+[a-zA-Z0-9])*$", var.github_secret_prevention_workflow_org)) && length(var.github_secret_prevention_workflow_org) <= 39 + error_message = "The provided organization name is invalid" + } +} + +variable "github_secret_prevention_workflow_repository" { + type = string + description = "Name of the GitHub repository where the secret prevention workflow will be triggered" + + validation { + condition = can(regex("^[a-zA-Z0-9-_.]{1,100}$", var.github_secret_prevention_workflow_repository)) + error_message = "The provided repository name is invalid" + } +} + +variable "lambda_archive_file_path" { + type = string + description = "Path to the archive file containing the Lambda function code" + + validation { + condition = fileexists(var.lambda_archive_file_path) + error_message 
= "The path to the archive file is invalid" + } +} + +variable "hosted_zone" { + type = string + description = "The hosted zone to use for the CloudFront distribution" + + validation { + condition = var.hosted_zone != null + error_message = "The provided hosted zone is invalid" + } +} + +variable "endpoint" { + type = string + description = "Endpoint to use for the CloudFront distribution and Route53 record (if created) (note: 'hosted_zone' variable will be appended to the endpoint to create the full domain name)" + + validation { + condition = can(regex("^[a-zA-Z0-9]([a-zA-Z0-9-_]+[a-zA-Z0-9])*$", var.endpoint)) + error_message = "The provided endpoint is invalid" + } +} + +variable "use_custom_certificate" { + type = bool + description = "Whether to use a custom certificate for the CloudFront distribution" + default = true +} + +variable "certificate_arn" { + type = string + description = "ARN of the ACM certificate to use for the CloudFront distribution when 'use_custom_certificate' variable is true" + + validation { + condition = can(regex("^arn:aws:acm:((af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+):[0-9]{12}:certificate\\/[0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}$", var.certificate_arn)) + error_message = "The provided ARN is not a valid ARN for a certificate" + } +} + +variable "create_route53_record" { + type = bool + description = "Wether to create a Route53 record for the CloudFront distribution" + default = true +} + +variable "route53_record_name" { + type = string + default = null + description = "Name of the Route53 record to create when 'create_route53_record' is true" + + validation { + condition = var.route53_record_name == null || can(regex("^[a-zA-Z0-9]([a-zA-Z0-9-_]+[a-zA-Z0-9])*$", var.route53_record_name)) + error_message = "The provided record name is invalid" + } +} + +variable "create_waf_log_group" { + type = bool + description = "Whether to create a log group for the WAF logs" + 
default = true +} + +variable "waf_log_group_name" { + type = string + default = null + description = "Name of the log group to use for the WAF logs (if 'create_waf_log_group' is true, name is used to create the log group)" + + validation { + condition = var.waf_log_group_name == null || can(regex("^[a-zA-Z0-9]([a-zA-Z0-9-_]+[a-zA-Z0-9])*$", var.waf_log_group_name)) + error_message = "The provided log group name is invalid" + } +} + +variable "kms_key_arn" { + type = string + default = null + description = "KMS key ARN used to encrypt the log groups. Leave empty if logs should not be encrypted." + + validation { + condition = var.kms_key_arn == null || can(regex("^arn:aws:kms:(af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+:\\d{12}:key/[a-f0-9-]{36}$", var.kms_key_arn)) + error_message = "The KMS key ARN is invalid" + + } +} + +variable "github_token_reference" { + type = string + description = "Name of the secret stored in Secrets Manager and containing the GitHub token to use for triggering the GitHub workflow" + + validation { + condition = can(regex("^[a-zA-Z0-9/_+=.@-]{1,512}$", var.github_token_reference)) + error_message = "The secret name is invalid" + } +} + +variable "github_app_secret_reference" { + type = string + description = "Name of the secret stored in Secrets Manager and containing the secret configured for the GitHub App and used for validating signature of incoming requests" + + validation { + condition = can(regex("^[a-zA-Z0-9/_+=.@-]{1,512}$", var.github_app_secret_reference)) + error_message = "The secret name is invalid" + } +} + +variable "api_gateway_web_acl_secret_reference" { + type = string + description = "Name of the secret stored in Secrets Manager and containing the secret to use for the configuration of the web ACL of the API Gateway" + + validation { + condition = can(regex("^[a-zA-Z0-9/_+=.@-]{1,512}$", var.api_gateway_web_acl_secret_reference)) + error_message = "The secret name 
is invalid" + } +} + +variable "datadog_api_key_reference" { + type = string + default = null + description = "Name of the secret stored in Secrets Manager and containing the Datadog API token. Leave empty if Datadog should not be used." + + validation { + condition = var.datadog_api_key_reference == null || can(regex("^[a-zA-Z0-9/_+=.@-]{1,512}$", var.datadog_api_key_reference)) + error_message = "The secret name is invalid" + } +} + +variable "datadog_service_name" { + type = string + default = null + description = "Name of the service to use for Datadog monitoring. Leave empty if Datadog should not be used." + + validation { + condition = var.datadog_service_name == null || can(regex("^[a-zA-Z0-9-_.]+$", var.datadog_service_name)) + error_message = "The provided service name is invalid" + } +} diff --git a/infrastructure/secrets-finder/ongoing-scans/aws/waf.tf b/infrastructure/secrets-finder/ongoing-scans/aws/waf.tf new file mode 100644 index 0000000..f2c9b7f --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/aws/waf.tf @@ -0,0 +1,297 @@ +# API Gateway +resource "aws_wafv2_web_acl" "api_gateway_web_acl" { + name = "${var.project_name}-api-gateway-webacl" + description = "Web ACL for the API Gateway deployed by secrets-finder" + scope = "REGIONAL" + + custom_response_body { + key = "unauthorized" + content = "Unauthorized request" + content_type = "TEXT_PLAIN" + } + + default_action { + block { + custom_response { + response_code = 403 + custom_response_body_key = "unauthorized" + } + } + } + + rule { + name = "aws-managed-rules-common-rule-set" + priority = 0 + + override_action { + count {} + } + + statement { + managed_rule_group_statement { + name = "AWSManagedRulesCommonRuleSet" + vendor_name = "AWS" + } + } + + visibility_config { + cloudwatch_metrics_enabled = false + metric_name = "${var.project_name}-AWSManagedRulesCommonRuleSet" + sampled_requests_enabled = false + } + } + + rule { + name = "aws-managed-rules-bot-control-rule-set" + 
priority = 1 + + override_action { + count {} + } + + statement { + managed_rule_group_statement { + name = "AWSManagedRulesBotControlRuleSet" + vendor_name = "AWS" + } + } + + visibility_config { + cloudwatch_metrics_enabled = false + metric_name = "${var.project_name}-AWSManagedRulesBotControlRuleSet" + sampled_requests_enabled = false + } + } + + rule { + name = "aws-managed-rules-known-bad-inputs-rule-set" + priority = 2 + + override_action { + count {} + } + + statement { + managed_rule_group_statement { + name = "AWSManagedRulesKnownBadInputsRuleSet" + vendor_name = "AWS" + } + } + + visibility_config { + cloudwatch_metrics_enabled = false + metric_name = "${var.project_name}-AWSManagedRulesKnownBadInputsRuleSet" + sampled_requests_enabled = false + } + } + + rule { + name = "aws-managed-rules-amazon-ip-reputation-list" + priority = 3 + + override_action { + count {} + } + + statement { + managed_rule_group_statement { + name = "AWSManagedRulesAmazonIpReputationList" + vendor_name = "AWS" + } + } + + visibility_config { + cloudwatch_metrics_enabled = false + metric_name = "${var.project_name}-AWSManagedRulesAmazonIpReputationList" + sampled_requests_enabled = false + } + } + + rule { + name = "authorize-requests" + priority = 4 + + action { + allow {} + } + + statement { + byte_match_statement { + positional_constraint = "EXACTLY" + search_string = data.aws_secretsmanager_secret_version.api_gateway_web_acl_secret.secret_string + field_to_match { + single_header { + name = "x-waf-secret" + } + } + text_transformation { + priority = 0 + type = "NONE" + } + } + } + + visibility_config { + cloudwatch_metrics_enabled = true + metric_name = "${var.project_name}-api-gateway-rule" + sampled_requests_enabled = true + } + } + + visibility_config { + cloudwatch_metrics_enabled = true + metric_name = "${var.project_name}-api-gateway-web-acl" + sampled_requests_enabled = true + } +} + +resource "aws_wafv2_web_acl_logging_configuration" "api_gateway_web_acl_logging" { + 
count = var.waf_log_group_name != null ? 1 : 0 + log_destination_configs = var.create_waf_log_group != null ? ["${aws_cloudwatch_log_group.waf_log_group[0].arn}:*"] : ["${data.aws_cloudwatch_log_group.existing_waf_log_group[0].arn}:*"] + resource_arn = aws_wafv2_web_acl.api_gateway_web_acl.arn +} + +resource "aws_wafv2_web_acl_association" "api_gateway_web_acl_association" { + resource_arn = aws_api_gateway_stage.production.arn + web_acl_arn = aws_wafv2_web_acl.api_gateway_web_acl.arn +} + + + +# CloudFront +resource "aws_wafv2_web_acl" "cloudfront_web_acl" { + name = "${var.project_name}-cloudfront" + description = "Web ACL for the CloudFront distribution deployed by secrets-finder" + scope = "CLOUDFRONT" + + default_action { + allow {} + } + + rule { + name = "aws-managed-rules-amazon-ip-reputation-list" + priority = 0 + + override_action { + none {} + } + + statement { + managed_rule_group_statement { + name = "AWSManagedRulesAmazonIpReputationList" + vendor_name = "AWS" + } + } + + visibility_config { + cloudwatch_metrics_enabled = true + metric_name = "${var.project_name}-AWSManagedRulesAmazonIpReputationList" + sampled_requests_enabled = true + } + } + + rule { + name = "aws-managed-rules-common-rule-set" + priority = 1 + + override_action { + none {} + } + + statement { + managed_rule_group_statement { + name = "AWSManagedRulesCommonRuleSet" + vendor_name = "AWS" + + rule_action_override { + name = "SizeRestrictions_BODY" + action_to_use { + allow {} + } + } + } + } + + visibility_config { + cloudwatch_metrics_enabled = true + metric_name = "${var.project_name}-AWSManagedRulesCommonRuleSet" + sampled_requests_enabled = true + } + } + + rule { + name = "aws-managed-rules-known-bad-inputs-rule-set" + priority = 2 + + override_action { + none {} + } + + statement { + managed_rule_group_statement { + name = "AWSManagedRulesKnownBadInputsRuleSet" + vendor_name = "AWS" + } + } + + visibility_config { + cloudwatch_metrics_enabled = true + metric_name = 
"${var.project_name}-AWSManagedRulesKnownBadInputsRuleSet" + sampled_requests_enabled = true + } + } + + rule { + name = "aws-managed-rules-bot-control-rule-set" + priority = 3 + + override_action { + none {} + } + + statement { + managed_rule_group_statement { + name = "AWSManagedRulesBotControlRuleSet" + vendor_name = "AWS" + + managed_rule_group_configs { + aws_managed_rules_bot_control_rule_set { + inspection_level = "TARGETED" + } + } + + rule_action_override { + name = "CategoryHttpLibrary" + action_to_use { + block {} + } + } + + rule_action_override { + name = "SignalNonBrowserUserAgent" + action_to_use { + allow {} + } + } + } + } + + visibility_config { + cloudwatch_metrics_enabled = true + metric_name = "${var.project_name}-AWSManagedRulesBotControlRuleSet" + sampled_requests_enabled = true + } + } + + visibility_config { + cloudwatch_metrics_enabled = true + metric_name = "${var.project_name}-web-acl" + sampled_requests_enabled = true + } +} + +resource "aws_wafv2_web_acl_logging_configuration" "cloudfront_web_acl_logging" { + count = var.waf_log_group_name != null ? 1 : 0 + log_destination_configs = [var.create_waf_log_group != null ? 
aws_cloudwatch_log_group.waf_log_group[0].arn : data.aws_cloudwatch_log_group.existing_waf_log_group[0].arn] + resource_arn = aws_wafv2_web_acl.cloudfront_web_acl.arn +} diff --git a/infrastructure/secrets-finder/ongoing-scans/certificate/pkcs12-to-pem-converter.sh b/infrastructure/secrets-finder/ongoing-scans/certificate/pkcs12-to-pem-converter.sh new file mode 100755 index 0000000..bb9ea56 --- /dev/null +++ b/infrastructure/secrets-finder/ongoing-scans/certificate/pkcs12-to-pem-converter.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +function write { + printf "%-10s %s\n" "[$1]" "$2" +} + +function show_help() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --decrypt-private-key Decrypt the private key in private_key_insecure.pem (warning: the file will remain on disk)" + echo " --force-replacement Force the replacement of existing files (private_key.pem, private_key_insecure.pem, cert.pem, ca_chain.pem)" + echo " --help Display this help and exit" + echo "" + echo "Environment variables:" + echo " SECRETS_FINDER_CERTIFICATE_FILE Path to the PKCS12 certificate file" + echo " SECRETS_FINDER_CERTIFICATE_PASSWORD Password of the PKCS12 certificate file" +} + +echo "****************************************" +echo "* Extraction of PKCS12 certificate *" +echo "****************************************" +if [[ "$OSTYPE" != "darwin"* ]] && [[ "$OSTYPE" != "linux-gnu"* ]]; then + write "ERROR" "Unsupported operating system ($OSTYPE). Only macOS and Linux operating systems are supported." + exit 1 +fi + +DECRYPT_PRIVATE_KEY=false +FORCE_REPLACEMENT=false + +for arg in "$@"; do + case $arg in + --decrypt-private-key) + DECRYPT_PRIVATE_KEY=true + shift + ;; + --force-replacement) + FORCE_REPLACEMENT=true + shift + ;; + --help) + show_help + exit 0 + ;; + esac +done + +if ! 
command -v openssl &>/dev/null; then + write "ERROR" "openssl could not be found" + exit 1 +fi + +if [ -z "$SECRETS_FINDER_CERTIFICATE_FILE" ]; then + write "ERROR" "Environment variable missing (SECRETS_FINDER_CERTIFICATE_FILE). Specify the path to the PKCS12 certificate file and try again." + exit 1 +fi + +if [ -z "$SECRETS_FINDER_CERTIFICATE_PASSWORD" ]; then + write "ERROR" "Environment variable missing (SECRETS_FINDER_CERTIFICATE_PASSWORD). Specify the password of the PKCS12 certificate file and try again." + exit 1 +fi + +write "INFO" "Certificate: $SECRETS_FINDER_CERTIFICATE_FILE" + +if [ -f private_key.pem ] || [ -f private_key_insecure.pem ] || [ -f cert.pem ] || [ -f ca_chain.pem ]; then + if [ "$FORCE_REPLACEMENT" = false ]; then + write "ERROR" "One or more of the following files already exist: private_key.pem, private_key_insecure.pem, cert.pem, ca_chain.pem" + exit 1 + else + write "WARNING" "One or more of the following files already exist and will be replaced: private_key.pem, private_key_insecure.pem, cert.pem, ca_chain.pem" + fi +fi + +if openssl pkcs12 -in "$SECRETS_FINDER_CERTIFICATE_FILE" -nocerts -out private_key.pem --passin pass:"$SECRETS_FINDER_CERTIFICATE_PASSWORD" --passout pass:"$SECRETS_FINDER_CERTIFICATE_PASSWORD" &>/dev/null; then + write "INFO" "Private key extracted successfully" +else + write "ERROR" "Private key extraction failed" + exit 1 +fi + +if [ "$DECRYPT_PRIVATE_KEY" = true ]; then + if openssl rsa -in private_key.pem -out private_key_insecure.pem --passin pass:"$SECRETS_FINDER_CERTIFICATE_PASSWORD" &>/dev/null; then + write "INFO" "Private key decrypted successfully" + write "WARNING" "private_key_insecure.pem is not encrypted" + else + write "ERROR" "Private key decryption failed" + exit 1 + fi +else + write "INFO" "Private key not decrypted (--decrypt-private-key not specified)" +fi + +if openssl pkcs12 -in "$SECRETS_FINDER_CERTIFICATE_FILE" -clcerts -nokeys -out cert.pem --passin 
pass:"$SECRETS_FINDER_CERTIFICATE_PASSWORD" &>/dev/null; then + write "INFO" "Certificate extracted successfully" +else + write "ERROR" "Certificate extraction failed" + exit 1 +fi + +if openssl pkcs12 -in "$SECRETS_FINDER_CERTIFICATE_FILE" -cacerts -nokeys -chain -out ca_chain.pem --passin pass:"$SECRETS_FINDER_CERTIFICATE_PASSWORD" &>/dev/null; then + write "INFO" "CA chain extracted successfully" +else + write "ERROR" "CA chain extraction failed" + exit 1 +fi + +write "INFO" "All files were extracted successfully" diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/README.md b/infrastructure/secrets-finder/scheduled-scans/aws/README.md new file mode 100644 index 0000000..0fe627c --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/README.md @@ -0,0 +1,122 @@ +# Scheduled scans of git-based repositories + +

+ +## Table of Contents +1. [Introduction](#introduction) +2. [Infrastructure](#infrastructure) +3. [Scanning configuration](#scanning-configuration) +4. [Generated reports](#generated-reports) +5. [Deployment instructions](#deployment-instructions) + +

+ +## Introduction +This folder holds the infrastructure for the scheduled secrets scanning solution. The system is designed for use with git-based repositories belonging to an organization and deployed on different source code management platforms. The open-source tool [TruffleHog](https://github.com/trufflesecurity/trufflehog) carries out the scanning of repositories for secrets. The tool is executed within dedicated instances that are set up on a predefined interval. + +> **NOTE:**\ +> As part of the first and current release of secrets-finder, the solution relies on AWS. As such, the documentation below is tailored to this platform. The maintainers of the project aim to provide support for other cloud providers in the next releases. + +

+ +## Infrastructure +The infrastructure consists of several key components that are deployed in AWS. The main components are described below. + +### CloudWatch Event +This resource is used to trigger the CodeBuild Build Project responsible for scheduling the scans at a predefined interval. The interval is set to every Monday by default, but can be adjusted as needed. + +### CodeBuild Build Project +This resource has its own build specification, formatted based on a predefined template, and populated based on the scans to be performed. As part of its operations, the Build Project schedules the start of an EC2 instance for the first scan, drops the instance from the Terraform state, then proceeds similarly for the next scan, and so on. If something goes wrong, the Build Project offers the possibility to send notifications to a preconfigured SNS topic. + +### S3 Bucket +A bucket is used to store both the configuration belonging to each scan defined, and the results of the scans performed, as well as the logs generated by the instances. This bucket is expected to be created using the [`storage` module](/infrastructure/setup/aws/storage). + +### Secrets Manager +This service is notably used to store the credentials used to fetch the repositories to scan. Each reference loaded by the `automation` module points to secrets stored beforehand in Secrets Manager using the [`secrets` module](/infrastructure/setup/aws/secrets). + +### EC2 Instances +These instances are used to perform the scans. They are started by the CodeBuild Build Project and are terminated once the scan is completed, unless the user specifies not to do so. The instances are configured with the necessary permissions to access the repositories to scan, and to push the results to the S3 bucket. Each scan can hold its own configuration (see below). + +

+ +## Scanning configuration +The `${var.scans}` variable in the `automation` module specifies the list of scans to perform. Each scan is defined by a set of options described below: +- `identifier`: this represents the unique identifier of the scan, and should ideally be a human-readable name to easily identify which jobs and findings belong to the same context +- `scm`: the source code management platform where the repositories to be scanned are stored (supported values are `github`, `azure_devops`, and `custom` (for all other source code management platforms)) +- `credentials_reference`: the name of the secret stored in Secrets Manager and holding the credentials to use when fetching the repositories to scan (should be an object with `username` and `password` keys, both of type string) +- `ec2_instance_type`: the type of EC2 instance to use when performing the scan (this option is typically used to scale the infrastructure for each scan based on the number of repositories to cover) +- `files`: an array containing local paths to all the files that users want to inject into the EC2 instance when performing the scan (this allows specifying pre- and post-scan scripts to execute) +- `repositories_to_scan`: the path to the file containing the list of repositories to scan (this file should conform to the `/configuration/secrets-finder/scanner/scan-configuration.schema.json` schema; if not present, it is assumed that a pre-scan script will generate the list of repositories to scan and save that list to the `/home/secrets-finder/scanner/repositories_to_scan.json` file on the EC2 instance) +- `terminate_instance_on_error`: if set to `true`, the instance is terminated if an error occurs during the scan – including during execution of pre- and post-scan scripts (default is `true`) +- `terminate_instance_after_scan`: if set to `true`, the instance is terminated after the scan is completed (default is `true`) +- `report_only_verified`: if set to `true`, only verified 
secrets are reported in the results (default is `false`) + +> **NOTE:**\ +> The endpoint specified in the repositories_to_scan.json file should be a template string denoting the endpoint to call when cloning repositories. The template string should contain these two variables: `organization` and `repository`. For example, a valid endpoint for GitHub would be: `https://github.com/{organization}/{repository}`. + +

+ +## Generated reports +Each time a scan is performed, a report is generated and persisted. This report contains a JSON object made of the following elements: +- `scan_type`: always `detection` with ongoing scans +- `start`: date (in ISO format) indicating when the scan started +- `end`: date (in ISO format) indicating when the scan finished +- `status`: either `success` if the scan could be performed, or `failure` otherwise +- `scan_mode`: `verified` if only verified secrets are reported, `all` otherwise (the number of findings reported does not influence this value) +- `scan_context`: always `repository` for scheduled scans +- `scan_uuid`: unique identifier representing the scan performed +- `scan_identifier`: matches the identifier specified in the configuration of the scan +- `scm`: matches the scm specified in the configuration of the scan +- `results`: an array containing as many entries as we have repositories scanned in an iteration + +The `results` key holds an array where each object reports the following information: +- `scan_uuid`: the identifier representing the scan of the repository (different than `scan_uuid` field at top-level) +- `start`: same as the top-level key of the same name +- `end`: same as the top-level key of the same name +- `organization`: the name of the GitHub organization the repository belongs to +- `repository`: the name of the repository scanned +- `status`: either `success` if the scan could be performed, or `failure` otherwise (the number of findings reported does not influence this value) +- `metadata`: any metadata provided in the `repositories_to_scan.json` file for the repository scanned +- `findings`: an array of findings as returned by TruffleHog, if any found + +

+ +## Deployment instructions +To set up the infrastructure, please proceed with the following steps. + +### Registration of required secrets in AWS Secrets Manager +Using the [`secrets` module](/infrastructure/secrets-finder/setup/aws/secrets), you must store all the secrets needed to perform a scan: namely, the credentials used to reach the different source code management platforms to scan, and the organizations hosted there. + +### Registration of Datadog API token in AWS Secrets Manager +You have the possibility to use Datadog for reporting on EC2 instance activity. For this, you need to store the API key in Secrets Manager, e.g., by using the [`secrets` module](/infrastructure/secrets-finder/setup/aws/secrets) provided. Then, you should specify the `${var.datadog_api_key_reference}` variable, which represents the name of the secret stored in Secrets Manager and holding the API key to use. You should also specify the other variables related to Datadog in the `terraform.tfvars` file. + +### Deployment of the AWS infrastructure +> **Note:**\ +> It is assumed that you have already [installed Terraform](https://developer.hashicorp.com/terraform/downloads) and configured your AWS credentials accordingly for the profile you want to use. + +Then, navigate to the [`automation`](/infrastructure/secrets-finder/scheduled-scans/aws/automation) directory. + +To configure the S3 backend for Terraform, modify the `s3.tfbackend` file by setting the appropriate values. Be sure to reference the correct `` AWS profile in the `profile` key. + +Then, initialize Terraform: +```bash +terraform init -backend-config=s3.tfbackend +``` + +> **IMPORTANT:**\ +> To successfully deploy the infrastructure, it is assumed that the S3 Bucket holding the remote states already exists. This also holds for the DynamoDB Table listing the locks. You are responsible for the creation of such resources. We recommend reusing the same bucket and table across all modules of secrets-finder. 
In such a case, make sure to specify a different path for each module. + +Next, create a `terraform.tfvars` file and set the required variables. This file as well as the [README.md](/infrastructure/secrets-finder/ongoing-scans/aws/README.md) file provided alongside the module provide valuable information about the purpose of each variable. + +Lastly, review the changes to be made and, if satisfactory, proceed with deploying the infrastructure by following the steps below: +```bash +# Review changes +terraform plan + +# Deploy changes +terraform apply -auto-approve +``` + +Upon successful completion, the following outputs should be available: +- `codebuild_arn` +- `event_rule_arn` diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/README.md b/infrastructure/secrets-finder/scheduled-scans/aws/automation/README.md new file mode 100644 index 0000000..3fcf755 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/README.md @@ -0,0 +1,109 @@ +# automation + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >=1.7 | +| [aws](#requirement\_aws) | ~> 5.0 | +| [datadog](#requirement\_datadog) | ~> 3.23 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | ~> 5.0 | +| [datadog](#provider\_datadog) | ~> 3.23 | + +## Modules + +No modules. 
+ +## Resources + +| Name | Type | +|------|------| +| [aws_cloudwatch_event_rule.start_codebuild](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource | +| [aws_cloudwatch_event_target.trigger_codebuild_start](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource | +| [aws_codebuild_project.secrets_finder](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/codebuild_project) | resource | +| [aws_iam_policy.permissions_for_codebuild](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_policy.start_codebuild](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_policy.tag_cloudwatch_event](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.cloudwatch_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role.codebuild_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy_attachment.permissions_for_codebuild](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.start_codebuild](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.tag_event](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_s3_object.backend_static_files](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| 
[aws_s3_object.backend_template_files](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.common_files](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.repositories_to_scan_files](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.scanner_static_files](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.scanner_template_files](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.scanning_files](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_s3_object.trufflehog_configuration_file](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_object) | resource | +| [aws_sns_topic.important_notifications](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic) | resource | +| [aws_sns_topic_subscription.email_subscription](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic_subscription) | resource | +| [datadog_monitor.monitor_ec2_instance_age](https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/monitor) | resource | +| [datadog_monitor.monitor_failed_builds](https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/monitor) | resource | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_iam_policy_document.cloudwatch_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| 
[aws_iam_policy_document.codebuild_assume_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.policy_document_permissions_for_codebuild](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.policy_document_start_codebuild](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.policy_document_tag_cloudwatch_event](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_kms_key.ami_encryption_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/kms_key) | data source | +| [aws_kms_key.ebs_encryption_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/kms_key) | data source | +| [aws_s3_bucket.secrets_finder](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source | +| [aws_secretsmanager_secret.credentials_references](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_secretsmanager_secret.datadog_api_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_secretsmanager_secret.datadog_application_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_secretsmanager_secret.token_reference_github_organization_hosting_secrets_finder](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| 
[aws_secretsmanager_secret_version.datadog_api_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret_version) | data source | +| [aws_secretsmanager_secret_version.datadog_application_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret_version) | data source | +| [aws_subnets.selected](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source | +| [aws_vpc.vpc](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/vpc) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [ami\_encryption\_key\_arn](#input\_ami\_encryption\_key\_arn) | The ARN of the KMS key used to decrypt/encrypt the AMI used for the scanning instances | `string` | `null` | no | +| [aws\_profile](#input\_aws\_profile) | AWS profile to use for authentication | `string` | `"default"` | no | +| [aws\_region](#input\_aws\_region) | AWS region where to deploy resources | `string` | `"us-east-1"` | no | +| [datadog\_account](#input\_datadog\_account) | The name of the Datadog account to which EC2 instance metrics should be reported and where monitors are set up. This variable is only used if 'enable\_datadog\_monitors' variable is set to 'true'. | `string` | `null` | no | +| [datadog\_api\_key\_reference](#input\_datadog\_api\_key\_reference) | Name of the secret stored in Secrets Manager and containing the Datadog API key. Leave empty if Datadog should not be configured. | `string` | `null` | no | +| [datadog\_application\_key\_reference](#input\_datadog\_application\_key\_reference) | Name of the secret stored in Secrets Manager and containing the Datadog application key. Leave empty if Datadog monitors should not be configured. 
| `string` | `null` | no | +| [datadog\_ec2\_instance\_monitor\_ec2\_age\_limit](#input\_datadog\_ec2\_instance\_monitor\_ec2\_age\_limit) | Time (in hours) to wait before considering an instance in an unhealthy state. Value should be between 1 and 72 and is only considered if 'enable\_datadog\_monitors' is set to 'true'. | `number` | `1` | no | +| [datadog\_monitors\_notify\_list](#input\_datadog\_monitors\_notify\_list) | List of recipients to notify whenever an alert is triggered. The format for each recipient should conform with the official specification (https://docs.datadoghq.com/monitors/notify/#notifications). This list is only considered if 'enable\_datadog\_monitors' variable is set to 'true'. | `list(string)` | `[]` | no | +| [datadog\_tags](#input\_datadog\_tags) | A list of tags for Datadog | `list(string)` | `[]` | no | +| [dynamodb\_table\_remote\_states](#input\_dynamodb\_table\_remote\_states) | Name of the DynamoDB table containing the locks used for the remote states representing the infrastructure | `string` | n/a | yes | +| [ebs\_encryption\_key\_arn](#input\_ebs\_encryption\_key\_arn) | The ARN of the KMS key used to encrypt the EBS volumes | `string` | `null` | no | +| [enable\_datadog\_monitors](#input\_enable\_datadog\_monitors) | Define whether Datadog monitors should be set up to monitor the status of the EC2 instances and the Codebuild project. If this variable is set to 'true', both 'datadog\_api\_key\_reference' and 'datadog\_application\_key\_reference' variables should be set, and the corresponding secrets should exist in Parameter Store. 
| `bool` | `true` | no | +| [environment\_type](#input\_environment\_type) | Environment (PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB) | `string` | `"PRODUCTION"` | no | +| [github\_organization\_hosting\_secrets\_finder](#input\_github\_organization\_hosting\_secrets\_finder) | Name of the GitHub Organization where the repository containing the secrets-finder code is hosted | `string` | n/a | yes | +| [github\_repository\_hosting\_secrets\_finder](#input\_github\_repository\_hosting\_secrets\_finder) | Name of the GitHub Repository containing the secrets-finder code | `string` | n/a | yes | +| [iam\_role\_path](#input\_iam\_role\_path) | The path to use when creating IAM roles | `string` | `"/"` | no | +| [instance\_user](#input\_instance\_user) | Username to create and use on the instances started for the scanning process | `string` | `"secrets-finder"` | no | +| [permissions\_boundary\_arn](#input\_permissions\_boundary\_arn) | The name of the IAM permissions boundary to attach to the IAM roles created by the module | `string` | `null` | no | +| [project\_name](#input\_project\_name) | Name of the project (should be the same across all modules of secrets-finder to ensure consistency) | `string` | `"secrets-finder"` | no | +| [s3\_bucket\_name](#input\_s3\_bucket\_name) | Name of the S3 bucket containing files used for secrets detection scans | `string` | n/a | yes | +| [s3\_bucket\_remote\_states](#input\_s3\_bucket\_remote\_states) | Name of the S3 bucket containing the remote states of the infrastructure | `string` | n/a | yes | +| [scans](#input\_scans) | List of scans to perform |
list(object({
identifier = string
scm = string
credentials_reference = string
ec2_instance_type = string
files = optional(list(string))
repositories_to_scan = optional(string)
terminate_instance_on_error = optional(bool)
terminate_instance_after_scan = optional(bool)
report_only_verified = optional(bool)
}))
| n/a | yes | +| [sns\_topic\_receiver](#input\_sns\_topic\_receiver) | Email address of the receiver of the SNS topic to which important notifications are sent. Leave empty if no notifications should be sent. | `string` | `null` | no | +| [start\_schedule](#input\_start\_schedule) | The cron specifying when a new scanning instance should be set up (default is: every Monday at 06:00, expected format: https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions) | `string` | `"cron(0 6 ? * MON *)"` | no | +| [subnet\_name](#input\_subnet\_name) | Name of the subnet where to deploy the resources (wildcards are allowed: first match is used) | `string` | n/a | yes | +| [tags](#input\_tags) | A map of tags to add to the resources | `map(string)` | `{}` | no | +| [terraform\_version](#input\_terraform\_version) | Version of Terraform to use when starting a new scan from CodeBuild | `string` | `"1.8.5"` | no | +| [token\_reference\_github\_organization\_hosting\_secrets\_finder](#input\_token\_reference\_github\_organization\_hosting\_secrets\_finder) | Name of the secret stored in Secrets Manager containing the GitHub token for the organization hosting the secrets-finder code. Leave empty if the repository is publicly accessible. | `string` | `null` | no | +| [trufflehog\_configuration\_file](#input\_trufflehog\_configuration\_file) | Path to the Trufflehog configuration file. Leave empty if no configuration file should be used. 
| `string` | `null` | no | +| [vpc\_name](#input\_vpc\_name) | Identifier of the VPC to use for secrets-finder | `string` | n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [codebuild\_arn](#output\_codebuild\_arn) | n/a | +| [event\_rule\_arn](#output\_event\_rule\_arn) | n/a | + diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/cloudwatch.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/cloudwatch.tf new file mode 100644 index 0000000..02dc7d2 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/cloudwatch.tf @@ -0,0 +1,21 @@ +resource "aws_cloudwatch_event_rule" "start_codebuild" { + name = var.project_name + description = "Event rule to start the CodeBuild build project responsible to trigger scheduled scans for secrets detection" + schedule_expression = var.start_schedule + + depends_on = [ + aws_s3_object.scanner_static_files, + aws_s3_object.backend_static_files, + aws_s3_object.scanner_template_files, + aws_s3_object.backend_template_files, + aws_s3_object.scanning_files, + aws_s3_object.trufflehog_configuration_file + ] +} + +resource "aws_cloudwatch_event_target" "trigger_codebuild_start" { + target_id = "StartCodeBuild" + rule = aws_cloudwatch_event_rule.start_codebuild.name + arn = aws_codebuild_project.secrets_finder.arn + role_arn = aws_iam_role.codebuild_role.arn +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/codebuild-buildspec.yml b/infrastructure/secrets-finder/scheduled-scans/aws/automation/codebuild-buildspec.yml new file mode 100644 index 0000000..ebd9428 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/codebuild-buildspec.yml @@ -0,0 +1,94 @@ +version: 0.2 + +env: + shell: bash + +phases: + install: + commands: + - echo "Installing Terraform" + - curl -LO https://releases.hashicorp.com/terraform/$${TERRAFORM_VERSION}/terraform_$${TERRAFORM_VERSION}_linux_amd64.zip + - curl -LO 
https://releases.hashicorp.com/terraform/$${TERRAFORM_VERSION}/terraform_$${TERRAFORM_VERSION}_SHA256SUMS.sig + - curl -LO https://releases.hashicorp.com/terraform/$${TERRAFORM_VERSION}/terraform_$${TERRAFORM_VERSION}_SHA256SUMS + - shasum -a 256 -c terraform_$${TERRAFORM_VERSION}_SHA256SUMS 2>&1 | grep OK + - gpg --keyserver keyserver.ubuntu.com --recv-keys C874011F0AB405110D02105534365D9472D7468F + - gpg --verify terraform_$${TERRAFORM_VERSION}_SHA256SUMS.sig terraform_$${TERRAFORM_VERSION}_SHA256SUMS + - unzip terraform_$${TERRAFORM_VERSION}_linux_amd64.zip && chmod +x terraform + - mv terraform /usr/local/bin/ + + pre_build: + commands: + - | + if ! [[ -z "$GITHUB_TOKEN" ]]; then + git clone https://oauth2:$GITHUB_TOKEN@github.com/$GITHUB_ORG_SECRETS_FINDER/$GITHUB_REPOSITORY_SECRETS_FINDER.git + else + git clone https://github.com/$GITHUB_ORG_SECRETS_FINDER/$GITHUB_REPOSITORY_SECRETS_FINDER.git + fi + - cd "$GITHUB_REPOSITORY_SECRETS_FINDER/infrastructure/secrets-finder/scheduled-scans/aws/scan" + - sed -i '/profile *=/d' "providers.tf" + + build: + commands: + - sed -i '/profile *=/d' "s3.tfbackend" + + - terraform init --reconfigure -backend-config=s3.tfbackend + + - | + function insert_or_replace_in_terraform_tfvars() { + pattern=$1 + line=$2 + file="terraform.tfvars" + + if [ -f "$file" ]; then + grep -v "^$pattern" "$file" > temp && mv temp "$file" + fi + + echo "$line" >> "$file" + } + + if ! [[ -z "$DATADOG_API_KEY_REFERENCE" ]]; then + insert_or_replace_in_terraform_tfvars "datadog_api_key_reference *=" "datadog_api_key_reference = \"$DATADOG_API_KEY_REFERENCE\"" + else + insert_or_replace_in_terraform_tfvars "datadog_api_key_reference *=" "datadog_api_key_reference = null" + fi + + if ! 
[[ -z "$DATADOG_ACCOUNT_NAME" ]]; then + insert_or_replace_in_terraform_tfvars "datadog_enable_ec2_instance_metrics *=" "datadog_enable_ec2_instance_metrics = true" + insert_or_replace_in_terraform_tfvars "datadog_account *=" "datadog_account = \"$DATADOG_ACCOUNT_NAME\"" + else + insert_or_replace_in_terraform_tfvars "datadog_enable_ec2_instance_metrics *=" "datadog_enable_ec2_instance_metrics = false" + insert_or_replace_in_terraform_tfvars "datadog_account *=" "datadog_account = null" + fi + + insert_or_replace_in_terraform_tfvars "vpc_name *=" "vpc_name = \"$VPC_NAME\"" + insert_or_replace_in_terraform_tfvars "subnet_name *=" "subnet_name = \"$SUBNET_NAME\"" + insert_or_replace_in_terraform_tfvars "s3_bucket_name *=" "s3_bucket_name = \"$S3_BUCKET_NAME\"" + insert_or_replace_in_terraform_tfvars "instance_user *=" "instance_user = \"$INSTANCE_USER\"" + + if [[ -n $SNS_TOPIC_ARN ]]; then + insert_or_replace_in_terraform_tfvars "sns_topic_arn *=" "sns_topic_arn = \"$SNS_TOPIC_ARN\"" + fi + + - | + # Execution of scans + error_encountered="false" + failed_scans=() + trap 'error_encountered="true"; if ! 
[[ " $${failed_scans[@]} " =~ " $${scan_being_executed} " ]]; then failed_scans+=("$scan_being_executed"); fi' ERR + + %{ for scan in scans ~} + scan_being_executed="${scan.identifier}" + echo "Starting scan: $scan_being_executed" + terraform apply --auto-approve -input=false -var="scm=${scan.scm}" -var="scan_identifier=${scan.identifier}" -var="credentials_reference=${scan.credentials_reference}" -var="instance_type=${scan.ec2_instance_type}" + if terraform state list | grep -q "aws_instance.secrets_finder"; then + terraform state rm aws_instance.secrets_finder + fi + + %{ endfor ~} + trap - ERR + if [ "$error_encountered" == "true" ]; then + if [ -n "$SNS_TOPIC_ARN" ]; then + formatted_list_of_failed_scans=$(printf "%s\n" "$${failed_scans[@]}" | sort -u | paste -sd,- | sed 's/,$//') + aws sns publish --topic-arn $SNS_TOPIC_ARN --subject "[SECRETS FINDER]" --message "One or several scans failed to start (cf. CodeBuild Project logs): $formatted_list_of_failed_scans" + fi + exit 1 + fi diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/codebuild.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/codebuild.tf new file mode 100644 index 0000000..cbc11a3 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/codebuild.tf @@ -0,0 +1,132 @@ +resource "aws_codebuild_project" "secrets_finder" { + badge_enabled = false + build_timeout = 60 + name = var.project_name + queued_timeout = 480 + service_role = aws_iam_role.codebuild_role.arn + + artifacts { + type = "NO_ARTIFACTS" + } + + environment { + compute_type = "BUILD_GENERAL1_SMALL" + image = "aws/codebuild/standard:6.0" + image_pull_credentials_type = "CODEBUILD" + privileged_mode = true + type = "LINUX_CONTAINER" + + environment_variable { + name = "AWS_REGION" + value = var.aws_region + type = "PLAINTEXT" + } + + environment_variable { + name = "PROJECT_NAME" + value = var.project_name + type = "PLAINTEXT" + } + + environment_variable { + name = 
"VPC_NAME" + value = var.vpc_name + type = "PLAINTEXT" + } + + environment_variable { + name = "SUBNET_NAME" + value = var.subnet_name + type = "PLAINTEXT" + } + + environment_variable { + name = "S3_BUCKET_NAME" + value = var.s3_bucket_name + type = "PLAINTEXT" + } + + environment_variable { + name = "INSTANCE_USER" + value = var.instance_user + type = "PLAINTEXT" + } + + dynamic "environment_variable" { + for_each = (var.token_reference_github_organization_hosting_secrets_finder != null) ? [var.token_reference_github_organization_hosting_secrets_finder] : [] + content { + name = "GITHUB_TOKEN" + value = data.aws_secretsmanager_secret.token_reference_github_organization_hosting_secrets_finder[0].arn + type = "SECRETS_MANAGER" + } + } + + environment_variable { + name = "GITHUB_ORG_SECRETS_FINDER" + value = var.github_organization_hosting_secrets_finder + type = "PLAINTEXT" + } + + environment_variable { + name = "GITHUB_REPOSITORY_SECRETS_FINDER" + value = var.github_repository_hosting_secrets_finder + type = "PLAINTEXT" + } + + dynamic "environment_variable" { + for_each = var.sns_topic_receiver != null ? [aws_sns_topic.important_notifications[0].arn] : [] + content { + name = "SNS_TOPIC_ARN" + value = environment_variable.value + type = "PLAINTEXT" + } + } + + environment_variable { + name = "TERRAFORM_VERSION" + value = var.terraform_version + type = "PLAINTEXT" + } + + dynamic "environment_variable" { + for_each = (var.datadog_api_key_reference != null) ? [var.datadog_api_key_reference] : [] + content { + name = "DATADOG_API_KEY_REFERENCE" + value = environment_variable.value + type = "PLAINTEXT" + } + } + + dynamic "environment_variable" { + for_each = (var.enable_datadog_monitors == true) ? 
[var.datadog_account] : [] + content { + name = "DATADOG_ACCOUNT_NAME" + value = environment_variable.value + type = "PLAINTEXT" + } + } + } + + source { + buildspec = templatefile(local.buildspec_file, { scans = var.scans }) + git_clone_depth = 0 + insecure_ssl = false + report_build_status = false + type = "NO_SOURCE" + } + + lifecycle { + precondition { + condition = (var.enable_datadog_monitors == false) || (var.enable_datadog_monitors == true && var.datadog_account != null) + error_message = "Datadog monitors were enabled but no Datadog account was provided to collect EC2 instance metrics (variable 'datadog_account' has no value)" + } + } + + depends_on = [ + aws_s3_object.scanner_static_files, + aws_s3_object.backend_static_files, + aws_s3_object.scanner_template_files, + aws_s3_object.backend_template_files, + aws_s3_object.scanning_files + ] +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog-codebuild-monitoring b/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog-codebuild-monitoring new file mode 100644 index 0000000..0a056f8 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog-codebuild-monitoring @@ -0,0 +1,14 @@ +{{#is_alert}} +The Codebuild project has failed: [{{projectname.name}}](https://${aws_region}.console.aws.amazon.com/codesuite/codebuild/${aws_account_id}/projects/{{projectname.name}}) + +
+ +In case of a one-time error, run the build manually from the AWS console or using the AWS CLI. + +
+ +{{/is_alert}} +{{#is_no_data}} +The Codebuild project does not report data anymore: [{{projectname.name}}](https://${aws_region}.console.aws.amazon.com/codesuite/codebuild/${aws_account_id}/projects/{{projectname.name}}) +{{/is_no_data}} +${notification_recipients} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog-ec2-monitoring b/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog-ec2-monitoring new file mode 100644 index 0000000..5a96f38 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog-ec2-monitoring @@ -0,0 +1,18 @@ +{{#is_alert}} +The following EC2 instance has been running for more than ${limit} hours: [{{host.name}}](https://${aws_region}.console.aws.amazon.com/ec2/home?region=${aws_region}#InstanceDetails:instanceId={{host.name}}) + +
+ +Please terminate the EC2 instance from the AWS console or using the AWS CLI if no active scan is running to avoid unnecessary costs. + +
+ +If the scanning phase is not progressing, you may want to gracefully terminate TruffleHog and manually send the results of the scan to the S3 bucket before terminating the instance. + +
+ +[**Dashboard**](https://app.datadoghq.com/dash/host_name/{{host.name}}) + +[**Information about host**](https://app.datadoghq.com/infrastructure?host={{host.name}}) +{{/is_alert}} +${notification_recipients} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog.tf new file mode 100644 index 0000000..663a4dd --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/datadog.tf @@ -0,0 +1,36 @@ +resource "datadog_monitor" "monitor_ec2_instance_age" { + count = var.enable_datadog_monitors == true ? 1 : 0 + + name = "Secrets Finder: EC2 instance status" + type = "metric alert" + message = templatefile(local.datadog_ec2_instance_monitoring, { aws_region = var.aws_region, limit = var.datadog_ec2_instance_monitor_ec2_age_limit, notification_recipients = join(" ", var.datadog_monitors_notify_list) }) + include_tags = false + notify_audit = true + require_full_window = false + priority = 3 + timeout_h = 1 + notification_preset_name = "hide_all" + + query = "max(last_1h):max:aws.ec2.instance_age{name:${var.project_name}-*} by {host} > ${var.datadog_ec2_instance_monitor_ec2_age_limit * 3600}" + + tags = local.datadog_tags +} + +resource "datadog_monitor" "monitor_failed_builds" { + count = var.enable_datadog_monitors == true ? 
1 : 0 + + name = "Secrets Finder: Codebuild status" + type = "metric alert" + message = templatefile(local.datadog_codebuild_monitoring, { aws_region = var.aws_region, aws_account_id = data.aws_caller_identity.current.account_id, notification_recipients = join(" ", var.datadog_monitors_notify_list) }) + include_tags = false + notify_audit = true + require_full_window = false + priority = 3 + no_data_timeframe = 60 + timeout_h = 12 + notification_preset_name = "hide_all" + + query = "sum(last_6h):max:aws.codebuild.failed_builds{projectname:${var.project_name}} by {projectname}.as_count() > 0" + + tags = local.datadog_tags +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/iam.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/iam.tf new file mode 100644 index 0000000..9491c68 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/iam.tf @@ -0,0 +1,326 @@ +resource "aws_iam_role" "cloudwatch_role" { + name = "${var.project_name}-cloudwatch" + assume_role_policy = data.aws_iam_policy_document.cloudwatch_assume_role.json + path = var.iam_role_path + permissions_boundary = var.permissions_boundary_arn +} + +data "aws_iam_policy_document" "cloudwatch_assume_role" { + statement { + effect = "Allow" + principals { + identifiers = ["events.amazonaws.com"] + type = "Service" + } + actions = ["sts:AssumeRole"] + } +} + +data "aws_iam_policy_document" "policy_document_start_codebuild" { + statement { + effect = "Allow" + actions = ["codebuild:StartBuild"] + resources = [aws_codebuild_project.secrets_finder.arn] + } +} + +resource "aws_iam_policy" "start_codebuild" { + name = "${var.project_name}-start-codebuild" + description = "Allows to start new secrets detection scan through CodeBuild" + policy = data.aws_iam_policy_document.policy_document_start_codebuild.json +} + +resource "aws_iam_role_policy_attachment" "start_codebuild" { + policy_arn = aws_iam_policy.start_codebuild.arn + role = 
aws_iam_role.cloudwatch_role.name +} + +data "aws_iam_policy_document" "policy_document_tag_cloudwatch_event" { + statement { + effect = "Allow" + actions = ["events:TagResource"] + resources = [aws_cloudwatch_event_rule.start_codebuild.arn] + } +} + +resource "aws_iam_policy" "tag_cloudwatch_event" { + name = "${var.project_name}-tag-cloudwatch-event" + description = "Policy allowing to tag the event responsible for launching a new secrets detection scan through CodeBuild" + policy = data.aws_iam_policy_document.policy_document_tag_cloudwatch_event.json +} + +resource "aws_iam_role_policy_attachment" "tag_event" { + policy_arn = aws_iam_policy.tag_cloudwatch_event.arn + role = aws_iam_role.cloudwatch_role.name +} + +resource "aws_iam_role" "codebuild_role" { + name = "${var.project_name}-codebuild" + assume_role_policy = data.aws_iam_policy_document.codebuild_assume_policy.json + path = var.iam_role_path + permissions_boundary = var.permissions_boundary_arn +} + +data "aws_iam_policy_document" "codebuild_assume_policy" { + statement { + actions = ["sts:AssumeRole"] + effect = "Allow" + principals { + type = "Service" + identifiers = [ + "codebuild.amazonaws.com", + "events.amazonaws.com" + ] + } + } +} + +data "aws_iam_policy_document" "policy_document_permissions_for_codebuild" { + statement { + sid = "StartCodeBuildToPerformTruffleHogScan" + effect = "Allow" + actions = [ + "codebuild:StartBuild" + ] + resources = [aws_codebuild_project.secrets_finder.arn] + } + + statement { + sid = "AllowManagementOfLogsRelatingToCodeBuild" + effect = "Allow" + actions = [ + "logs:*" + ] + resources = ["arn:aws:logs:${var.aws_region}:${data.aws_caller_identity.current.account_id}:log-group:/aws/codebuild/${var.project_name}*"] + } + + dynamic "statement" { + for_each = var.sns_topic_receiver != null ? 
[var.sns_topic_receiver] : [] + content { + sid = "AllowToEmitImportantNotifications" + effect = "Allow" + actions = [ + "sns:Publish" + ] + resources = [aws_sns_topic.important_notifications[0].arn] + } + } + + dynamic "statement" { + for_each = var.token_reference_github_organization_hosting_secrets_finder != null ? [var.token_reference_github_organization_hosting_secrets_finder] : [] + content { + sid = "FetchGitHubToken" + effect = "Allow" + actions = [ + "secretsmanager:GetResourcePolicy", + "secretsmanager:DescribeSecret", + "secretsmanager:GetSecretValue" + ] + resources = [data.aws_secretsmanager_secret.token_reference_github_organization_hosting_secrets_finder[0].arn] + } + } + + dynamic "statement" { + for_each = (var.enable_datadog_monitors == true) ? [var.datadog_api_key_reference] : [] + content { + sid = "FetchDatadogAPIKey" + effect = "Allow" + actions = [ + "secretsmanager:GetResourcePolicy", + "secretsmanager:DescribeSecret", + "secretsmanager:GetSecretValue" + ] + resources = [ + "arn:aws:secretsmanager:${var.aws_region}:${data.aws_caller_identity.current.account_id}:secret:${statement.value}-*" + ] + } + } + + dynamic "statement" { + for_each = (var.enable_datadog_monitors == true) ? 
[var.datadog_application_key_reference] : [] + content { + sid = "FetchDatadogApplicationKey" + effect = "Allow" + actions = [ + "secretsmanager:GetResourcePolicy", + "secretsmanager:DescribeSecret", + "secretsmanager:GetSecretValue" + ] + resources = [ + "arn:aws:secretsmanager:${var.aws_region}:${data.aws_caller_identity.current.account_id}:secret:${statement.value}-*" + ] + } + } + + statement { + sid = "ReviewStateOfAllCredentialsReferences" + effect = "Allow" + actions = [ + "secretsmanager:GetResourcePolicy", + "secretsmanager:DescribeSecret" + ] + resources = [ + for reference in keys(data.aws_secretsmanager_secret.credentials_references) : data.aws_secretsmanager_secret.credentials_references[reference].arn + ] + } + + statement { + sid = "AuthorizeActionsOnIAMResourcesDeployedWhenScanning" + effect = "Allow" + actions = [ + "iam:*" + ] + resources = [ + "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.project_name}-ec2-role", + "arn:aws:iam::${data.aws_caller_identity.current.account_id}:policy/${var.project_name}-ec2-permissions", + "arn:aws:iam::${data.aws_caller_identity.current.account_id}:instance-profile/${var.project_name}-instance-profile" + ] + } + + dynamic "statement" { # emitted only when a permissions boundary ARN is configured (the variable is a string or null, never a bool) + for_each = (var.permissions_boundary_arn != null) ? 
["add"] : [] + content { + sid = "AllowUseOfPermissionsBoundary" + effect = "Allow" + actions = [ + "iam:GetPolicy", + "iam:GetPolicyVersion", + "iam:AttachRolePolicy" + ] + resources = [ + var.permissions_boundary_arn + ] + } + } + + statement { + sid = "AuthorizeManagementOfS3BucketUsedBySecretsFinder" + effect = "Allow" + actions = [ + "s3:*" + ] + resources = [ + "arn:aws:s3:::${var.s3_bucket_name}", + "arn:aws:s3:::${var.s3_bucket_name}/*" + ] + } + + statement { + sid = "ListS3BucketUsedForRemoteStateManagement" + effect = "Allow" + actions = ["s3:ListBucket"] + resources = ["arn:aws:s3:::${var.s3_bucket_remote_states}"] + } + + statement { + sid = "GetAndPutObjectsInS3BucketUsedForRemoteStateManagement" + effect = "Allow" + actions = [ + "s3:GetObject*", + "s3:PutObject*" + ] + resources = ["arn:aws:s3:::${var.s3_bucket_remote_states}/*"] + } + + statement { + sid = "ManageItemsInDynamoDBTableUsedForRemoteStateManagement" + effect = "Allow" + actions = [ + "dynamodb:*Item" + ] + resources = ["arn:aws:dynamodb:${var.aws_region}:${data.aws_caller_identity.current.account_id}:table/${var.dynamodb_table_remote_states}"] + } + + statement { + sid = "ListKeysAndAliases" + effect = "Allow" + actions = [ + "kms:ListKeys", + "kms:ListAliases" + ] + + resources = ["*"] + } + + dynamic "statement" { + for_each = var.ebs_encryption_key_arn != null ? [var.ebs_encryption_key_arn] : [] + content { + sid = "AllowEBSVolumeEncryption" + effect = "Allow" + actions = [ + "kms:Decrypt", + "kms:ListKeyPolicies", + "kms:ListRetirableGrants", + "kms:Encrypt", + "kms:DescribeKey", + "kms:ListResourceTags", + "kms:CreateGrant", + "kms:ListGrants", + "kms:ReEncrypt*", + "kms:GenerateDataKey*" + ] + resources = [data.aws_kms_key.ebs_encryption_key[0].arn] + } + } + + dynamic "statement" { + for_each = var.ami_encryption_key_arn != null ? 
[var.ami_encryption_key_arn] : [] + content { + sid = "AllowUseOfAMIEncryptionKey" + effect = "Allow" + actions = [ + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:DescribeKey", + "kms:CreateGrant", + "kms:GenerateDataKey*" + ] + resources = [data.aws_kms_key.ami_encryption_key[0].arn] + } + } + + statement { + sid = "GetInformationAboutEC2Resources" + effect = "Allow" + actions = [ + "ec2:DescribeVpc*", + "ec2:DescribeSubnets", + "ec2:DescribeImages", + "ec2:DescribeRouteTables", + "ec2:DescribeSecurityGroups", + "ec2:DescribeInstanceTypes", + "ec2:DescribeVolumes", + "ec2:DescribeInstances", + "ec2:RunInstances", + "ec2:DescribeTags", + "ec2:DescribeInstanceCreditSpecifications", + "ec2:CreateTags", + "ec2:ModifyInstanceAttribute" + ] + resources = ["*"] + } + + statement { + effect = "Allow" + actions = [ + "ec2:*" + ] + resources = ["arn:aws:ec2:${var.aws_region}:${data.aws_caller_identity.current.account_id}:instance/*"] + + condition { + test = "StringLike" + variable = "aws:ResourceTag/Name" + values = ["${var.project_name}*"] + } + } +} + +resource "aws_iam_policy" "permissions_for_codebuild" { + name = "${var.project_name}-codebuild-permissions" + policy = data.aws_iam_policy_document.policy_document_permissions_for_codebuild.json +} + +resource "aws_iam_role_policy_attachment" "permissions_for_codebuild" { + policy_arn = aws_iam_policy.permissions_for_codebuild.arn + role = aws_iam_role.codebuild_role.name +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/kms.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/kms.tf new file mode 100644 index 0000000..8640f91 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/kms.tf @@ -0,0 +1,9 @@ +data "aws_kms_key" "ebs_encryption_key" { + count = var.ebs_encryption_key_arn != null ? 1 : 0 + key_id = var.ebs_encryption_key_arn +} + +data "aws_kms_key" "ami_encryption_key" { + count = var.ami_encryption_key_arn != null ? 
1 : 0 + key_id = var.ami_encryption_key_arn +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/locals.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/locals.tf new file mode 100644 index 0000000..4a970fc --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/locals.tf @@ -0,0 +1,114 @@ +locals { + backend = "aws" + + environment = replace(lower(var.environment_type), " ", "-") + tags = merge(try(var.tags, {}), { environment = local.environment }) + + buildspec_file = "codebuild-buildspec.yml" + datadog_ec2_instance_monitoring = "datadog-ec2-monitoring" + datadog_codebuild_monitoring = "datadog-codebuild-monitoring" + + setup_variables = { + aws_region = var.aws_region + s3_bucket = var.s3_bucket_name + sns_topic_arn = var.sns_topic_receiver != null ? aws_sns_topic.important_notifications[0].arn : "" + instance_user = var.instance_user + scanner_folder = "/home/${var.instance_user}/scanner" + scan_folder = "/home/${var.instance_user}/scan" + datadog_api_key_reference = var.datadog_api_key_reference + } + + trufflehog_configuration_file = var.trufflehog_configuration_file != null ? 
file(var.trufflehog_configuration_file) : null + + configuration_folder = "../../../../../configuration/secrets-finder" + + common_files = [ + "${local.configuration_folder}/common.py", + "${local.configuration_folder}/common.requirements.txt" + ] + + scanner_static_files = [ + "${local.configuration_folder}/scanner/git-credentials-helper.sh", + "${local.configuration_folder}/scanner/scan-configuration.schema.json", + "${local.configuration_folder}/scanner/scanner.py", + "${local.configuration_folder}/scanner/scanner.requirements.txt" + ] + + backend_static_files = [ + "${local.configuration_folder}/${local.backend}/initializer.py", + "${local.configuration_folder}/${local.backend}/finalizer.py", + "${local.configuration_folder}/${local.backend}/backend.py", + "${local.configuration_folder}/${local.backend}/backend.requirements.txt" + ] + + scanner_template_files = [ + "${local.configuration_folder}/scanner/scanner.env", + "${local.configuration_folder}/scanner/scanner.service" + ] + + scanner_template_files_formatted_for_all_scans = flatten([ + for s in var.scans : [ + for f in local.scanner_template_files : { + scan = s + reference = f + formatted_file = templatefile(f, merge(local.setup_variables, { + scm = s.scm + scan_identifier = s.identifier + credentials_reference = s.credentials_reference + report_only_verified = s.report_only_verified != null ? s.report_only_verified : false + })) + } + ] + ]) + + backend_template_files = [ + "${local.configuration_folder}/${local.backend}/setup.sh", + "${local.configuration_folder}/${local.backend}/backend.env" + ] + + backend_template_files_formatted_for_all_scans = flatten([ + for s in var.scans : [ + for f in local.backend_template_files : { + scan = s + reference = f + formatted_file = templatefile(f, merge(local.setup_variables, { + scm = s.scm + scan_identifier = s.identifier + credentials_reference = s.credentials_reference + terminate_instance_on_error = s.terminate_instance_on_error != null ? 
s.terminate_instance_on_error : "" + terminate_instance_after_scan = s.terminate_instance_after_scan != null ? s.terminate_instance_after_scan : "" + report_only_verified = s.report_only_verified != null ? s.report_only_verified : false + })) + } + ] + ]) + + all_user_submitted_files = flatten([ + for s in var.scans : + s.files != null ? [ + for f in s.files : { + scan = s.identifier + file = f + } + ] : [] + ]) + + repositories_to_scan = [ + for s in var.scans : { + scan = s.identifier + file = s.repositories_to_scan + } if s.repositories_to_scan != null + ] + + all_scanner_files_stored_in_s3_bucket = concat( + [for f in local.common_files : "secrets-finder/scheduled-scans/scanner/${basename(f)}"], + [for f in local.scanner_static_files : "secrets-finder/scheduled-scans/scanner/${basename(f)}"], + [for f in local.backend_static_files : "secrets-finder/scheduled-scans/scanner/${basename(f)}"], + [for f in local.scanner_template_files_formatted_for_all_scans : "secrets-finder/scheduled-scans/scans/${f.scan.identifier}/${basename(f.reference)}"], + [for f in local.backend_template_files_formatted_for_all_scans : "secrets-finder/scheduled-scans/scans/${f.scan.identifier}/${basename(f.reference)}"] + ) + + all_credentials_references = [for s in var.scans : s.credentials_reference] + + datadog_tags = concat(var.datadog_tags, [var.project_name]) +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/outputs.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/outputs.tf new file mode 100644 index 0000000..aabab5e --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/outputs.tf @@ -0,0 +1,7 @@ +output "codebuild_arn" { + value = aws_codebuild_project.secrets_finder.arn +} + +output "event_rule_arn" { + value = aws_cloudwatch_event_rule.start_codebuild.arn +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/providers.tf 
b/infrastructure/secrets-finder/scheduled-scans/aws/automation/providers.tf new file mode 100644 index 0000000..2d6dbd6 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/providers.tf @@ -0,0 +1,31 @@ +terraform { + required_version = ">=1.7" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + + datadog = { + source = "DataDog/datadog" + version = "~> 3.23" + } + } + + backend "s3" { + encrypt = true + } +} + +provider "aws" { + region = var.aws_region + profile = var.aws_profile + default_tags { tags = local.tags } +} + +provider "datadog" { + api_key = var.enable_datadog_monitors ? data.aws_secretsmanager_secret_version.datadog_api_key[0].secret_string : null + app_key = var.enable_datadog_monitors ? data.aws_secretsmanager_secret_version.datadog_application_key[0].secret_string : null + validate = var.enable_datadog_monitors ? true : false +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/s3.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/s3.tf new file mode 100644 index 0000000..bb6ff62 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/s3.tf @@ -0,0 +1,83 @@ +data "aws_s3_bucket" "secrets_finder" { + bucket = var.s3_bucket_name +} + +resource "aws_s3_object" "trufflehog_configuration_file" { + count = var.trufflehog_configuration_file != null ? 
1 : 0 + + bucket = data.aws_s3_bucket.secrets_finder.id + key = "secrets-finder/scheduled-scans/scanner/configuration.yaml" + content = local.trufflehog_configuration_file + source_hash = filemd5(var.trufflehog_configuration_file) # hash the file at the configured path; the local holds the file's content, which filemd5() cannot open +} + +resource "aws_s3_object" "common_files" { + count = length(local.common_files) + + bucket = data.aws_s3_bucket.secrets_finder.id + key = "secrets-finder/scheduled-scans/scanner/${basename(local.common_files[count.index])}" + content = file(local.common_files[count.index]) + source_hash = filemd5(local.common_files[count.index]) +} + +resource "aws_s3_object" "scanner_static_files" { + count = length(local.scanner_static_files) + + bucket = data.aws_s3_bucket.secrets_finder.id + key = "secrets-finder/scheduled-scans/scanner/${basename(local.scanner_static_files[count.index])}" + content = file(local.scanner_static_files[count.index]) + source_hash = filemd5(local.scanner_static_files[count.index]) +} + +resource "aws_s3_object" "backend_static_files" { + count = length(local.backend_static_files) + + bucket = data.aws_s3_bucket.secrets_finder.id + key = "secrets-finder/scheduled-scans/scanner/${basename(local.backend_static_files[count.index])}" + content = file(local.backend_static_files[count.index]) + source_hash = filemd5(local.backend_static_files[count.index]) +} + +resource "aws_s3_object" "scanner_template_files" { + count = length(local.scanner_template_files_formatted_for_all_scans) + + bucket = data.aws_s3_bucket.secrets_finder.id + key = "secrets-finder/scheduled-scans/scans/${local.scanner_template_files_formatted_for_all_scans[count.index].scan.identifier}/setup/${basename(local.scanner_template_files_formatted_for_all_scans[count.index].reference)}" + content = local.scanner_template_files_formatted_for_all_scans[count.index].formatted_file + source_hash = md5(local.scanner_template_files_formatted_for_all_scans[count.index].formatted_file) +} + +resource "aws_s3_object" "backend_template_files" { + count = 
length(local.backend_template_files_formatted_for_all_scans) + + bucket = data.aws_s3_bucket.secrets_finder.id + key = "secrets-finder/scheduled-scans/scans/${local.backend_template_files_formatted_for_all_scans[count.index].scan.identifier}/setup/${basename(local.backend_template_files_formatted_for_all_scans[count.index].reference)}" + content = local.backend_template_files_formatted_for_all_scans[count.index].formatted_file + source_hash = md5(local.backend_template_files_formatted_for_all_scans[count.index].formatted_file) + content_type = "text/plain" +} + +resource "aws_s3_object" "scanning_files" { + count = length(local.all_user_submitted_files) + + bucket = data.aws_s3_bucket.secrets_finder.id + key = "secrets-finder/scheduled-scans/scans/${local.all_user_submitted_files[count.index].scan}/files/${basename(local.all_user_submitted_files[count.index].file)}" + content = file(local.all_user_submitted_files[count.index].file) + source_hash = filemd5(local.all_user_submitted_files[count.index].file) + + lifecycle { + precondition { + condition = contains(local.all_scanner_files_stored_in_s3_bucket, "secrets-finder/scheduled-scans/scans/${local.all_user_submitted_files[count.index].scan}/${basename(local.all_user_submitted_files[count.index].file)}") == false + error_message = "The user-supplied file ${basename(local.all_user_submitted_files[count.index].file)} conflicts with another file used by secrets-finder." 
+ } + } +} + +resource "aws_s3_object" "repositories_to_scan_files" { + count = length(local.repositories_to_scan) + + bucket = data.aws_s3_bucket.secrets_finder.id + key = "secrets-finder/scheduled-scans/scans/${local.repositories_to_scan[count.index].scan}/setup/repositories_to_scan.json" + content = file(local.repositories_to_scan[count.index].file) + source_hash = filemd5(local.repositories_to_scan[count.index].file) +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/s3.tfbackend b/infrastructure/secrets-finder/scheduled-scans/aws/automation/s3.tfbackend new file mode 100644 index 0000000..6fa4016 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/s3.tfbackend @@ -0,0 +1,5 @@ +bucket = "" +key = "" +region = "" +dynamodb_table = "" +profile = "" diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/secrets-manager.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/secrets-manager.tf new file mode 100644 index 0000000..375f3f6 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/secrets-manager.tf @@ -0,0 +1,57 @@ +data "aws_secretsmanager_secret" "token_reference_github_organization_hosting_secrets_finder" { + count = var.token_reference_github_organization_hosting_secrets_finder != null ? 1 : 0 + name = var.token_reference_github_organization_hosting_secrets_finder +} + +data "aws_secretsmanager_secret" "datadog_api_key" { + count = var.enable_datadog_monitors == true ? 1 : 0 + name = var.datadog_api_key_reference + + lifecycle { + precondition { + condition = var.datadog_api_key_reference != null + error_message = "Datadog monitors should be set up, but no secret reference to Secrets Manager was provided using the 'datadog_api_key_reference' variable." + } + } +} + +data "aws_secretsmanager_secret_version" "datadog_api_key" { + count = var.enable_datadog_monitors ? 
1 : 0 + secret_id = data.aws_secretsmanager_secret.datadog_api_key[0].id + + lifecycle { + precondition { + condition = var.datadog_api_key_reference != null + error_message = "Datadog monitors should be set up, but no secret reference to Secrets Manager was provided using the 'datadog_api_key_reference' variable." + } + } +} + +data "aws_secretsmanager_secret" "datadog_application_key" { + count = var.enable_datadog_monitors == true ? 1 : 0 + name = var.datadog_application_key_reference + + lifecycle { + precondition { + condition = var.datadog_application_key_reference != null + error_message = "Datadog monitors should be set up, but no secret reference to Secrets Manager was provided using the 'datadog_application_key_reference' variable." + } + } +} + +data "aws_secretsmanager_secret_version" "datadog_application_key" { + count = var.enable_datadog_monitors ? 1 : 0 + secret_id = data.aws_secretsmanager_secret.datadog_application_key[0].id + + lifecycle { + precondition { + condition = var.datadog_application_key_reference != null + error_message = "Datadog monitors should be set up, but no secret reference to Secrets Manager was provided using the 'datadog_application_key_reference' variable." + } + } +} + +data "aws_secretsmanager_secret" "credentials_references" { + for_each = toset(local.all_credentials_references) + name = each.value +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/sns.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/sns.tf new file mode 100644 index 0000000..f19ec6c --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/sns.tf @@ -0,0 +1,11 @@ +resource "aws_sns_topic" "important_notifications" { + count = var.sns_topic_receiver != null ? 1 : 0 + name = var.project_name +} + +resource "aws_sns_topic_subscription" "email_subscription" { + count = var.sns_topic_receiver != null ? 
1 : 0 + topic_arn = aws_sns_topic.important_notifications[0].arn + protocol = "email" + endpoint = var.sns_topic_receiver +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/sts.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/sts.tf new file mode 100644 index 0000000..8fc4b38 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/sts.tf @@ -0,0 +1 @@ +data "aws_caller_identity" "current" {} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/variables.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/variables.tf new file mode 100644 index 0000000..28b29f7 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/variables.tf @@ -0,0 +1,318 @@ +variable "aws_region" { + type = string + default = "us-east-1" + description = "AWS region where to deploy resources" + + validation { + condition = can(regex("^(af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+$", var.aws_region)) + error_message = "You should enter a valid AWS region (https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html)" + } +} + +variable "aws_profile" { + type = string + default = "default" + description = "AWS profile to use for authentication" +} + +variable "environment_type" { + type = string + default = "PRODUCTION" + description = "Environment (PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB)" + + validation { + condition = contains(["PRODUCTION", "PRE-PRODUCTION", "QUALITY ASSURANCE", "INTEGRATION TESTING", "DEVELOPMENT", "LAB"], var.environment_type) + error_message = "The environment type should be one of the following values: PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB (case sensitive)" + } +} + +variable "tags" { + type = map(string) + description = "A map of tags to add to the resources" + 
default = {} + + validation { + condition = alltrue([for v in values(var.tags) : v != ""]) + error_message = "Tag values must not be empty." + } +} + +variable "permissions_boundary_arn" { + type = string + default = null + description = "The name of the IAM permissions boundary to attach to the IAM roles created by the module" + + validation { + condition = var.permissions_boundary_arn == null || can(regex("^arn:aws:iam::[0-9]{12}:policy\\/([a-zA-Z0-9-_.]+)$", var.permissions_boundary_arn)) + error_message = "The provided ARN is not a valid ARN for a policy" + } +} + +variable "iam_role_path" { + type = string + default = "/" + description = "The path to use when creating IAM roles" + + validation { + condition = can(regex("^\\/([a-zA-Z0-9]+([-a-zA-Z0-9]*[a-zA-Z0-9]+)?\\/)*$", var.iam_role_path)) + error_message = "The provided path is invalid" + } +} + +variable "project_name" { + type = string + default = "secrets-finder" + description = "Name of the project (should be the same across all modules of secrets-finder to ensure consistency)" +} + +variable "vpc_name" { + type = string + description = "Identifier of the VPC to use for secrets-finder" +} + +variable "subnet_name" { + type = string + description = "Name of the subnet where to deploy the resources (wildcards are allowed: first match is used)" +} + + +variable "s3_bucket_name" { + type = string + description = "Name of the S3 bucket containing files used for secrets detection scans" +} + +variable "s3_bucket_remote_states" { + type = string + description = "Name of the S3 bucket containing the remote states of the infrastructure" + +} + +variable "dynamodb_table_remote_states" { + type = string + description = "Name of the DynamoDB table containing the locks used for the remote states representing the infrastructure" +} + +variable "start_schedule" { + type = string + default = "cron(0 6 ? 
* MON *)" + description = "The cron specifying when a new scanning instance should be set up (default is: every Monday at 06:00, expected format: https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/ScheduledEvents.html#CronExpressions)" +} + +variable "token_reference_github_organization_hosting_secrets_finder" { + type = string + default = null + description = "Name of the secret stored in Secrets Manager containing the GitHub token for the organization hosting the secrets-finder code. Leave empty if the repository is publicly accessible." + + validation { + condition = var.token_reference_github_organization_hosting_secrets_finder == null || can(regex("^[a-zA-Z0-9/_+=.@-]{1,512}$", var.token_reference_github_organization_hosting_secrets_finder)) + error_message = "The secret name is invalid" + } +} + +variable "github_organization_hosting_secrets_finder" { + type = string + description = "Name of the GitHub Organization where the repository containing the secrets-finder code is hosted" + + validation { + condition = can(regex("^[a-zA-Z0-9][a-zA-Z0-9-]{0,38}$", var.github_organization_hosting_secrets_finder)) # one leading alphanumeric plus up to 38 more characters = 1 to 39 in total + error_message = "The GitHub organization name must start with a letter or number, can include dashes, and be between 1 and 39 characters." + } +} + +variable "github_repository_hosting_secrets_finder" { + type = string + description = "Name of the GitHub Repository containing the secrets-finder code" + + validation { + condition = can(regex("^[a-zA-Z0-9_.-]{1,100}$", var.github_repository_hosting_secrets_finder)) + error_message = "The GitHub repository name must be between 1 and 100 characters, and can include letters, numbers, underscores, periods, and dashes." + } +} + +variable "sns_topic_receiver" { + type = string + default = null + description = "Email address of the receiver of the SNS topic to which important notifications are sent. Leave empty if no notifications should be sent." 
+ + validation { + condition = var.sns_topic_receiver == null || can(regex("^(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|\"(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21\\x23-\\x5b\\x5d-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x21-\\x5a\\x53-\\x7f]|\\\\[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f])+)\\])$", var.sns_topic_receiver)) + error_message = "The email address of the receiver is invalid." + } +} + +variable "ebs_encryption_key_arn" { + type = string + default = null + description = "The ARN of the KMS key used to encrypt the EBS volumes" +} + +variable "ami_encryption_key_arn" { + type = string + default = null + description = "The ARN of the KMS key used to decrypt/encrypt the AMI used for the scanning instances" +} + +variable "terraform_version" { + type = string + default = "1.8.5" + description = "Version of Terraform to use when starting a new scan from CodeBuild" + + validation { + condition = can(regex("^([0-9]+)\\.([0-9]+)\\.([0-9]+)(?:-([0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?(?:\\+[0-9A-Za-z-]+)?$", var.terraform_version)) + error_message = "The Terraform version should be in the format 'x.y.z'" + } +} + +variable "trufflehog_configuration_file" { + type = string + default = null + description = "Path to the Trufflehog configuration file. Leave empty if no configuration file should be used." + + validation { + condition = var.trufflehog_configuration_file == null || try(fileexists(var.trufflehog_configuration_file), false) # fileexists() returns false for a missing path instead of failing, so wrapping it in can() accepted any path + error_message = "The Trufflehog configuration file must exist." 
+ } + +} + +variable "instance_user" { + type = string + default = "secrets-finder" + description = "Username to create and use on the instances started for the scanning process" + + validation { + condition = can(regex("^[a-zA-Z0-9]([a-zA-Z0-9-_]+[a-zA-Z0-9])*$", var.instance_user)) + error_message = "instance_user must contain only alphanumeric characters, dashes, and underscores, and must not start or end with a dash or underscore." + } +} + +variable "scans" { + description = "List of scans to perform" + type = list(object({ + identifier = string + scm = string + credentials_reference = string + ec2_instance_type = string + files = optional(list(string)) + repositories_to_scan = optional(string) + terminate_instance_on_error = optional(bool) + terminate_instance_after_scan = optional(bool) + report_only_verified = optional(bool) + })) + + validation { + condition = length(var.scans) > 0 + error_message = "The scans list must be defined and not empty." + } + + validation { + condition = ( + alltrue([ + for scan in var.scans : can(regex("^[a-zA-Z0-9]([a-zA-Z0-9-_]+[a-zA-Z0-9])*$", scan.identifier)) + ]) + ) + error_message = "The identifier field must contain only alphanumeric characters, dashes, and underscores, and must not start or end with a dash or underscore." + } + + validation { + condition = ( + alltrue([ + for scan in var.scans : contains(["github", "azure_devops", "custom"], scan.scm) + ]) + ) + error_message = "The scm field must be one of 'github', 'azure_devops', 'custom'." + } + + validation { + condition = ( + alltrue([ + for scan in var.scans : length(scan.credentials_reference) > 0 + ]) + ) + error_message = "Credentials reference must not be empty." + } + + validation { + condition = ( + alltrue([ + for scan in var.scans : scan.files == null ? true : alltrue([for file in scan.files : try(fileexists(file), false)]) + ]) + ) + error_message = "All files in the 'files' field must exist." 
+ } + + validation { + condition = ( + alltrue([ + for scan in var.scans : scan.repositories_to_scan == null ? true : fileexists(scan.repositories_to_scan) + ]) + ) + error_message = "When set, repositories_to_scan should reference an existing file on the local system." + } + + validation { + condition = ( + alltrue([ + for scan in var.scans : contains(jsondecode(file("../../../../../configuration/secrets-finder/aws/aws_ec2_instances.json")), scan.ec2_instance_type) + ]) + ) + error_message = "The ec2_instance_type field must be a valid AWS EC2 instance type." + } +} + +variable "enable_datadog_monitors" { + type = bool + default = true + description = "Define whether Datadog monitors should be set up to monitor the status of the EC2 instances and the Codebuild project. If this variable is set to 'true', both 'datadog_api_key_reference' and 'datadog_application_key_reference' variables should be set, and the corresponding secrets should exist in Parameter Store." +} + +variable "datadog_account" { + type = string + default = null + description = "The name of the Datadog account to which EC2 instance metrics should be reported and where monitors are set up. This variable is only used if 'enable_datadog_monitors' variable is set to 'true'." +} + +variable "datadog_api_key_reference" { + type = string + default = null + description = "Name of the secret stored in Secrets Manager and containing the Datadog API key. Leave empty if Datadog should not be configured." + + validation { + condition = (var.datadog_api_key_reference == null) || can(regex("^[a-zA-Z0-9/_+=.@-]{1,512}$", var.datadog_api_key_reference)) + error_message = "The secret name is invalid" + } +} + +variable "datadog_application_key_reference" { + type = string + default = null + description = "Name of the secret stored in Secrets Manager and containing the Datadog application key. Leave empty if Datadog monitors should not be configured." 
+ + validation { + condition = (var.datadog_application_key_reference == null) || can(regex("^[a-zA-Z0-9/_+=.@-]{1,512}$", var.datadog_application_key_reference)) + error_message = "The secret name is invalid" + } +} + +variable "datadog_monitors_notify_list" { + type = list(string) + default = [] + description = "List of recipients to notify whenever an alert is triggered. The format for each recipient should conform with the official specification (https://docs.datadoghq.com/monitors/notify/#notifications). This list is only considered if 'enable_datadog_monitors' variable is set to 'true'." +} + +variable "datadog_ec2_instance_monitor_ec2_age_limit" { + type = number + default = 1 + description = "Time (in hours) to wait before considering an instance in an unhealthy state. Value should be between 1 and 72 and is only considered if 'enable_datadog_monitors' is set to 'true'." + + validation { + condition = var.datadog_ec2_instance_monitor_ec2_age_limit >= 1 && var.datadog_ec2_instance_monitor_ec2_age_limit <= 72 + error_message = "The value should be between 1 and 72 (hours)" + } +} + +variable "datadog_tags" { + type = list(string) + default = [] + description = "A list of tags for Datadog" +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/automation/vpc.tf b/infrastructure/secrets-finder/scheduled-scans/aws/automation/vpc.tf new file mode 100644 index 0000000..9103bd4 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/automation/vpc.tf @@ -0,0 +1,18 @@ +data "aws_vpc" "vpc" { + filter { + name = "tag:Name" + values = [var.vpc_name] + } +} + +data "aws_subnets" "selected" { + filter { + name = "tag:Name" + values = [var.subnet_name] + } + + filter { + name = "available-ip-address-count" + values = range(1, 200) + } +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/README.md b/infrastructure/secrets-finder/scheduled-scans/aws/scan/README.md new file mode 100644 index 0000000..565811e --- /dev/null +++ 
b/infrastructure/secrets-finder/scheduled-scans/aws/scan/README.md @@ -0,0 +1,80 @@ +# scan + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >=1.7 | +| [aws](#requirement\_aws) | ~> 5.0 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | ~> 5.0 | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [aws_iam_instance_profile.ec2_instance_profile](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource | +| [aws_iam_policy.permissions_for_ec2_instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.ec2_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy_attachment.permissions_for_ec2_instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_instance.secrets_finder](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/instance) | resource | +| [aws_security_group.new_security_groups](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) | resource | +| [aws_ami.amazon_ami](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ami) | data source | +| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | +| [aws_iam_policy_document.ec2_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.policy_document_permissions_for_ec2_instance](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| 
[aws_s3_bucket.secrets_finder](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_bucket) | data source | +| [aws_s3_object.setup](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/s3_object) | data source | +| [aws_secretsmanager_secret.credentials](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_secretsmanager_secret.datadog_api_key](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/secretsmanager_secret) | data source | +| [aws_security_group.existing_security_groups](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/security_group) | data source | +| [aws_subnets.selected](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source | +| [aws_vpc.vpc](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/vpc) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [ami\_image\_filter](#input\_ami\_image\_filter) | Filter to use to find the Amazon Machine Image (AMI) to use for the EC2 instance the name can contain wildcards. Only GNU/Linux images are supported. 
| `string` | `"amzn2-ami-hvm*"` | no | +| [ami\_owner](#input\_ami\_owner) | Owner of the Amazon Machine Image (AMI) to use for the EC2 instance | `string` | `"amazon"` | no | +| [aws\_profile](#input\_aws\_profile) | AWS profile to use for authentication | `string` | `"default"` | no | +| [aws\_region](#input\_aws\_region) | AWS region where to deploy resources | `string` | `"us-east-1"` | no | +| [credentials\_reference](#input\_credentials\_reference) | Name of the secret stored in Secrets Manager and containing the credentials to use for the scan | `string` | n/a | yes | +| [datadog\_account](#input\_datadog\_account) | The name of the Datadog account to which EC2 instance metrics should be reported and where monitors are set up. This variable is only used if 'datadog\_enable\_ec2\_instance\_metrics' variable is set to 'true'. | `string` | `null` | no | +| [datadog\_api\_key\_reference](#input\_datadog\_api\_key\_reference) | Name of the secret stored in Secrets Manager and containing the Datadog API key | `string` | `null` | no | +| [datadog\_enable\_ec2\_instance\_metrics](#input\_datadog\_enable\_ec2\_instance\_metrics) | Enable the metrics for the EC2 instance in Datadog (should be 'true' if monitors are being used to track the health of the EC2 instance) | `bool` | `true` | no | +| [environment\_type](#input\_environment\_type) | Environment (PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB) | `string` | `"PRODUCTION"` | no | +| [existing\_security\_groups](#input\_existing\_security\_groups) | List of names representing existing security groups to add to the EC2 instance | `list(string)` | `[]` | no | +| [iam\_role\_path](#input\_iam\_role\_path) | The path to use when creating IAM roles | `string` | `"/"` | no | +| [instance\_type](#input\_instance\_type) | instance\_type must be a valid AWS EC2 instance type. 
| `string` | `"t3a.medium"` | no | +| [instance\_user](#input\_instance\_user) | Username to create and use on the instance started for the scanning process | `string` | `"secrets-finder"` | no | +| [new\_security\_groups](#input\_new\_security\_groups) | Security groups to create (see: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group) |
list(object({
name = string,
description = string,
ingress : optional(list(object({
from_port = number,
to_port = number,
protocol = any,
description = optional(string),
cidr_blocks = optional(list(string), []),
ipv6_cidr_blocks = optional(list(string), []),
security_groups = optional(list(string), []),
prefix_list_ids = optional(list(string), [])
})), []),
egress : optional(list(object({
from_port = number,
to_port = number,
protocol = any,
description = optional(string),
cidr_blocks = optional(list(string), []),
ipv6_cidr_blocks = optional(list(string), []),
security_groups = optional(list(string), []),
prefix_list_ids = optional(list(string), [])
})), [])
}))
| `[]` | no | +| [permissions\_boundary\_arn](#input\_permissions\_boundary\_arn) | The name of the IAM permissions boundary to attach to the IAM role created by the module | `string` | `null` | no | +| [project\_name](#input\_project\_name) | Name of the project (should be the same across all modules of secrets-finder to ensure consistency) | `string` | `"secrets-finder"` | no | +| [s3\_bucket\_name](#input\_s3\_bucket\_name) | S3 bucket name where to upload the scripts | `string` | n/a | yes | +| [scan\_identifier](#input\_scan\_identifier) | Identifier of the scan | `string` | n/a | yes | +| [scm](#input\_scm) | SCM to use for the scan | `string` | n/a | yes | +| [sns\_topic\_arn](#input\_sns\_topic\_arn) | ARN of the SNS topic to use for notifications. Leave empty if SNS notifications are not needed. | `string` | `null` | no | +| [subnet\_name](#input\_subnet\_name) | Identifier of the subnet where to deploy the EC2 instance | `string` | n/a | yes | +| [tags](#input\_tags) | A map of tags to add to the resources | `map(string)` | `{}` | no | +| [trufflehog\_processes](#input\_trufflehog\_processes) | Define the number of scanning processes that should be spawned by TruffleHog. WARNING: This may be resource intensive and consume all the host resources. 
| `number` | `20` | no | +| [trufflehog\_version](#input\_trufflehog\_version) | Version of TruffleHog to use | `string` | `"3.78.2"` | no | +| [vpc\_name](#input\_vpc\_name) | Identifier of the VPC to use | `string` | n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [ec2\_instance\_arn](#output\_ec2\_instance\_arn) | n/a | +| [ec2\_instance\_id](#output\_ec2\_instance\_id) | n/a | +| [ec2\_role\_arn](#output\_ec2\_role\_arn) | n/a | + diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/iam.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/iam.tf new file mode 100644 index 0000000..a998010 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/iam.tf @@ -0,0 +1,110 @@ +resource "aws_iam_role" "ec2_role" { + name = "${var.project_name}-ec2-role" + assume_role_policy = data.aws_iam_policy_document.ec2_assume_role.json + path = var.iam_role_path + permissions_boundary = var.permissions_boundary_arn +} + +data "aws_iam_policy_document" "ec2_assume_role" { + statement { + effect = "Allow" + principals { + identifiers = ["ec2.amazonaws.com"] + type = "Service" + } + actions = ["sts:AssumeRole"] + } +} + +data "aws_iam_policy_document" "policy_document_permissions_for_ec2_instance" { + statement { + sid = "ListS3Bucket" + effect = "Allow" + actions = ["s3:ListBucket"] + resources = [data.aws_s3_bucket.secrets_finder.arn] + } + + statement { + sid = "GetAndPutObjectsInS3Bucket" + effect = "Allow" + actions = [ + "s3:GetObject*", + "s3:PutObject*" + ] + resources = ["${data.aws_s3_bucket.secrets_finder.arn}/*"] + } + + statement { + sid = "AccessSecretInSecretsManager" + effect = "Allow" + actions = [ + "secretsmanager:GetResourcePolicy", + "secretsmanager:DescribeSecret", + "secretsmanager:GetSecretValue", + ] + resources = [data.aws_secretsmanager_secret.credentials.arn] + } + + dynamic "statement" { + for_each = (var.datadog_api_key_reference != null) ? 
[var.datadog_api_key_reference] : [] + content { + sid = "FetchDatadogAPIKey" + effect = "Allow" + actions = [ + "secretsmanager:GetResourcePolicy", + "secretsmanager:DescribeSecret", + "secretsmanager:GetSecretValue" + ] + resources = [data.aws_secretsmanager_secret.datadog_api_key[0].arn] + } + } + + statement { + sid = "AllowTerminationOfEC2Instance" + effect = "Allow" + actions = [ + "ec2:TerminateInstances" + ] + resources = ["arn:aws:ec2:${var.aws_region}:${data.aws_caller_identity.current.account_id}:instance/*"] + + condition { + test = "StringLike" + variable = "aws:ResourceTag/Name" + values = ["${var.project_name}*"] + } + + condition { + test = "StringLike" + variable = "ec2:InstanceProfile" + values = [aws_iam_instance_profile.ec2_instance_profile.arn] + } + } + + dynamic "statement" { + for_each = var.sns_topic_arn != null ? [var.sns_topic_arn] : [] + content { + sid = "AllowToEmitImportantNotifications" + effect = "Allow" + actions = [ + "sns:Publish" + ] + resources = [var.sns_topic_arn] + } + } +} + +resource "aws_iam_policy" "permissions_for_ec2_instance" { + name = "${var.project_name}-ec2-permissions" + description = "Policy granting necessary permissions to EC2 instance" + policy = data.aws_iam_policy_document.policy_document_permissions_for_ec2_instance.json +} + +resource "aws_iam_role_policy_attachment" "permissions_for_ec2_instance" { + policy_arn = aws_iam_policy.permissions_for_ec2_instance.arn + role = aws_iam_role.ec2_role.name +} + +resource "aws_iam_instance_profile" "ec2_instance_profile" { + name = "${var.project_name}-instance-profile" + role = aws_iam_role.ec2_role.name +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/images.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/images.tf new file mode 100644 index 0000000..ae4a371 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/images.tf @@ -0,0 +1,9 @@ +data "aws_ami" "amazon_ami" { + most_recent = true + owners = 
[var.ami_owner] + + filter { + name = "name" + values = ["${var.ami_image_filter}"] + } +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/locals.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/locals.tf new file mode 100644 index 0000000..34652b7 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/locals.tf @@ -0,0 +1,4 @@ +locals { + environment = replace(lower(var.environment_type), " ", "-") + tags = merge(try(var.tags, {}), { environment = local.environment }) +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/main.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/main.tf new file mode 100644 index 0000000..3c30cf4 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/main.tf @@ -0,0 +1,38 @@ +resource "aws_instance" "secrets_finder" { + ami = data.aws_ami.amazon_ami.id + instance_type = var.instance_type + subnet_id = data.aws_subnets.selected.ids[0] + iam_instance_profile = aws_iam_instance_profile.ec2_instance_profile.name + vpc_security_group_ids = concat([for sg in data.aws_security_group.existing_security_groups : sg.id], [for sg in aws_security_group.new_security_groups : sg.id]) + + user_data_replace_on_change = true + + root_block_device { + volume_size = 30 + volume_type = "gp2" + delete_on_termination = true + } + + user_data = data.aws_s3_object.setup.body + + tags = merge( + { + Name = "${var.project_name}-${var.scan_identifier}" + }, + [ + (var.datadog_enable_ec2_instance_metrics == true) ? { datadog-account = var.datadog_account } : null + ]... 
+ ) + + lifecycle { + precondition { + condition = (var.datadog_enable_ec2_instance_metrics == false) || (var.datadog_enable_ec2_instance_metrics == true && var.datadog_account != null) + error_message = "EC2 instance metrics should be enabled but no Datadog account was provided (variable 'datadog_account' has no value)" + } + } + + depends_on = [ + aws_iam_policy.permissions_for_ec2_instance, + aws_iam_role_policy_attachment.permissions_for_ec2_instance + ] +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/outputs.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/outputs.tf new file mode 100644 index 0000000..ec5953c --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/outputs.tf @@ -0,0 +1,11 @@ +output "ec2_role_arn" { + value = aws_iam_role.ec2_role.arn +} + +output "ec2_instance_id" { + value = aws_instance.secrets_finder.id +} + +output "ec2_instance_arn" { + value = aws_instance.secrets_finder.arn +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/providers.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/providers.tf new file mode 100644 index 0000000..d089c28 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/providers.tf @@ -0,0 +1,20 @@ +terraform { + required_version = ">=1.7" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } + + backend "s3" { + encrypt = true + } +} + +provider "aws" { + region = var.aws_region + profile = var.aws_profile + default_tags { tags = local.tags } +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/s3.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/s3.tf new file mode 100644 index 0000000..df05240 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/s3.tf @@ -0,0 +1,8 @@ +data "aws_s3_bucket" "secrets_finder" { + bucket = var.s3_bucket_name +} + +data "aws_s3_object" "setup" { + bucket = data.aws_s3_bucket.secrets_finder.id + 
key = "secrets-finder/scheduled-scans/scans/${var.scan_identifier}/setup/setup.sh" +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/s3.tfbackend b/infrastructure/secrets-finder/scheduled-scans/aws/scan/s3.tfbackend new file mode 100644 index 0000000..6fa4016 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/s3.tfbackend @@ -0,0 +1,5 @@ +bucket = "" +key = "" +region = "" +dynamodb_table = "" +profile = "" diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/secrets-manager.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/secrets-manager.tf new file mode 100644 index 0000000..9911575 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/secrets-manager.tf @@ -0,0 +1,8 @@ +data "aws_secretsmanager_secret" "datadog_api_key" { + count = var.datadog_api_key_reference != null ? 1 : 0 + name = var.datadog_api_key_reference +} + +data "aws_secretsmanager_secret" "credentials" { + name = var.credentials_reference +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/security-groups.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/security-groups.tf new file mode 100644 index 0000000..a2c0b52 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/security-groups.tf @@ -0,0 +1,43 @@ +data "aws_security_group" "existing_security_groups" { + for_each = { for sg in var.existing_security_groups : sg => sg } + filter { + name = "group-name" + values = [each.value] + } + vpc_id = data.aws_vpc.vpc.id +} + +resource "aws_security_group" "new_security_groups" { + for_each = { for sg in var.new_security_groups : sg.name => sg } + name = each.value.name + description = each.value.description + vpc_id = data.aws_vpc.vpc.id + + dynamic "ingress" { + for_each = each.value["ingress"] + content { + from_port = ingress.value.from_port + to_port = ingress.value.to_port + protocol = ingress.value.protocol + cidr_blocks = ingress.value.cidr_blocks + 
description = ingress.value.description + ipv6_cidr_blocks = ingress.value.ipv6_cidr_blocks + security_groups = ingress.value.security_groups + prefix_list_ids = ingress.value.prefix_list_ids + } + } + + dynamic "egress" { + for_each = each.value["egress"] + content { + from_port = egress.value.from_port + to_port = egress.value.to_port + protocol = egress.value.protocol + cidr_blocks = egress.value.cidr_blocks + description = egress.value.description + ipv6_cidr_blocks = egress.value.ipv6_cidr_blocks + security_groups = egress.value.security_groups + prefix_list_ids = egress.value.prefix_list_ids + } + } +} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/sts.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/sts.tf new file mode 100644 index 0000000..8fc4b38 --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/sts.tf @@ -0,0 +1 @@ +data "aws_caller_identity" "current" {} diff --git a/infrastructure/secrets-finder/scheduled-scans/aws/scan/variables.tf b/infrastructure/secrets-finder/scheduled-scans/aws/scan/variables.tf new file mode 100644 index 0000000..018b07b --- /dev/null +++ b/infrastructure/secrets-finder/scheduled-scans/aws/scan/variables.tf @@ -0,0 +1,270 @@ +variable "aws_region" { + type = string + default = "us-east-1" + description = "AWS region where to deploy resources" + + validation { + condition = can(regex("^(af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+$", var.aws_region)) + error_message = "You should enter a valid AWS region (https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html)" + } +} + +variable "aws_profile" { + type = string + default = "default" + description = "AWS profile to use for authentication" +} + +variable "tags" { + type = map(string) + description = "A map of tags to add to the resources" + default = {} + + validation { + condition = alltrue([for v in values(var.tags) : v != ""]) + 
error_message = "Tag values must not be empty." + } +} + +variable "project_name" { + type = string + default = "secrets-finder" + description = "Name of the project (should be the same across all modules of secrets-finder to ensure consistency)" +} + +variable "environment_type" { + type = string + default = "PRODUCTION" + description = "Environment (PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB)" + + validation { + condition = contains(["PRODUCTION", "PRE-PRODUCTION", "QUALITY ASSURANCE", "INTEGRATION TESTING", "DEVELOPMENT", "LAB"], var.environment_type) + error_message = "The environment type should be one of the following values: PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB (case sensitive)" + } +} + +variable "vpc_name" { + type = string + description = "Identifier of the VPC to use" +} + +variable "subnet_name" { + type = string + description = "Identifier of the subnet where to deploy the EC2 instance" +} + +variable "s3_bucket_name" { + type = string + description = "S3 bucket name where to upload the scripts" +} + +variable "permissions_boundary_arn" { + type = string + default = null + description = "The name of the IAM permissions boundary to attach to the IAM role created by the module" + + validation { + condition = var.permissions_boundary_arn == null || can(regex("^arn:aws:iam::[0-9]{12}:policy\\/([a-zA-Z0-9-_.]+)$", var.permissions_boundary_arn)) + error_message = "The provided ARN is not a valid ARN for a policy" + } +} + +variable "iam_role_path" { + type = string + default = "/" + description = "The path to use when creating IAM roles" + + validation { + condition = can(regex("^\\/([a-zA-Z0-9]+([-a-zA-Z0-9]*[a-zA-Z0-9]+)?\\/)*$", var.iam_role_path)) + error_message = "The provided path is invalid" + } +} + +variable "scm" { + type = string + description = "SCM to use for the scan" + + validation { + condition = contains(["github", "azure_devops", "custom"], var.scm) + 
error_message = "scm must be one of 'github', 'azure_devops', 'custom'." + } +} + +variable "scan_identifier" { + type = string + description = "Identifier of the scan" + + validation { + condition = can(regex("^[a-zA-Z0-9]([a-zA-Z0-9-_]*[a-zA-Z0-9])?$", var.scan_identifier)) + error_message = "scan_identifier must contain only alphanumeric characters, dashes, and underscores, and must not start or end with a dash or underscore." + } +} + +variable "credentials_reference" { + type = string + description = "Name of the secret stored in Secrets Manager and containing the credentials to use for the scan" + + validation { + condition = can(regex("^[a-zA-Z0-9/_+=.@-]{1,512}$", var.credentials_reference)) + error_message = "The secret name is invalid" + } +} + +variable "sns_topic_arn" { + type = string + default = null + description = "ARN of the SNS topic to use for notifications. Leave empty if SNS notifications are not needed." + + validation { + condition = var.sns_topic_arn == null || can(regex("^arn:aws:sns:((af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+):[0-9]{12}:([a-zA-Z0-9]([a-zA-Z0-9-_]+[a-zA-Z0-9])*)$", var.sns_topic_arn)) + error_message = "The SNS topic ARN is invalid" + } + +} + +variable "ami_owner" { + type = string + default = "amazon" + description = "Owner of the Amazon Machine Image (AMI) to use for the EC2 instance" +} + +variable "ami_image_filter" { + type = string + default = "amzn2-ami-hvm*" + description = "Filter to use to find the Amazon Machine Image (AMI) to use for the EC2 instance the name can contain wildcards. Only GNU/Linux images are supported." + +} + +variable "instance_type" { + type = string + default = "t3a.medium" + description = "instance_type must be a valid AWS EC2 instance type." 
+ + validation { + condition = contains(jsondecode(file("../../../../../configuration/secrets-finder/aws/aws_ec2_instances.json")), var.instance_type) + error_message = "instance_type must be a valid AWS EC2 instance type." + } +} + +variable "instance_user" { + type = string + default = "secrets-finder" + description = "Username to create and use on the instance started for the scanning process" + + validation { + condition = can(regex("^[a-zA-Z0-9]([a-zA-Z0-9-_]+[a-zA-Z0-9])*$", var.instance_user)) + error_message = "instance_user must contain only alphanumeric characters, dashes, and underscores, and must not start or end with a dash or underscore." + } +} + +variable "existing_security_groups" { + type = list(string) + default = [] + description = "List of names representing existing security groups to add to the EC2 instance" +} + +variable "new_security_groups" { + type = list(object({ + name = string, + description = string, + ingress : optional(list(object({ + from_port = number, + to_port = number, + protocol = any, + description = optional(string), + cidr_blocks = optional(list(string), []), + ipv6_cidr_blocks = optional(list(string), []), + security_groups = optional(list(string), []), + prefix_list_ids = optional(list(string), []) + })), []), + egress : optional(list(object({ + from_port = number, + to_port = number, + protocol = any, + description = optional(string), + cidr_blocks = optional(list(string), []), + ipv6_cidr_blocks = optional(list(string), []), + security_groups = optional(list(string), []), + prefix_list_ids = optional(list(string), []) + })), []) + })) + + default = [] + + description = "Security groups to create (see: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group)" + + validation { + condition = alltrue([for sg in var.new_security_groups : (length(lookup(sg, "ingress", [])) != 0) || (length(lookup(sg, "egress", [])) != 0)]) + error_message = "All security groups should contain at least one 
ingress or egress rule" + } + + validation { + condition = alltrue([for sg in var.new_security_groups : alltrue([for v in concat(lookup(sg, "ingress", []), lookup(sg, "egress", [])) : (length(lookup(v, "cidr_blocks", [])) != 0) || (length(lookup(v, "ipv6_cidr_blocks", [])) != 0) || (length(lookup(v, "security_groups", [])) != 0) || (length(lookup(v, "prefix_list_ids", [])) != 0)])]) + error_message = "All rules must define at least one of the following attributes: cidr_blocks, ipv6_cidr_blocks, security_groups, prefix_list_ids" + } + + validation { + condition = alltrue([for sg in var.new_security_groups : alltrue([for v in concat(lookup(sg, "ingress", []), lookup(sg, "egress", [])) : can(regex("^((6553[0-5])|(655[0-2][0-9])|(65[0-4][0-9]{2})|(6[0-4][0-9]{3})|([1-5][0-9]{4})|([0-5]{0,5})|([0-9]{1,4}))$", v["from_port"]))])]) + error_message = "All 'from_port' values must refer to a valid port or a valid ICMP type number (see: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule#from_port)" + } + + validation { + condition = alltrue([for sg in var.new_security_groups : alltrue([for v in concat(lookup(sg, "ingress", []), lookup(sg, "egress", [])) : can(regex("^((6553[0-5])|(655[0-2][0-9])|(65[0-4][0-9]{2})|(6[0-4][0-9]{3})|([1-5][0-9]{4})|([0-5]{0,5})|([0-9]{1,4}))$", v["to_port"]))])]) + error_message = "All 'to_port' values must refer to a valid port or a valid ICMP type number (see: https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule#to_port)" + } + + validation { + condition = alltrue([for sg in var.new_security_groups : alltrue([for v in concat(lookup(sg, "ingress", []), lookup(sg, "egress", [])) : can(regex("^((icmp(v6)?)|(tcp)|(udp)|(all)|(-1)|((25[0-5])|(2[0-4][0-9])|(1[0-9]{2})|([1-9][0-9])|([0-9])))$", v["protocol"]))])]) + error_message = "All 'protocol' values must refer to a valid value (see: 
https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/security_group_rule#protocol)" + } + + validation { + condition = alltrue([for sg in var.new_security_groups : alltrue([for v in concat(lookup(sg, "ingress", []), lookup(sg, "egress", [])) : v["cidr_blocks"] == null || alltrue([for address in v["cidr_blocks"] : can(regex("^((25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9]).){3}((25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9]))/(0|0?[1-9]|[12][0-9]|3[012])$", address))])])]) + error_message = "All 'cidr_blocks' should contain IP addresses denoted in CIDR format (xx.xx.xx.xx/yy)" + } + + validation { + condition = alltrue([for sg in var.new_security_groups : alltrue([for v in concat(lookup(sg, "ingress", []), lookup(sg, "egress", [])) : v["ipv6_cidr_blocks"] == null || alltrue([for address in v["ipv6_cidr_blocks"] : can(regex("^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))/(([0-9])|([1-9][0-9])|(1[0-1][0-9])|(12[0-8]))$", address))])])]) + error_message = "All 'ipv6_cidr_blocks' should contain IPv6 addresses denoted in CIDR format" + } +} + +variable "trufflehog_version" { + type = string + default = "3.78.2" + description = "Version of TruffleHog to use" +} + +variable "trufflehog_processes" { + type = number + default = 20 + description = "Define the number of scanning processes that should be spawned by TruffleHog. 
# Optional name of the Secrets Manager secret that holds the Datadog API key.
# Leave it null when no secret reference is provided.
variable "datadog_api_key_reference" {
  type        = string
  default     = null
  description = "Name of the secret stored in Secrets Manager and containing the Datadog API key"

  validation {
    # The name must start and end with an alphanumeric character and may contain
    # hyphens and underscores in between. The inner part is optional so that
    # one- and two-character names are accepted as well (the previous pattern
    # "X(Y+X)*" could never match a name of exactly two characters).
    condition     = (var.datadog_api_key_reference == null) || can(regex("^[a-zA-Z0-9]([a-zA-Z0-9_-]*[a-zA-Z0-9])?$", var.datadog_api_key_reference))
    error_message = "The secret name is invalid"
  }
}
# Selects the subnets whose "Name" tag equals var.subnet_name and that still
# report free IP addresses.
data "aws_subnets" "selected" {
  filter {
    name   = "tag:Name"
    values = [var.subnet_name]
  }

  # range(1, 200) expands to the values 1..199 (the upper bound is exclusive),
  # so the filter matches subnets whose available-ip-address-count is one of
  # those values.
  # NOTE(review): a subnet reporting 200 or more free addresses would not match
  # this filter - confirm that this is intended.
  # NOTE(review): there is no "vpc-id" filter here, so the selection presumably
  # relies on the Name tag being unique across VPCs - verify.
  filter {
    name   = "available-ip-address-count"
    values = range(1, 200)
  }
}
def run_command(
    command,
    accepted_nonzero_return_codes=None,
    env=None,
    output_file=None,
    error_file=None,
    append_output=False,
    append_error=False,
):
    """Run a shell-like command line and wait for it to finish.

    Args:
        command: Command line; it is tokenized with ``shlex.split`` and run
            without a shell.
        accepted_nonzero_return_codes: Optional collection of non-zero exit
            codes that must not be treated as a failure.
        env: Optional mapping of extra environment variables, merged on top of
            the current process environment.
        output_file: Optional path receiving the command's standard output.
        error_file: Optional path receiving the command's standard error.
        append_output: Append to ``output_file`` instead of truncating it.
        append_error: Append to ``error_file`` instead of truncating it.

    Returns:
        A ``(stdout, stderr)`` tuple of decoded strings. Streams are either
        redirected to a file or inherited from the parent process (never
        piped), so both items are ``None`` in practice.

    Raises:
        Exception: If the command exits with a non-zero code that is not listed
            in ``accepted_nonzero_return_codes``.
        OSError: If the executable or one of the files cannot be opened.
    """
    if env is None:
        env = os.environ.copy()
    else:
        env = {**os.environ.copy(), **env}

    args = shlex.split(command)

    output_mode = "a" if append_output else "w"
    error_mode = "a" if append_error else "w"

    out = None
    err = None
    try:
        if output_file:
            out = open(output_file, output_mode)
        if error_file:
            err = open(error_file, error_mode)

        process = subprocess.Popen(args, stdout=out, stderr=err, env=env)
        stdout, stderr = process.communicate()
    finally:
        # Always release the file handles, including when the second open() or
        # Popen() itself raises (e.g. the binary does not exist).
        if out is not None:
            out.close()
        if err is not None:
            err.close()

    if process.returncode != 0 and (
        accepted_nonzero_return_codes is None
        or process.returncode not in accepted_nonzero_return_codes
    ):
        error_message = f"Command '{command}' failed"
        if stderr:
            error_message += f" with error: {stderr.decode()}"
        raise Exception(error_message)

    return stdout.decode() if stdout else None, stderr.decode() if stderr else None
def configure_parser():
    """Declare the command-line interface of the helper and parse ``sys.argv``.

    Boolean flags and the SOPS binary path / AWS profile fall back to
    environment variables when the option is not given on the command line.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """

    def env_is_true(variable):
        # A flag is enabled from the environment only by the literal "true"
        # (case-insensitive).
        return os.environ.get(variable, "false").lower() == "true"

    cli = argparse.ArgumentParser(
        prog="secrets-finder-helper",
        description="This script offers a wrapper to create secrets in Secrets Manager using a file encrypted with SOPS.",
        epilog="This script has been developed by Thomson Reuters. For issues, comments or help, you can contact the maintainers on the official GitHub repository: https://github.com/thomsonreuters/secrets-finder",
    )

    # Switches, each with an environment-variable fallback.
    cli.add_argument(
        "--preserve-decrypted-file",
        action="store_true",
        default=env_is_true("SECRETS_FINDER_PRESERVE_DECRYPTED_FILE"),
        help="whether to preserve the decrypted file at the end of execution",
    )
    cli.add_argument(
        "--ignore-warning",
        action="store_true",
        default=env_is_true("SECRETS_FINDER_IGNORE_WARNING"),
        help="whether to ignore warning",
    )

    # Tooling configuration.
    sops_default = os.environ.get("SECRETS_FINDER_SOPS_BINARY_PATH", "sops")
    cli.add_argument(
        "--sops-binary-path",
        default=sops_default,
        help="the path to the SOPS binary",
    )

    # Terraform invocation.
    supported_commands = ["plan", "apply", "destroy"]
    cli.add_argument(
        "--terraform-command",
        choices=supported_commands,
        required=True,
        help="terraform command to run ('plan', 'apply', 'destroy')",
    )
    cli.add_argument(
        "--terraform-options",
        default="",
        help="additional options to pass to the terraform command",
    )

    profile_default = os.environ.get("AWS_PROFILE", "default")
    cli.add_argument(
        "--aws-profile",
        default=profile_default,
        help="AWS profile to use",
    )

    parsed = cli.parse_args()
    return parsed
def main():
    """Decrypt ``secrets.enc.json`` with SOPS and run the requested Terraform command.

    The decrypted ``secrets.json`` is removed at the end unless the user asked
    to preserve it. Any failure (decryption or Terraform) is reported on
    standard output and makes the process exit with status 1.

    Returns:
        The ``(stdout, stderr)`` tuple returned by ``run_command`` for the
        Terraform invocation when it succeeds.
    """
    try:
        arguments = configure_parser()
    except Exception:
        sys.exit(1)

    decrypted_file = "secrets.json"
    failed = False
    result = None

    try:
        if arguments.preserve_decrypted_file and not arguments.ignore_warning:
            print(
                "WARNING: The decrypted file will be preserved at the end of the execution. Make sure to remove it manually."
            )
            confirmation = input("Type 'yes' to continue, or any other key to abort: ")
            if confirmation.lower() != "yes" and confirmation.lower() != "y":
                print("Operation aborted.")
                sys.exit()

        run_command(
            f"{arguments.sops_binary_path} -d secrets.enc.json --aws-profile {arguments.aws_profile}",
            output_file=decrypted_file,
        )

        terraform_command = (
            f"terraform {arguments.terraform_command} {arguments.terraform_options}"
        )
        result = run_command(terraform_command)
    except Exception as e:
        # Terraform failures used to be swallowed by a bare "except: pass";
        # report them like any other error so the caller can notice.
        print(f"ERROR: {e}")
        failed = True
    finally:
        # The file may not exist if decryption never started; do not let the
        # cleanup raise and hide the original error.
        if not arguments.preserve_decrypted_file and os.path.exists(decrypted_file):
            os.remove(decrypted_file)

    if failed:
        sys.exit(1)

    return result
+ version = "~> 5.0" + } + } + + backend "s3" { + encrypt = true + } +} + +provider "aws" { + region = var.aws_region + profile = var.aws_profile + default_tags { tags = local.tags } +} diff --git a/infrastructure/secrets-finder/setup/aws/secrets/s3.tfbackend b/infrastructure/secrets-finder/setup/aws/secrets/s3.tfbackend new file mode 100644 index 0000000..6fa4016 --- /dev/null +++ b/infrastructure/secrets-finder/setup/aws/secrets/s3.tfbackend @@ -0,0 +1,5 @@ +bucket = "" +key = "" +region = "" +dynamodb_table = "" +profile = "" diff --git a/infrastructure/secrets-finder/setup/aws/storage/README.md b/infrastructure/secrets-finder/setup/aws/storage/README.md new file mode 100644 index 0000000..b5f10de --- /dev/null +++ b/infrastructure/secrets-finder/setup/aws/storage/README.md @@ -0,0 +1,66 @@ +# storage + + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >=1.7 | +| [aws](#requirement\_aws) | ~> 5.0 | + +## Providers + +| Name | Version | +|------|---------| +| [aws](#provider\_aws) | ~> 5.0 | + +## Modules + +No modules. 
+ +## Resources + +| Name | Type | +|------|------| +| [aws_iam_policy.s3_access_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_policy.s3_push_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource | +| [aws_iam_role.s3_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role.s3_push](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role) | resource | +| [aws_iam_role_policy_attachment.allow_access_to_s3_bucket](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.allow_push_to_s3_bucket](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_s3_bucket.secrets_finder](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket) | resource | +| [aws_s3_bucket_lifecycle_configuration.versioning-bucket-config](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_lifecycle_configuration) | resource | +| [aws_s3_bucket_public_access_block.disable_public_access](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_public_access_block) | resource | +| [aws_s3_bucket_versioning.versioning](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/s3_bucket_versioning) | resource | +| [aws_iam_policy_document.s3_access_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.s3_access_policy_document](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| 
[aws_iam_policy_document.s3_push_assume_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.s3_push_policy_document](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [aws\_profile](#input\_aws\_profile) | AWS profile to use for authentication | `string` | `"default"` | no | +| [aws\_region](#input\_aws\_region) | AWS region where to deploy resources | `string` | `"us-east-1"` | no | +| [create\_access\_role](#input\_create\_access\_role) | Whether to create an IAM role for accessing the S3 bucket | `bool` | `true` | no | +| [create\_push\_role](#input\_create\_push\_role) | Whether to create an IAM role for accessing the S3 bucket | `bool` | `true` | no | +| [days\_after\_permanent\_deletion\_of\_noncurrent\_versions](#input\_days\_after\_permanent\_deletion\_of\_noncurrent\_versions) | Number of days after permanent deletion of noncurrent versions | `number` | `90` | no | +| [environment\_type](#input\_environment\_type) | Environment (PRODUCTION, PRE-PRODUCTION, QUALITY ASSURANCE, INTEGRATION TESTING, DEVELOPMENT, LAB) | `string` | `"PRODUCTION"` | no | +| [force\_destroy](#input\_force\_destroy) | A boolean that indicates all objects should be deleted from the bucket so that the bucket can be destroyed without error. WARNING: Setting this to true will permanently delete all objects in the bucket when Terraform needs to destroy the resource. 
| `bool` | `false` | no | +| [iam\_role\_path](#input\_iam\_role\_path) | The path to use when creating IAM roles | `string` | `"/"` | no | +| [permissions\_boundary\_arn](#input\_permissions\_boundary\_arn) | The name of the IAM permissions boundary to attach to the IAM role created by the module (if 'create\_access\_role' is set to true) | `string` | `null` | no | +| [principals\_authorized\_to\_access\_bucket](#input\_principals\_authorized\_to\_access\_bucket) | List of AWS account IDs or ARNs that are authorized to assume the role created by the module | `list(string)` | n/a | yes | +| [principals\_authorized\_to\_push\_to\_bucket](#input\_principals\_authorized\_to\_push\_to\_bucket) | List of AWS account IDs or ARNs that are authorized to assume the role created by the module | `list(string)` | n/a | yes | +| [project\_name](#input\_project\_name) | Name of the project (should be the same across all modules of secrets-finder to ensure consistency) | `string` | `"secrets-finder"` | no | +| [s3\_bucket\_name](#input\_s3\_bucket\_name) | S3 bucket name where to upload the scripts | `string` | n/a | yes | +| [tags](#input\_tags) | A map of tags to add to the resources | `map(string)` | `{}` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [s3\_access\_role](#output\_s3\_access\_role) | n/a | +| [s3\_bucket](#output\_s3\_bucket) | ARN of the S3 bucket used for secrets-finder | +| [s3\_push\_role](#output\_s3\_push\_role) | n/a | + diff --git a/infrastructure/secrets-finder/setup/aws/storage/iam.tf b/infrastructure/secrets-finder/setup/aws/storage/iam.tf new file mode 100644 index 0000000..1cb192a --- /dev/null +++ b/infrastructure/secrets-finder/setup/aws/storage/iam.tf @@ -0,0 +1,102 @@ +data "aws_iam_policy_document" "s3_access_assume_role" { + count = var.create_access_role ? 
1 : 0 + statement { + effect = "Allow" + principals { + identifiers = var.principals_authorized_to_access_bucket + type = "AWS" + } + actions = ["sts:AssumeRole"] + } +} + +data "aws_iam_policy_document" "s3_access_policy_document" { + count = var.create_access_role ? 1 : 0 + statement { + sid = "ListS3Bucket" + effect = "Allow" + actions = ["s3:ListBucket"] + resources = [aws_s3_bucket.secrets_finder.arn] + } + + statement { + sid = "GetAndListObjectsInS3Bucket" + effect = "Allow" + actions = [ + "s3:GetObject*", + "s3:ListObject*" + ] + resources = ["${aws_s3_bucket.secrets_finder.arn}/*"] + } +} + +resource "aws_iam_policy" "s3_access_policy" { + count = var.create_access_role ? 1 : 0 + name = "${var.project_name}-s3-access" + description = "Policy allowing to access the S3 bucket of secrets-finder" + policy = data.aws_iam_policy_document.s3_access_policy_document[0].json +} + +resource "aws_iam_role_policy_attachment" "allow_access_to_s3_bucket" { + count = var.create_access_role ? 1 : 0 + + policy_arn = aws_iam_policy.s3_access_policy[0].arn + role = aws_iam_role.s3_access[0].name +} + +resource "aws_iam_role" "s3_access" { + count = var.create_access_role ? 1 : 0 + name = "${var.project_name}-s3-access" + assume_role_policy = data.aws_iam_policy_document.s3_access_assume_role[0].json + path = var.iam_role_path + permissions_boundary = var.permissions_boundary_arn +} + + +data "aws_iam_policy_document" "s3_push_assume_role" { + count = var.create_push_role ? 1 : 0 + statement { + effect = "Allow" + principals { + identifiers = var.principals_authorized_to_push_to_bucket + type = "AWS" + } + actions = ["sts:AssumeRole"] + } +} + + +data "aws_iam_policy_document" "s3_push_policy_document" { + count = var.create_push_role ? 
# Policy document attached to the "push" role: it only allows writing objects
# into the secrets-finder bucket. Created only when var.create_push_role is true.
data "aws_iam_policy_document" "s3_push_policy_document" {
  count = var.create_push_role ? 1 : 0

  statement {
    # The sid describes what the statement grants (it was previously a copy of
    # the read-only statement's sid although only Put actions are allowed).
    sid    = "PutObjectsInS3Bucket"
    effect = "Allow"
    actions = [
      "s3:PutObject*"
    ]
    resources = ["${aws_s3_bucket.secrets_finder.arn}/*"]
  }
}
aws_iam_role.s3_push[*].arn : null + depends_on = [aws_iam_role.s3_push] +} diff --git a/infrastructure/secrets-finder/setup/aws/storage/providers.tf b/infrastructure/secrets-finder/setup/aws/storage/providers.tf new file mode 100644 index 0000000..d089c28 --- /dev/null +++ b/infrastructure/secrets-finder/setup/aws/storage/providers.tf @@ -0,0 +1,20 @@ +terraform { + required_version = ">=1.7" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + } + + backend "s3" { + encrypt = true + } +} + +provider "aws" { + region = var.aws_region + profile = var.aws_profile + default_tags { tags = local.tags } +} diff --git a/infrastructure/secrets-finder/setup/aws/storage/s3.tf b/infrastructure/secrets-finder/setup/aws/storage/s3.tf new file mode 100644 index 0000000..125572d --- /dev/null +++ b/infrastructure/secrets-finder/setup/aws/storage/s3.tf @@ -0,0 +1,35 @@ +resource "aws_s3_bucket" "secrets_finder" { + bucket = var.s3_bucket_name + force_destroy = var.force_destroy != null ? 
var.force_destroy : false +} + +resource "aws_s3_bucket_public_access_block" "disable_public_access" { + bucket = aws_s3_bucket.secrets_finder.id + block_public_acls = true + block_public_policy = true + restrict_public_buckets = true + ignore_public_acls = true +} + +resource "aws_s3_bucket_versioning" "versioning" { + bucket = aws_s3_bucket.secrets_finder.id + versioning_configuration { + status = "Enabled" + } +} + +resource "aws_s3_bucket_lifecycle_configuration" "versioning-bucket-config" { + depends_on = [aws_s3_bucket_versioning.versioning] + + bucket = aws_s3_bucket.secrets_finder.id + + rule { + id = "delete-non-current-versions" + + noncurrent_version_expiration { + noncurrent_days = var.days_after_permanent_deletion_of_noncurrent_versions + } + + status = "Enabled" + } +} diff --git a/infrastructure/secrets-finder/setup/aws/storage/s3.tfbackend b/infrastructure/secrets-finder/setup/aws/storage/s3.tfbackend new file mode 100644 index 0000000..6fa4016 --- /dev/null +++ b/infrastructure/secrets-finder/setup/aws/storage/s3.tfbackend @@ -0,0 +1,5 @@ +bucket = "" +key = "" +region = "" +dynamodb_table = "" +profile = "" diff --git a/infrastructure/secrets-finder/setup/aws/storage/variables.tf b/infrastructure/secrets-finder/setup/aws/storage/variables.tf new file mode 100644 index 0000000..6c14998 --- /dev/null +++ b/infrastructure/secrets-finder/setup/aws/storage/variables.tf @@ -0,0 +1,130 @@ +variable "aws_region" { + type = string + default = "us-east-1" + description = "AWS region where to deploy resources" + + validation { + condition = can(regex("^(af|ap|ca|eu|me|sa|us)-(central|north|(north(?:east|west))|south|south(?:east|west)|east|west)-\\d+$", var.aws_region)) + error_message = "You should enter a valid AWS region (https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html)" + } +} + +variable "aws_profile" { + type = string + default = "default" + description = "AWS profile to use for authentication" +} + 
# Optional permissions boundary applied to the IAM roles created by this module
# (both the access role and the push role reference this variable).
variable "permissions_boundary_arn" {
  type        = string
  default     = null
  description = "The ARN of the IAM permissions boundary to attach to the IAM roles created by the module (if 'create_access_role' or 'create_push_role' is set to true)"

  validation {
    # regex() fails on a null argument, so can(...) alone would reject the
    # default value (null). Accept null explicitly to keep the variable optional.
    condition     = var.permissions_boundary_arn == null || can(regex("^arn:aws:iam::[0-9]{12}:policy\\/([a-zA-Z0-9-_.]+)$", var.permissions_boundary_arn))
    error_message = "The provided ARN is not a valid ARN for a policy"
  }
}
# Toggles the creation of the role (and its policy) that is allowed to put
# objects into the bucket; see the s3_push resources in iam.tf.
variable "create_push_role" {
  type        = bool
  default     = true
  description = "Whether to create an IAM role for pushing objects to the S3 bucket"
}
def positive_int(value):
    """argparse ``type`` callable accepting strictly positive integers.

    Args:
        value: Raw value to convert with ``int()``.

    Returns:
        The converted integer when it is greater than zero.

    Raises:
        argparse.ArgumentTypeError: If the converted value is zero or negative.
        ValueError: If ``value`` cannot be converted by ``int()``.
    """
    number = int(value)
    if number > 0:
        return number
    raise argparse.ArgumentTypeError(f"{value} is an invalid positive int value")
# More information: https://docs.python.org/3/library/urllib.parse.html#url-parsing-security
def valid_uri(value):
    """argparse ``type`` callable rejecting obviously malformed URIs.

    A value is accepted when ``urllib.parse.urlparse`` finds both a scheme and
    a network location in it. This is a sanity check, not a strict validation.

    Args:
        value: Candidate URI.

    Returns:
        ``value`` unchanged when it looks like a URI.

    Raises:
        argparse.ArgumentTypeError: If the value cannot be parsed or lacks a
            scheme or a network location.
    """
    try:
        result = urllib.parse.urlparse(value)
    except ValueError:
        result = None

    # Previously a parsable value without scheme/netloc was silently turned
    # into None instead of being rejected.
    if result is not None and result.scheme and result.netloc:
        return value

    raise argparse.ArgumentTypeError(f"Invalid URI: {value}")
def configure_parser():
    """Build the argument parser of the GitHub organization processor.

    Every option falls back to a ``GITHUB_ORGANIZATION_PROCESSOR_*``
    environment variable. String defaults coming from the environment go
    through the option's ``type`` callable, as argparse does for string
    defaults.

    Returns:
        The configured ``argparse.ArgumentParser`` (arguments are not parsed
        here).
    """
    parser = argparse.ArgumentParser(
        prog="github-organization-processor",
        description="This script fetches all the repositories of a GitHub organization using the standard GitHub API. This script supports both GitHub Enterprise Cloud and GitHub Enterprise Server.",
        epilog="This script has been developed by Thomson Reuters. For issues, comments or help, you can contact the maintainers on the official GitHub repository: https://github.com/thomsonreuters/secrets-finder",
    )

    # The environment variable is a string: interpret it explicitly, otherwise
    # any non-empty value (including "false" or "0") would enable debug mode.
    debug_from_environment = str(
        os.environ.get("GITHUB_ORGANIZATION_PROCESSOR_DEBUG", "")
    ).strip().lower() in ("1", "true", "yes", "on")

    parser.add_argument(
        "--debug",
        action="store_true",
        help="show debug information",
        default=debug_from_environment,
    )
    parser.add_argument(
        "--api",
        help="base URL of the API",
        type=valid_uri,
        default=os.environ.get(
            "GITHUB_ORGANIZATION_PROCESSOR_API", "https://api.github.com"
        ),
    )
    parser.add_argument(
        "--clone-url-template",
        help="template for the clone URL",
        type=non_empty_string,
        default=os.environ.get(
            "GITHUB_ORGANIZATION_PROCESSOR_CLONE_URL_TEMPLATE",
            "https://github.com/{organization}/{repository}",
        ),
    )
    # The organization is mandatory on the command line only when the
    # environment does not already provide it.
    parser.add_argument(
        "--organization",
        help="GitHub organization for which repositories should be fetched",
        type=non_empty_string,
        required=os.environ.get("GITHUB_ORGANIZATION_PROCESSOR_ORGANIZATION") is None,
        default=os.environ.get("GITHUB_ORGANIZATION_PROCESSOR_ORGANIZATION"),
    )
    parser.add_argument(
        "--max-retries",
        help="maximum number of retries for rate limiting",
        type=positive_int,
        default=os.environ.get("GITHUB_ORGANIZATION_PROCESSOR_MAX_RETRIES", 10),
    )
    parser.add_argument(
        "--backoff-factor",
        help="backoff factor for rate limiting",
        type=positive_int,
        default=os.environ.get("GITHUB_ORGANIZATION_PROCESSOR_BACKOFF_FACTOR", 1),
    )

    return parser
class GitHubClient:
    """Small GitHub REST API client with retry handling.

    Requests are retried when GitHub signals rate limiting (``Retry-After``
    header, exhausted primary rate limit, or HTTP 429) and when a timeout or a
    connection error occurs. Both kinds of retries are bounded by
    ``max_retries``.
    """

    VALID_METHODS = ("GET", "OPTIONS", "HEAD", "POST", "PUT", "PATCH", "DELETE")
    # Seconds granted to the server when the caller does not pass ``timeout``;
    # without a timeout a stalled connection would block forever.
    DEFAULT_TIMEOUT = 30
    # Seconds to wait before retrying after a timeout or connection error.
    CONNECTION_RETRY_DELAY = 10

    def __init__(self, api, token, max_retries=10, backoff_factor=1):
        """Store the connection settings and prepare the request headers.

        Args:
            api: Base URL of the GitHub API.
            token: Token sent in the ``Authorization`` header.
            max_retries: Maximum number of retries per request.
            backoff_factor: Initial delay (seconds) used for HTTP 429 responses
                that carry no timing header; doubled after each such response.
        """
        log("INFO", "GITHUB-ORGANIZATION-PROCESSOR", "Configuring GitHub client...")

        self.api = api
        self.token = token
        self.headers = {
            "Authorization": f"token {token}",
            # The header value must be the media type only.
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        self.max_retries = max_retries
        self.backoff_factor = backoff_factor

        log(
            "INFO",
            "GITHUB-ORGANIZATION-PROCESSOR",
            f"GitHub client configured successfully for API: {api}",
        )
        log(
            "DEBUG",
            "GITHUB-ORGANIZATION-PROCESSOR",
            f"GitHub client configured with token starting with: {token[:8]}",
        )

    @staticmethod
    def _rate_limit_delay(response):
        """Return the seconds to wait when ``response`` signals rate limiting.

        Returns ``None`` when the response headers do not indicate a rate
        limit, so that genuine 403 errors are not retried.
        """
        if response.status_code not in (403, 429):
            return None

        retry_after = response.headers.get("Retry-After")
        if retry_after is not None:
            try:
                return max(int(retry_after), 0)
            except ValueError:
                # Not a number of seconds; fall back to the other headers.
                pass

        reset = response.headers.get("X-RateLimit-Reset")
        remaining = response.headers.get("X-RateLimit-Remaining")
        # When the "remaining" header is present it must be exhausted; a 403
        # with requests left is an authorization problem, not a rate limit.
        if reset is not None and remaining in (None, "0"):
            # Always wait at least one second so that a reset time already in
            # the past cannot produce a tight retry loop.
            return max(int(reset) - int(time()), 0) + 1

        return None

    def make_api_request(self, method, url, **kwargs):
        """Send an HTTP request and return the successful (2xx) response.

        Args:
            method: Upper-case HTTP method, one of ``VALID_METHODS``.
            url: Absolute URL to call.
            **kwargs: Extra arguments forwarded to ``requests.request``
                (``timeout`` defaults to ``DEFAULT_TIMEOUT``).

        Returns:
            The response object of the first 2xx answer.

        Raises:
            ValueError: If ``method`` is not supported.
            MaxRetriesExceededError: If rate limiting or connection problems
                persist after ``max_retries`` retries.
            requests.exceptions.RequestException: For any other request failure
                or non-2xx status.
        """
        if method not in self.VALID_METHODS:
            raise ValueError(
                f"Invalid HTTP method: {method}. Must be one of {list(self.VALID_METHODS)}."
            )

        kwargs.setdefault("timeout", self.DEFAULT_TIMEOUT)

        max_retries = self.max_retries
        backoff_factor = self.backoff_factor
        rate_limit_retry_count = 0
        connection_retry_count = 0

        while True:
            try:
                log("DEBUG", "GITHUB-CLIENT", f"{method} request to {url}")
                response = requests.request(method, url, headers=self.headers, **kwargs)
                status_code = response.status_code

                if 200 <= status_code < 300:
                    log(
                        "DEBUG",
                        "GITHUB-CLIENT",
                        f"Status code returned by {url}: {status_code}",
                    )
                    # Log the raw body: parsing JSON here would fail on empty
                    # bodies and turn a successful call into an error.
                    log(
                        "DEBUG",
                        "GITHUB-CLIENT",
                        f"Response returned by {url}: {response.text}",
                    )
                    return response

                sleep_time = self._rate_limit_delay(response)
                if sleep_time is None and status_code == 429:
                    # No timing information: exponential backoff.
                    sleep_time = backoff_factor
                    backoff_factor *= 2

                if sleep_time is None:
                    response.raise_for_status()
                    # Non-2xx status that raise_for_status() does not treat as
                    # an error (e.g. 304): fail instead of looping forever.
                    raise requests.exceptions.HTTPError(
                        f"Unexpected status code {status_code} returned by {url}",
                        response=response,
                    )

                if rate_limit_retry_count >= max_retries:
                    raise MaxRetriesExceededError(
                        f"Rate limit retry count exceeded for {url}"
                    )

                reason = "Too many requests" if status_code == 429 else "Rate limit hit"
                log(
                    "WARNING",
                    "GITHUB-CLIENT",
                    f"{reason}. Sleeping for {sleep_time} seconds.",
                )
                sleep(sleep_time)
                rate_limit_retry_count += 1

            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
                if connection_retry_count >= max_retries:
                    raise MaxRetriesExceededError(
                        f"Connection retry count exceeded for {url}"
                    )
                log(
                    "WARNING",
                    "GITHUB-CLIENT",
                    "Request timed out or connection error occurred. Waiting to retry...",
                )
                sleep(self.CONNECTION_RETRY_DELAY)
                connection_retry_count += 1
            except requests.exceptions.RequestException as e:
                log(
                    "DEBUG",
                    "GITHUB-CLIENT",
                    f"An error occurred while executing a {method} request to {url}: {e}",
                )
                raise

    def get_repositories(self, organization):
        """Return all repositories of ``organization``, following pagination.

        Args:
            organization: Name of the GitHub organization.

        Returns:
            A list with the decoded JSON entries of every result page.
        """
        url = f"{self.api}/orgs/{organization}/repos"
        repositories = []
        while url:
            response = self.make_api_request(method="GET", url=url)
            repositories.extend(response.json())
            # The "next" relation is absent on the last page, ending the loop.
            url = response.links.get("next", {}).get("url")

        log(
            "DEBUG",
            "GITHUB-CLIENT",
            f"Number of repositories found for organization {organization}: {len(repositories)}",
        )
        return repositories
def persist_repositories_information(
    organization,
    repositories,
    location=os.environ.get("SECRETS_FINDER_SCAN_FOLDER", "."),
    filename="repositories.json",
):
    """Write the raw repository list of an organization to a JSON file.

    Args:
        organization: Name of the organization the repositories belong to.
        repositories: Repository entries to store as-is.
        location: Destination folder (the default is read from
            ``SECRETS_FINDER_SCAN_FOLDER`` when the module is loaded).
        filename: Name of the JSON file created inside ``location``.
    """
    context = "GITHUB-ORGANIZATION-PROCESSOR"
    destination = f"{location}/{filename}"

    log(
        "INFO",
        context,
        f"Persisting list of repositories for organization {organization} to: {destination}",
    )

    # Document layout: the organization name next to its repositories.
    payload = dict(organization=organization, repositories=repositories)
    with open(destination, "w") as handle:
        json.dump(payload, handle, indent=4)

    log(
        "INFO",
        context,
        f"List of repositories for organization {organization} persisted successfully to: {destination}",
    )
repository.get("name")} + ) + + with open(f"{location}/{filename}", "w") as file: + json.dump(formatted_list_of_repositories, file, indent=4) + + log( + "INFO", + "GITHUB-ORGANIZATION-PROCESSOR", + f"List of repositories for organization {organization} persisted successfully to: {location}/{filename}", + ) + + +def main(): + try: + load_environment_variables() + parser = configure_parser() + arguments = parser.parse_args() + configure_logging(".", logging.INFO if not arguments.debug else logging.DEBUG) + except Exception as exception: + print( + f"FATAL ERROR: An unexpected error occurred during initialization: {str(exception)}" + ) + sys.exit(1) + + try: + if not os.environ.get("SECRETS_FINDER_SCAN_TOKEN") and not os.environ.get( + "GITHUB_TOKEN" + ): + log( + "ERROR", + "GITHUB-ORGANIZATION-PROCESSOR", + "No token provided: SECRETS_FINDER_SCAN_TOKEN and GITHUB_TOKEN environment variables are both missing. Operation aborted.", + ) + sys.exit(1) + + github_client = GitHubClient( + api=arguments.api, + token=os.environ.get( + "SECRETS_FINDER_SCAN_TOKEN", os.environ.get("GITHUB_TOKEN") + ), + max_retries=arguments.max_retries, + backoff_factor=arguments.backoff_factor, + ) + repositories = github_client.get_repositories( + organization=arguments.organization + ) + persist_repositories_information( + organization=arguments.organization, repositories=repositories + ) + persist_repositories_for_scan( + organization=arguments.organization, + repositories=repositories, + api=arguments.clone_url_template, + ) + + sys.exit(0) + except Exception as exception: + log( + "ERROR", + "GITHUB-ORGANIZATION-PROCESSOR", + f"A fatal error occurred during scan: {str(exception)}. 
Operation aborted.", + ) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/processors/github/api_github_organization.requirements.txt b/scripts/processors/github/api_github_organization.requirements.txt new file mode 100644 index 0000000..2d7083d --- /dev/null +++ b/scripts/processors/github/api_github_organization.requirements.txt @@ -0,0 +1,2 @@ +python-dotenv ~= 1.0.1 +requests ~= 2.32