From dfaa420c827c03fd27b896f27cfb7a12cac910ba Mon Sep 17 00:00:00 2001 From: Frederic Lemay <461102+flemay@users.noreply.github.com> Date: Sun, 28 Jan 2024 21:21:53 +1100 Subject: [PATCH 1/2] update --- env.template | 7 ++- scripts/env.sh | 14 +++-- scripts/metadata.sh | 4 +- scripts/run_download.sh | 11 ++-- scripts/run_git_publish.sh | 114 +++++++++++++++++++++++++++++++++++ scripts/run_publish.sh | 120 +------------------------------------ scripts/run_test.sh | 8 ++- 7 files changed, 144 insertions(+), 134 deletions(-) create mode 100755 scripts/run_git_publish.sh diff --git a/env.template b/env.template index 56d1b2c..6cf9d7f 100644 --- a/env.template +++ b/env.template @@ -1,4 +1,3 @@ -# ENV_GIT_REPO_NAME is the name of the repo "vhs-themes" # ENV_GIT_REPO_URL looks like https://github.com/flemay/vhs-themes.git # ENV_GIT_TOKEN is a GitHub fine-grained personal access token starting with "github_pat_". It also works with `secrets.GITHUB_TOKEN` for GitHub Actions. # ENV_INPUT_DIR is the dir where the scripts refers files from @@ -9,14 +8,16 @@ # ENV_TZ is the timezone to set for the container. Ex: UTC ENV_GIT_EMAIL ENV_GIT_NAME -ENV_GIT_REPO_NAME +ENV_GIT_PUBLISH_BRANCH +ENV_GIT_PUBLISH_INCLUDE_PATTERNS="README.md,records,records/*.gif,pages,pages/*.md,metadata.txt,.gitignore" ENV_GIT_REPO_URL ENV_GIT_TOKEN ENV_GIT_USERNAME ENV_INPUT_DIR=input ENV_OUTPUT_DIR=output ENV_PAGINATION -ENV_PUBLISH_BRANCH +ENV_PUBLISH_DIR=/opt/src/output ENV_THEMES ENV_THEMES_LIMIT +ENV_TMP_DIR=/tmp ENV_TZ=UTC diff --git a/scripts/env.sh b/scripts/env.sh index e53a3d6..e065bdd 100755 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -30,6 +30,15 @@ gitAuth() { "$@" } +# Gets the name of the repo from ENV_GIT_REPO_URL +# Ex: https://github.com/flemay/vhs-themes.git -> vhs-themes +getGitRepoName() { + declare -n _retRepoName="${1}" + declare -r _gitRepoURL="${ENV_GIT_REPO_URL:?}" + _retRepoName="${_gitRepoURL##*/}" + _retRepoName="${_retRepoName%.git}" +} + checkEnvVars(){ if ! 
envTemplate=$(grep -v "#" env.template | awk -F '=' '{print $1}');then logError "checkEnvVars: failed" @@ -47,11 +56,6 @@ checkEnvVars(){ if [[ "${hasUnsetEnvVars}" == "true" ]];then exit 1 fi - # https://www.shellcheck.net/wiki/SC2015 - if [[ "${ENV_PUBLISH_BRANCH:?}" == "main" ]]; then - logError "ENV_PUBLISH_BRANCH cannot be 'main'" - exit 1 - fi } arrayToCommaSeparatedString(){ diff --git a/scripts/metadata.sh b/scripts/metadata.sh index f73ee2b..1a9e2ff 100755 --- a/scripts/metadata.sh +++ b/scripts/metadata.sh @@ -15,7 +15,9 @@ readonly metadataLockFilePath="${ENV_OUTPUT_DIR:?}/metadata.lock" downloadMetadataFile() { logInfo "Metadata check: download remote metadata file" declare -r _downloadFilePath="${1}" - declare -r _endpoint="https://api.github.com/repos/${ENV_GIT_USERNAME:?}/${ENV_GIT_REPO_NAME:?}/contents/metadata.txt?ref=${ENV_PUBLISH_BRANCH:?}" + local _gitRepoName="" + getGitRepoName _gitRepoName + declare -r _endpoint="https://api.github.com/repos/${ENV_GIT_USERNAME:?}/${_gitRepoName}/contents/metadata.txt?ref=${ENV_GIT_PUBLISH_BRANCH:?}" curl -s -H "Authorization: Bearer ${ENV_GIT_TOKEN:?}" \ -H "Accept: application/vnd.github.v3.raw" \ -o "${_downloadFilePath}" \ diff --git a/scripts/run_download.sh b/scripts/run_download.sh index 1ec9fb8..2c5a23c 100755 --- a/scripts/run_download.sh +++ b/scripts/run_download.sh @@ -1,21 +1,22 @@ #!/usr/bin/env bash set -euo pipefail IFS=$'\n\t' -# Inspired by https://gist.github.com/joncardasis/e6494afd538a400722545163eb2e1fa5 source scripts/env.sh -logInfo "Downloading content from branch '${ENV_PUBLISH_BRANCH:?}' to dir '${ENV_OUTPUT_DIR:?}' started..." +logInfo "Downloading content from branch '${ENV_GIT_PUBLISH_BRANCH:?}' to dir '${ENV_OUTPUT_DIR:?}' started..." 
source scripts/metadata.sh checkMetadataLock rm -fr "${ENV_OUTPUT_DIR}" -readonly tmpRepoDir="/tmp/${ENV_GIT_REPO_NAME:?}" +declare gitRepoName="" +getGitRepoName gitRepoName +readonly tmpRepoDir="${ENV_TMP_DIR:?}/${gitRepoName:?}" rm -fr "${tmpRepoDir}" # the Git clone option `--single-branch` prevents from downloading other branches. For instance, `make testE2E` without this option would also download the branch `themes` which is about 40mb in size. Now with this option, only branch `themes_test_e2e` (5mb) is downloaded. -gitAuth clone --branch "${ENV_PUBLISH_BRANCH}" --single-branch "${ENV_GIT_REPO_URL:?}" "${tmpRepoDir}" +gitAuth clone --branch "${ENV_GIT_PUBLISH_BRANCH}" --single-branch "${ENV_GIT_REPO_URL:?}" "${tmpRepoDir}" rm -fr "${tmpRepoDir}"/.git mv "${tmpRepoDir}" "${ENV_OUTPUT_DIR}" -logInfo "Downloading content from branch '${ENV_PUBLISH_BRANCH}' to dir '${ENV_OUTPUT_DIR}' is done!" +logInfo "Downloading content from branch '${ENV_GIT_PUBLISH_BRANCH}' to dir '${ENV_OUTPUT_DIR}' is done!" 
diff --git a/scripts/run_git_publish.sh b/scripts/run_git_publish.sh new file mode 100755 index 0000000..13be64c --- /dev/null +++ b/scripts/run_git_publish.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash + +# Inspired by https://gist.github.com/joncardasis/e6494afd538a400722545163eb2e1fa5 + +set -euo pipefail +IFS=$'\n\t' + +[[ -f "/.dockerenv" ]] || { printf "Error: must be executed inside Docker container\n" 1>&2; exit 1; } + +declare envGitEmail="${ENV_GIT_EMAIL:?}" +declare envGitName="${ENV_GIT_NAME:?}" +declare envGitPublishBranch="${ENV_GIT_PUBLISH_BRANCH:?}" +declare envGitPublishIncludePatterns="${ENV_GIT_PUBLISH_INCLUDE_PATTERNS:?}" +declare envGitRepoURL="${ENV_GIT_REPO_URL:?}" +# shellcheck disable=SC2034 +declare envGitToken="${ENV_GIT_TOKEN:?}" +# shellcheck disable=SC2034 +declare envGitUsername="${ENV_GIT_USERNAME:?}" +declare envPublishDir="${ENV_PUBLISH_DIR:?}" +declare envTmpDir="${ENV_TMP_DIR:?}" + +# Gets the name of the repo from envGitRepoURL +# Ex: https://github.com/flemay/3musketeers.git -> 3musketeers +getRepoName() { + printf "function: getRepoName\n" + declare -n _retRepoName="${1}" + _retRepoName="${envGitRepoURL##*/}" + _retRepoName="${_retRepoName%.git}" +} + +getRepoTmpDir() { + printf "function: getRepoTmpDir\n" + declare -n _retRepoTmpDir="${1}" + local _repoName="" + getRepoName _repoName + _retRepoTmpDir="${envTmpDir}/${_repoName:?}" # ':?' aborts on an empty repo name (e.g. URL ending in '/') so rm -fr never targets envTmpDir itself +} + +# https://git-scm.com/docs/gitfaq#Documentation/gitfaq.txt-HowdoIreadapasswordortokenfromanenvironmentvariable +# https://stackoverflow.com/questions/72577367/git-push-provide-credentials-without-any-prompts +# Notes +# - The helper line could've been written like `-c credential.helper="!f() { echo \"username=${envGitUsername}\"; echo \"password=${envGitToken}\"; };f" \` but decided to leave git to evaluate the value when Git runs the command. +# - Possible leak: Be sure the helper code is correct. 
For instance, if we put `password${ENV_GIT_TOKEN}` (omitting the `=`), the command will leak the token with the following message: "warning: invalid credential line: passwordgithub_pat[REDACTED]". +gitAuth() { + printf "function: gitAuth\n" + # shellcheck disable=SC2016 + git -c credential.helper= \ + -c credential.helper='!f() { echo "username=${ENV_GIT_USERNAME:?}"; echo "password=${ENV_GIT_TOKEN:?}"; };f' \ + "$@" + # -c credential.helper="!f() { echo \"username=${envGitUsername}\"; echo \"password=${envGitToken}\"; };f" \ +} + +cloneAndSetPublishBranch() { + # Clone the repository to a tmp dir so that the current code is not messed up + # The use of option `--single-branch` is to make sure only the default branch is downloaded. This can save some bandwidth + printf "function: cloneAndSetPublishBranch\n" + local _repoTmpDir="" + getRepoTmpDir _repoTmpDir + rm -fr "${_repoTmpDir:?}" + gitAuth clone --single-branch "${envGitRepoURL}" "${_repoTmpDir}" + + cd "${_repoTmpDir}" + local _currentBranch="" + _currentBranch=$(git rev-parse --abbrev-ref HEAD) + if [[ "${envGitPublishBranch}" == "${_currentBranch}" ]]; then + printf "Error: ENV_GIT_PUBLISH_BRANCH cannot be '%s'\n" "${_currentBranch}" 1>&2 + exit 1 + fi + + # Create a new orphan branch (which has no history and tracked files) + # According to Git (https://git-scm.com/docs/git-switch/2.23.0): + # --orphan <new-branch> + # Create a new orphan branch, named <new-branch>. All tracked files are removed. + git branch -D "${envGitPublishBranch}" &> /dev/null || true + git switch --orphan "${envGitPublishBranch}" + + # create a new .gitignore specifically for publish branch + printf "*\n" > .gitignore + IFS=',' read -ra _includePatterns <<< "${envGitPublishIncludePatterns}" + for pattern in "${_includePatterns[@]}"; do + printf "!%s\n" "${pattern}" >> .gitignore + done + + cp -r "${envPublishDir}"/* . 
+} + +configureGitConfig() { + printf "function: configureGitConfig\n" + git config user.email "${envGitEmail}" + git config user.name "${envGitName}" + git remote remove originForPublishing &> /dev/null || true + git remote add originForPublishing "${envGitRepoURL}" +} + +commitAndPush() { + printf "function: commitAndPush\n" + git add . + git commit -m "Publish" + # shellcheck disable=SC2310 + gitAuth push originForPublishing -d "${envGitPublishBranch}" &> /dev/null || true + gitAuth push originForPublishing "${envGitPublishBranch}" +} + +cleanup() { + printf "function: cleanup\n" + local _repoTmpDir="" + getRepoTmpDir _repoTmpDir + rm -fr "${_repoTmpDir:?}" +} + +cloneAndSetPublishBranch +configureGitConfig +commitAndPush +cleanup diff --git a/scripts/run_publish.sh b/scripts/run_publish.sh index 563357b..b9267e2 100755 --- a/scripts/run_publish.sh +++ b/scripts/run_publish.sh @@ -1,127 +1,13 @@ #!/usr/bin/env bash set -euo pipefail IFS=$'\n\t' -# Inspired by https://gist.github.com/joncardasis/e6494afd538a400722545163eb2e1fa5 source scripts/env.sh -logInfo "Publishing to branch '${ENV_PUBLISH_BRANCH}' started..." +logInfo "Publishing to branch '${ENV_GIT_PUBLISH_BRANCH:?}' started..." source scripts/metadata.sh checkMetadataLock generateMetadata -# preparePublishBranch creates a new local orphan branch (which has no history and tracked files). -# According to Git: -# --orphan -# Create a new orphan branch, named . All tracked files are removed. -# All of this creation is done from a copy of the current dir to another one so that anything that happens won't have any effect on the current dir. -# The publish branch contents is basically the output dir with a clean up to ensure unwanted files are not going to be published. -# So far the best way to only get the correct contents on the publish branch is to create a specific .gitignore files. 
-# References -# - https://git-scm.com/docs/git-switch/2.23.0 -preparePublishBranch() { - declare -r tmpRepoDir="/tmp/${ENV_GIT_REPO_NAME:?}" - rm -fr "${tmpRepoDir}" - mkdir "${tmpRepoDir}" - cp -r . "${tmpRepoDir}"/ +./scripts/run_git_publish.sh - cd "${tmpRepoDir}" - - git restore . - git branch -D "${ENV_PUBLISH_BRANCH:?}" &> /dev/null || true - git switch --orphan "${ENV_PUBLISH_BRANCH:?}" - # remove all files/dirs that are not tracked (excluding output dir) - git clean -d --exclude="${ENV_OUTPUT_DIR:?}" --force - - # create a new .gitignore specifically for publish branch - printf "*\n" > .gitignore - { - printf "!README.md\n" - printf "!records\n" - printf "!records/*.gif\n" - printf "!pages\n" - printf "!pages/*.md\n" - printf "!metadata.txt\n" - } >> .gitignore - - cp -r "${ENV_OUTPUT_DIR:?}"/* . - rm -fr "${ENV_OUTPUT_DIR}" - - # Checksum based on git status (that was before metadata) - # https://stackoverflow.com/questions/35326218/git-ls-files-how-to-escape-spaces-in-files-paths - # rm -fr checksum.txt - # git status -suall \ - # | cut -c 4- \ - # | LC_ALL=C sort \ - # | tr '\n' '\0' \ - # | tr -d '"' \ - # | xargs -0 -n 1 sha256sum \ - # | sha256sum \ - # > checksum.txt -} - -# checkGitStatus makes sure the publish branch does not have commits or staged files -# It also checks unstaged files are the expected ones such as .gitignore, README.md, pageXX.md, records/*.gif. -# This is to prevent pushing unwanted files. -# Some checks are perhaps overkill but better be safe than sorry. -# The command git status -suall shows something like -#?? .gitignore -#?? README.md -#?? "records/3024 Day.gif" -#?? records/Abernathy.gif -#?? page1.md -#?? metadata.txt -checkGitStatus() { - # Expect git log to return an error - ! 
git log &> /dev/null || { - logError "Publish branch ${ENV_PUBLISH_BRANCH} should not have any commits" - exit 1 - } - - # https://www.shellcheck.net/wiki/SC2155 - declare -i _totalStagedFiles - _totalStagedFiles=$(git status -s -uno | wc -l) - declare -r _totalStagedFiles - (( _totalStagedFiles == 0 )) || { - logError "Expected number of staged files to be 0" - git status 1>&2 - exit 1 - } - - declare -r _unstagedFilesRegExp="^README\.md$|^page.+\.md$|^[\"]?records/.*\.gif[\"]?$|^metadata.txt$" - if git status -suall \ - | cut -c 4- \ - | grep -Ev "${_unstagedFilesRegExp}"; then - logError "There are files that should not be part of git status" - git status -suall \ - | cut -c 4- \ - | grep -Ev "${_unstagedFilesRegExp}" 1>&2 - exit 1 - fi -} - -configureGitConfig() { - git config user.email "${ENV_GIT_EMAIL:?}" - git config user.name "${ENV_GIT_NAME:?}" - git remote remove originForPublishing &> /dev/null || true - git remote add originForPublishing "${ENV_GIT_REPO_URL:?}" -} - -commitAndPush() { - git add . - git commit -m "Publish themes" - # shellcheck disable=SC2310 - gitAuth push originForPublishing -d "${ENV_PUBLISH_BRANCH:?}" &> /dev/null || true - gitAuth push originForPublishing "${ENV_PUBLISH_BRANCH:?}" -} - -cleanup() { - rm -fr /tmp/"${ENV_GIT_REPO_NAME}" -} - -preparePublishBranch -checkGitStatus -configureGitConfig -commitAndPush -cleanup - -logInfo "Publishing to branch '${ENV_PUBLISH_BRANCH}' is done!" +logInfo "Publishing to branch '${ENV_GIT_PUBLISH_BRANCH}' is done!" 
diff --git a/scripts/run_test.sh b/scripts/run_test.sh index c982978..c0d002c 100755 --- a/scripts/run_test.sh +++ b/scripts/run_test.sh @@ -3,10 +3,12 @@ set -euo pipefail IFS=$'\n\t' readonly testOutputDir="output_test" + +export ENV_GIT_PUBLISH_BRANCH="themes_test_e2e" export ENV_INPUT_DIR=input export ENV_OUTPUT_DIR="${testOutputDir}" export ENV_PAGINATION=2 -export ENV_PUBLISH_BRANCH="themes_test_e2e" +export ENV_PUBLISH_DIR=/opt/src/"${testOutputDir}" export ENV_THEMES="TokyoNight,tokyonight,3024 Day,Adventure,Aurora,tokyonight" export ENV_THEMES_LIMIT=3 @@ -66,7 +68,7 @@ testPublish(){ declare -r _outputFilePath="/tmp/output.txt" # Output error to stdout and save output to a file for later comparison ./scripts/run_publish.sh 2>&1 | tee "${_outputFilePath}" - _expectedMessage="Publishing to branch '${ENV_PUBLISH_BRANCH}' is done" + _expectedMessage="Publishing to branch '${ENV_GIT_PUBLISH_BRANCH}' is done" if ! grep -q "${_expectedMessage}" "${_outputFilePath}";then logError "Expected message: ${_expectedMessage}" exit 1 @@ -120,5 +122,5 @@ if [[ "${ENV_INT_TEST_E2E:?}" == "true" ]]; then testDownload fi -logInfo "Note: You can look at the dir '${testOutputDir}' as well as the remote publish branch '${ENV_PUBLISH_BRANCH}' (if testE2E) for examining result" +logInfo "Note: You can look at the dir '${testOutputDir}' as well as the remote publish branch '${ENV_GIT_PUBLISH_BRANCH}' (if testE2E) for examining result" logInfo "Testing done!" 
From 50c7c4de1389e4e2e1799636a9179d2e46955a91 Mon Sep 17 00:00:00 2001 From: Frederic Lemay <461102+flemay@users.noreply.github.com> Date: Sun, 28 Jan 2024 21:29:18 +1100 Subject: [PATCH 2/2] update --- scripts/run_git_publish.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/run_git_publish.sh b/scripts/run_git_publish.sh index 13be64c..b582a9b 100755 --- a/scripts/run_git_publish.sh +++ b/scripts/run_git_publish.sh @@ -7,17 +7,17 @@ IFS=$'\n\t' [[ -f "/.dockerenv" ]] || { printf "Error: must be executed inside Docker container\n" 1>&2; exit 1; } -declare envGitEmail="${ENV_GIT_EMAIL:?}" -declare envGitName="${ENV_GIT_NAME:?}" -declare envGitPublishBranch="${ENV_GIT_PUBLISH_BRANCH:?}" -declare envGitPublishIncludePatterns="${ENV_GIT_PUBLISH_INCLUDE_PATTERNS:?}" -declare envGitRepoURL="${ENV_GIT_REPO_URL:?}" +readonly envGitEmail="${ENV_GIT_EMAIL:?}" +readonly envGitName="${ENV_GIT_NAME:?}" +readonly envGitPublishBranch="${ENV_GIT_PUBLISH_BRANCH:?}" +readonly envGitPublishIncludePatterns="${ENV_GIT_PUBLISH_INCLUDE_PATTERNS:?}" +readonly envGitRepoURL="${ENV_GIT_REPO_URL:?}" # shellcheck disable=SC2034 -declare envGitToken="${ENV_GIT_TOKEN:?}" +readonly envGitToken="${ENV_GIT_TOKEN:?}" # shellcheck disable=SC2034 -declare envGitUsername="${ENV_GIT_USERNAME:?}" -declare envPublishDir="${ENV_PUBLISH_DIR:?}" -declare envTmpDir="${ENV_TMP_DIR:?}" +readonly envGitUsername="${ENV_GIT_USERNAME:?}" +readonly envPublishDir="${ENV_PUBLISH_DIR:?}" +readonly envTmpDir="${ENV_TMP_DIR:?}" # Gets the name of the repo from envGitRepoURL # Ex: https://github.com/flemay/3musketeers.git -> 3musketeers