Skip to content

Commit

Permalink
feat(backup): log everything stdout/err, implement lockfile for both …
Browse files Browse the repository at this point in the history
…backup/restore
  • Loading branch information
brokenpip3 committed Jun 22, 2024
1 parent 411068b commit 90b00bd
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 40 deletions.
2 changes: 1 addition & 1 deletion backup/pvc/VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v0.3.0
v0.4.0
55 changes: 40 additions & 15 deletions backup/pvc/bin/backup.sh
Original file line number Diff line number Diff line change
@@ -1,39 +1,64 @@
#!/usr/bin/env bash

set -eo pipefail
source "$(dirname "$0")/utils.sh"
set -x

[[ ! $# -eq 1 ]] && echo "Usage: $0 backup_number" && exit 1;
[[ -z "${BACKUP_DIR}" ]] && echo "Required 'BACKUP_DIR' env not set" && exit 1;
[[ -z "${JENKINS_HOME}" ]] && echo "Required 'JENKINS_HOME' env not set" && exit 1;
[[ ! $# -eq 1 ]] && _log "ERROR" "Usage: $0 BACKUP_NUMBER" && exit 1
[[ -z "${BACKUP_DIR}" ]] && _log "ERROR" "Required 'BACKUP_DIR' env not set" && exit 1
[[ -z "${JENKINS_HOME}" ]] && _log "ERROR" "Required 'JENKINS_HOME' env not set" && exit 1
RETRY_COUNT=${RETRY_COUNT:-3}
RETRY_INTERVAL=${RETRY_INTERVAL:-60}
BACKUP_NUMBER=$1
TRAP_FILE="${BACKUP_DIR}/_backup_${BACKUP_NUMBER}_is_running"

# --> Check if another backup process is running (operator restart/crash)
# Poll for the lock file up to RETRY_COUNT times, sleeping RETRY_INTERVAL
# seconds between checks; stop polling as soon as the lock file is gone.
for ((i = 0; i < RETRY_COUNT; i++)); do
  # An explicit 'if' keeps the 'break' independent of _log's exit status.
  if [[ ! -f "${TRAP_FILE}" ]]; then
    _log "INFO" "Backup: no other backup process are running"
    break
  fi
  _log "INFO" "Backup is already running. Waiting for ${RETRY_INTERVAL} seconds..."
  sleep "${RETRY_INTERVAL}"
done
# The lock file survived every retry: give up instead of running concurrently.
if [[ -f "${TRAP_FILE}" ]]; then
  _log "ERROR" "Backup is still running after waiting ${RETRY_COUNT} time ${RETRY_INTERVAL}s. Exiting."
  exit 1
fi

Check failure on line 21 in backup/pvc/bin/backup.sh

View workflow job for this annotation

GitHub Actions / Codespell

stil ==> still
# --< Done

_log "INFO" "Running backup ${BACKUP_NUMBER}"
touch "${TRAP_FILE}"
# create the temp dir on the same filesystem as BACKUP_DIR to be able to use an atomic mv instead of a copy
BACKUP_TMP_DIR=$(mktemp -d --tmpdir=${BACKUP_DIR})
trap "test -d "${BACKUP_TMP_DIR}" && rm -fr "${BACKUP_TMP_DIR}"" EXIT SIGINT SIGTERM
BACKUP_TMP_DIR=$(mktemp -d --tmpdir="${BACKUP_DIR}")

#######################################
# Remove the temporary backup directory and the lock ("trap") file.
# Safe to call repeatedly: entries that are already gone are skipped.
# Globals:  BACKUP_TMP_DIR (read), TRAP_FILE (read)
# Returns:  0 when nothing had to be removed or every removal succeeded,
#           otherwise the status of the last failing rm.
#######################################
_clean() {
  local status=0
  # 'if' instead of 'test ... && rm': a missing path must not turn into a
  # non-zero return value, which would abort a caller running under 'set -e'.
  if [[ -d "${BACKUP_TMP_DIR}" ]]; then
    rm -fr -- "${BACKUP_TMP_DIR}" || status=$?
  fi
  if [[ -f "${TRAP_FILE}" ]]; then
    rm -f -- "${TRAP_FILE}" || status=$?
  fi
  return "${status}"
}

#######################################
# Signal handler: clean up, report the interruption and stop the script.
# Outputs:  one ERROR line through _log.
# Exits:    always terminates the shell with status 1.
#######################################
_trap() {
  # Cleanup is best-effort here; a failure must not suppress the error report.
  _clean || true
  _log "ERROR" "Backup: something wrong happened, check the logs"
  # Without an explicit exit bash resumes the interrupted script once the
  # handler returns, continuing with the temp dir and lock file already removed.
  exit 1
}

backup_number=$1
echo "Running backup"
trap '_trap' SIGQUIT SIGINT SIGTERM

# config.xml in a job directory is a config file that shouldn't be backed up
# config.xml in child directories is state that should. For example-
# branches/myorg/branches/myrepo/branches/master/config.xml should be retained while
# branches/myorg/config.xml should not
# Capture tar's status without tripping 'set -e'; start from 0 so a value
# inherited from the environment can never be mistaken for tar's result.
ret=0
# The exclude patterns are quoted so tar receives them verbatim instead of the
# shell expanding them against the current directory. Option order matters:
# --no-wildcards-match-slash/--anchored only affect the --exclude after them.
tar --zstd -C "${JENKINS_HOME}" -cf "${BACKUP_TMP_DIR}/${BACKUP_NUMBER}.tar.zstd" \
  --exclude 'jobs/*/workspace*' \
  --no-wildcards-match-slash --anchored \
  --ignore-failed-read \
  --exclude 'jobs/*/config.xml' -c jobs || ret=$?

if [[ "${ret}" -eq 0 ]]; then
  _log "INFO" "Backup ${BACKUP_NUMBER} was completed without warnings"
elif [[ "${ret}" -eq 1 ]]; then
  _log "INFO" "Backup ${BACKUP_NUMBER} was completed with some warnings"
else
  # Any other status is treated as a failed archive: do not publish it.
  _log "ERROR" "Backup ${BACKUP_NUMBER} failed: tar exited with status ${ret}"
  _clean || true
  exit "${ret}"
fi

# atomically create a backup file
mv "${BACKUP_TMP_DIR}/${backup_number}.tar.zstd" "${BACKUP_DIR}/${backup_number}.tar.zstd"
mv "${BACKUP_TMP_DIR}/${BACKUP_NUMBER}.tar.zstd" "${BACKUP_DIR}/${BACKUP_NUMBER}.tar.zstd"

rm -rf "${BACKUP_TMP_DIR}"
[[ ! -s ${BACKUP_DIR}/${backup_number}.tar.zstd ]] && echo "backup file '${BACKUP_DIR}/${backup_number}.tar.zstd' is empty" && exit 1;
_log "INFO" "Cleaning ${BACKUP_TMP_DIR} and trap file ${TRAP_FILE}"
_clean
[[ ! -s ${BACKUP_DIR}/${BACKUP_NUMBER}.tar.zstd ]] && _log "ERROR" "Backup file '${BACKUP_DIR}/${BACKUP_NUMBER}.tar.zstd' is empty" && exit 1

echo Done
_log "INFO" "Backup ${BACKUP_NUMBER} done"
exit 0
8 changes: 2 additions & 6 deletions backup/pvc/bin/get-latest.sh
Original file line number Diff line number Diff line change
@@ -1,30 +1,26 @@
#!/usr/bin/env bash

set -eo pipefail
source "$(dirname "$0")/utils.sh"

#######################################
# Tell whether a directory holds no backup archives.
# Arguments: $1 - directory to inspect
# Returns:   0 (true) when no entry matching "<dir>/*.tar.*" exists,
#            1 (false) when at least one such entry exists.
# Shell options of the caller (e.g. 'set -e') are left untouched.
#######################################
is_backup_not_exist() {
  local backup_dir="$1"
  local candidate
  for candidate in "${backup_dir}"/*.tar.*; do
    # An unmatched glob is left as the literal pattern, so confirm that the
    # entry really exists (-L also accepts a dangling symlink).
    if [[ -e "${candidate}" || -L "${candidate}" ]]; then
      return 1
    fi
  done
  return 0
}

[[ -z "${BACKUP_DIR}" ]] && { echo "Required 'BACKUP_DIR' env not set"; exit 1; }
[[ -z "${BACKUP_DIR}" ]] && { _log "ERROR" "Required 'BACKUP_DIR' env not set"; exit 1; }

# Check if we have any backup
if is_backup_not_exist "${BACKUP_DIR}"; then
Expand Down
42 changes: 29 additions & 13 deletions backup/pvc/bin/restore.sh
Original file line number Diff line number Diff line change
@@ -1,29 +1,45 @@
#!/usr/bin/env bash

set -eo pipefail
source "$(dirname "$0")/utils.sh"

[[ ! $# -eq 1 ]] && echo "Usage: $0 backup_number" && exit 1
[[ -z "${BACKUP_DIR}" ]] && echo "Required 'BACKUP_DIR' env not set" && exit 1;
[[ -z "${JENKINS_HOME}" ]] && echo "Required 'JENKINS_HOME' env not set" && exit 1;
[[ ! $# -eq 1 ]] && _log "ERROR" "Usage: $0 <backup number>" && exit 1
[[ -z "${BACKUP_DIR}" ]] && _log "ERROR" "Required 'BACKUP_DIR' env not set" && exit 1
[[ -z "${JENKINS_HOME}" ]] && _log "ERROR" "Required 'JENKINS_HOME' env not set" && exit 1
BACKUP_NUMBER=$1
RETRY_COUNT=${RETRY_COUNT:-3}
RETRY_INTERVAL=${RETRY_INTERVAL:-60}

backup_number=$1
backup_file="${BACKUP_DIR}/${backup_number}"
echo "Running restore backup with backup number #${backup_number}"
# --> Check if another restore process is running (operator restart/crash)
TRAP_FILE="${BACKUP_DIR}/_restore_${BACKUP_NUMBER}_is_running"
trap "rm -f ${TRAP_FILE}" SIGINT SIGTERM

if [[ -f "$backup_file.tar.gz" ]]; then
echo "Old format tar.gz found, restoring it"
# Poll for the lock file up to RETRY_COUNT times, sleeping RETRY_INTERVAL
# seconds between checks; stop polling as soon as the lock file is gone.
for ((i = 0; i < RETRY_COUNT; i++)); do
  # An explicit 'if' keeps the 'break' independent of _log's exit status.
  if [[ ! -f "${TRAP_FILE}" ]]; then
    _log "INFO" "Restore: No other process are running, restoring"
    break
  fi
  _log "INFO" "Restore is already running. Waiting for ${RETRY_INTERVAL} seconds..."
  sleep "${RETRY_INTERVAL}"
done
# The lock file survived every retry: give up instead of running concurrently.
if [[ -f "${TRAP_FILE}" ]]; then
  _log "ERROR" "Restore is still running after waiting ${RETRY_COUNT} time ${RETRY_INTERVAL}s. Exiting."
  exit 1
fi

Check failure on line 22 in backup/pvc/bin/restore.sh

View workflow job for this annotation

GitHub Actions / Codespell

stil ==> still
# --< Done

_log "INFO" "Running restore backup with backup number #${BACKUP_NUMBER}"
touch "${TRAP_FILE}"
# From here on this process owns the lock file: remove it on every exit path
# (success, an explicit 'exit 1' or a 'set -e' abort) so that a later restore
# of the same backup number is not blocked by a stale lock.
trap 'rm -f -- "${TRAP_FILE}"' EXIT
BACKUP_FILE="${BACKUP_DIR}/${BACKUP_NUMBER}"

# Choose the tar options and file extension for the archive that exists;
# a legacy *.tar.gz archive is checked first, then *.tar.zstd.
if [[ -f "${BACKUP_FILE}.tar.gz" ]]; then
  _log "INFO" "Restore: Old format tar.gz found, restoring it"
  OPTS=""
  EXT="tar.gz"
elif [[ -f "${BACKUP_FILE}.tar.zstd" ]]; then
  _log "INFO" "Restore: Backup file found, proceeding"
  OPTS="--zstd"
  EXT="tar.zstd"
else
  # Neither archive flavour exists for this backup number.
  _log "ERROR" "Restore: Backup file not found: ${BACKUP_FILE}"
  exit 1
fi

tar $OPTS -C "${JENKINS_HOME}" -xf "${BACKUP_DIR}/${backup_number}.${EXT}"
tar $OPTS -C "${JENKINS_HOME}" -xf "${BACKUP_DIR}/${BACKUP_NUMBER}.${EXT}"

echo Done
_log "INFO" "Restore: ${BACKUP_NUMBER} Done"
exit 0
11 changes: 6 additions & 5 deletions backup/pvc/bin/run.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
#!/usr/bin/env bash

set -eo pipefail
source "$(dirname "$0")/utils.sh"

# Use 60 as the default in case BACKUP_CLEANUP_INTERVAL is not set
BACKUP_CLEANUP_INTERVAL=${BACKUP_CLEANUP_INTERVAL:=60}

# Ensure required environment variables are set
#######################################
# Abort the script when the named environment variable is unset or empty.
# Arguments: $1 - NAME of the variable to check (not its value)
# Outputs:   a single ERROR line through _log when the check fails
# Exits:     with status 1 when the variable is unset or empty
#######################################
check_env_var() {
  local var_name="$1"
  # ${!var_name} is indirect expansion: the value of the variable whose name
  # is stored in var_name.
  if [[ -z "${!var_name:-}" ]]; then
    _log "ERROR" "Required '${var_name}' environment variable is not set"
    exit 1
  fi
}
Expand Down Expand Up @@ -41,7 +42,7 @@ find_exceeding_backups() {
local backup_count="$2"
# Check if we have any backup
if is_backup_not_exist "${backup_dir}"; then
echo "backups not found in ${backup_dir}" >&2
_log "ERROR" "backups not found in ${backup_dir}"
return
fi
find "${backup_dir}"/*.tar.zstd -maxdepth 0 -exec basename {} \; | sort -gr | tail -n +$((backup_count +1))
Expand All @@ -51,9 +52,9 @@ check_env_var "BACKUP_DIR"
check_env_var "JENKINS_HOME"

if [[ -z "${BACKUP_COUNT}" ]]; then
echo "ATTENTION! No BACKUP_COUNT set, it means you MUST delete old backups manually or by custom script"
_log "WARNING" "No BACKUP_COUNT set, it means you MUST delete old backups manually or by custom script"
else
echo "Retaining only the ${BACKUP_COUNT} most recent backups, cleanup occurs every ${BACKUP_CLEANUP_INTERVAL} seconds"
_log "INFO" "Retaining only the ${BACKUP_COUNT} most recent backups, cleanup occurs every ${BACKUP_CLEANUP_INTERVAL} seconds"
fi

while true;
Expand All @@ -62,7 +63,7 @@ do
if [[ -n "${BACKUP_COUNT}" ]]; then
exceeding_backups=$(find_exceeding_backups "${BACKUP_DIR}" "${BACKUP_COUNT}")
if [[ -n "$exceeding_backups" ]]; then
echo "Removing backups: $(echo "$exceeding_backups" | tr '\n' ', ' | sed 's/,$//')"
_log "INFO" "Removing backups: $(echo "$exceeding_backups" | tr '\n' ', ' | sed 's/,$//')"
echo "$exceeding_backups" | while read -r file; do
rm "${BACKUP_DIR}/${file}"
done
Expand Down
13 changes: 13 additions & 0 deletions backup/pvc/bin/utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Common utils

#######################################
# Emit one timestamped log line: "<YYYY-mm-dd HH:MM:SS> [<level>] <message>".
# Arguments: $1 - level; ERROR, ERR, error and err are routed to the error stream
#            $2 - message text
# Outputs:   the line is written to PID 1's stderr (error levels) or stdout
#            (every other level) through /proc/1/fd; when that descriptor
#            cannot be written, the script's own stderr/stdout is used instead.
#            NOTE(review): targeting PID 1 presumably makes output of exec'd
#            scripts show up in the container log - confirm.
# Returns:   always 0, so guards written as '_log ... && exit 1' cannot be
#            short-circuited by a logging failure.
#######################################
_log() {
  local level="$1"
  local message="$2"
  local timestamp
  # Declared separately from the assignment so 'local' does not mask date's
  # status; a failing date only costs the timestamp, never the log line.
  timestamp=$(date '+%Y-%m-%d %H:%M:%S') || timestamp=""
  local line="${timestamp} [${level}] ${message}"
  if [[ "$level" =~ ^(ERROR|ERR|error|err)$ ]]; then
    # The group's 2>/dev/null hides bash's own "cannot open" diagnostic.
    { printf '%s\n' "${line}" > /proc/1/fd/2; } 2>/dev/null \
      || printf '%s\n' "${line}" >&2
  else
    { printf '%s\n' "${line}" > /proc/1/fd/1; } 2>/dev/null \
      || printf '%s\n' "${line}"
  fi
  return 0
}

0 comments on commit 90b00bd

Please sign in to comment.