#!/usr/bin/env bash
#####################################################################################
####  backup-smoke-tests.sh — Verify backups are actually working               ####
####  Checks existence, recency, size, integrity, snapshot count, locks,        ####
####  restore.                                                                  ####
####  Supports: restic, borg, directory, rsnapshot                              ####
####                                                                            ####
####  Author:  Phil Connor                                                      ####
####  Contact: contact@mylinux.work                                             ####
####  License: MIT                                                              ####
####  Version: 1.01                                                             ####
####                                                                            ####
####  Usage:                                                                    ####
####    export BACKUP_TYPE="restic"                                             ####
####    export BACKUP_REPO="s3:s3.example.com/bucket"                           ####
####    ./backup-smoke-tests.sh                                                 ####
####                                                                            ####
####  See --help for all options.                                               ####
#####################################################################################

set -euo pipefail

# ── Defaults ──────────────────────────────────────────────────────────
# Every setting can be supplied through the environment; the value after
# ":-" is used when the variable is unset or empty.
BACKUP_TYPE="${BACKUP_TYPE:-}"
BACKUP_REPO="${BACKUP_REPO:-}"
BACKUP_DIR="${BACKUP_DIR:-}"
MAX_AGE_HOURS="${MAX_AGE_HOURS:-26}"
MIN_SNAPSHOTS="${MIN_SNAPSHOTS:-1}"
MIN_SIZE_MB="${MIN_SIZE_MB:-1}"
RESTORE_TEST_FILE="${RESTORE_TEST_FILE:-}"
SKIP_RESTORE="${SKIP_RESTORE:-false}"
SKIP_INTEGRITY="${SKIP_INTEGRITY:-false}"
MOUNT_CHECK="${MOUNT_CHECK:-}"
OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}" # text, tap, junit
JUNIT_FILE="${JUNIT_FILE:-backup-results.xml}"
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"

# ── State ─────────────────────────────────────────────────────────────
PASS=0
FAIL=0
SKIP=0
TOTAL=0
RESULTS=()      # one "STATUS|name|detail" entry per recorded test
RESTORE_TMP=""  # scratch directory for the restore test; removed by cleanup()
START_TIME=""

# ── Colors ────────────────────────────────────────────────────────────
# Populate RED/GREEN/YELLOW/BLUE/BOLD/RESET.
# COLOR=never  → always empty
# COLOR=always → always ANSI escapes
# otherwise    → ANSI escapes only when stdout is a terminal
setup_colors() {
  RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""
  if [[ "$COLOR" == "never" ]]; then
    return 0
  fi
  if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    BOLD='\033[1m'
    RESET='\033[0m'
  fi
}

# ── Logging ───────────────────────────────────────────────────────────
# log and verbose write to stdout; warn and err write to stderr.
log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }
verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    echo -e "${BLUE}[DEBUG]${RESET} $*"
  fi
}

# ── Test Result Recording ─────────────────────────────────────────────
# Each recorder bumps its counter and TOTAL, appends to RESULTS and prints
# one line (TAP syntax when OUTPUT_FORMAT=tap, a symbol line otherwise).
# All three always return 0 so they are safe as the last command of a
# branch under `set -e`.
#   $1 - test name
#   $2 - optional detail / reason
record_pass() {
  local name="$1" detail="${2:-}"
  PASS=$((PASS + 1))
  TOTAL=$((TOTAL + 1))
  RESULTS+=("PASS|${name}|${detail}")
  if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
    printf 'ok %s - %s\n' "${TOTAL}" "${name}"
  else
    # %b expands the colour escapes; name/detail are printed verbatim.
    printf '  %b✓%b %s%s\n' "${GREEN}" "${RESET}" "${name}" "${detail:+ — ${detail}}"
  fi
  return 0
}

record_fail() {
  local name="$1" detail="${2:-}"
  FAIL=$((FAIL + 1))
  TOTAL=$((TOTAL + 1))
  RESULTS+=("FAIL|${name}|${detail}")
  if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
    printf 'not ok %s - %s\n' "${TOTAL}" "${name}"
    # An `if` (not `[[ … ]] && …`) so an empty detail does not make the
    # function return 1 and trip `set -e` in the caller.
    if [[ -n "$detail" ]]; then
      printf '  # %s\n' "${detail}"
    fi
  else
    printf '  %b✗%b %s%s\n' "${RED}" "${RESET}" "${name}" "${detail:+ — ${detail}}"
  fi
  return 0
}

record_skip() {
  local name="$1" reason="${2:-}"
  SKIP=$((SKIP + 1))
  TOTAL=$((TOTAL + 1))
  RESULTS+=("SKIP|${name}|${reason}")
  if [[ "$OUTPUT_FORMAT" == "tap" ]]; then
    printf 'ok %s - %s # SKIP %s\n' "${TOTAL}" "${name}" "${reason}"
  else
    printf '  %b⊘%b %s%s\n' "${YELLOW}" "${RESET}" "${name}" "${reason:+ — ${reason}}"
  fi
  return 0
}

# ── Helpers ───────────────────────────────────────────────────────────
# has_cmd NAME — succeeds when NAME resolves to a command.
has_cmd() { command -v "$1" >/dev/null 2>&1; }

# restore_ok — succeeds when RESTORE_TMP contains at least one regular file.
# `-print -quit` stops at the first hit, so there is no pipeline that could
# fail with SIGPIPE under `pipefail`.
restore_ok() {
  [[ -n "$(find "${RESTORE_TMP}" -type f -print -quit 2>/dev/null)" ]]
}

# require_tool TOOL TEST_NAME — returns 0 when TOOL is available; otherwise
# records TEST_NAME as skipped and returns 1.
require_tool() {
  if ! has_cmd "$1"; then
    record_skip "$2" "$1 not installed"
    return 1
  fi
}

# ── Cleanup ───────────────────────────────────────────────────────────
# Remove the restore scratch directory if one was created. Always returns 0
# so the EXIT trap cannot alter the script's exit status.
# shellcheck disable=SC2317
cleanup() {
  if [[ -n "${RESTORE_TMP}" && -d "${RESTORE_TMP}" ]]; then
    rm -rf -- "${RESTORE_TMP}"
  fi
  return 0
}
trap cleanup EXIT

# ══════════════════════════════════════════════════════════════════════
# TEST SUITES
# ══════════════════════════════════════════════════════════════════════
# Every suite returns 0: outcomes are reported through record_*, never via
# the exit status, so a failed or skipped check cannot abort the run.

# ── 1. Repository Health ─────────────────────────────────────────────
test_repo_health() {
  echo ""
  echo -e "${BOLD}Repository Health${RESET}"

  # 1a. Mount check (if configured)
  if [[ -n "${MOUNT_CHECK}" ]]; then
    if mountpoint -q "${MOUNT_CHECK}" 2>/dev/null; then
      record_pass "Mount check" "${MOUNT_CHECK} is mounted"
    else
      record_fail "Mount check" "${MOUNT_CHECK} is not mounted"
      warn "Skipping remaining tests — mount not available"
      return 0
    fi
  fi

  # 1b. Backup exists
  case "${BACKUP_TYPE}" in
    restic)
      require_tool restic "Repository exists" || return 0
      if restic cat config >/dev/null 2>&1; then
        record_pass "Repository exists"
      else
        record_fail "Repository exists" "not accessible"
      fi
      ;;
    borg)
      require_tool borg "Repository exists" || return 0
      # Capture first, match second: `borg … | grep -q` can report failure
      # under pipefail when grep exits before borg has finished writing.
      local borg_info
      borg_info=$(borg info 2>/dev/null) || true
      if [[ "${borg_info}" == *"Repository ID"* ]]; then
        record_pass "Repository exists"
      else
        record_fail "Repository exists" "not accessible"
      fi
      ;;
    directory | rsnapshot)
      if [[ -d "${BACKUP_DIR}" ]]; then
        record_pass "Backup directory exists"
      else
        record_fail "Backup directory exists" "${BACKUP_DIR} not found"
      fi
      ;;
  esac

  # 1c. Repository reachable
  case "${BACKUP_TYPE}" in
    restic)
      if [[ "${BACKUP_REPO}" =~ ^(s3|sftp|rest): ]]; then
        require_tool restic "Repository reachable" || return 0
        if restic cat config >/dev/null 2>&1; then
          record_pass "Repository reachable"
        else
          record_fail "Repository reachable" "remote repository unreachable"
        fi
      else
        record_pass "Repository reachable" "local"
      fi
      ;;
    borg)
      if [[ "${BACKUP_REPO}" =~ ^ssh:// || "${BACKUP_REPO}" =~ .*@.*:.* ]]; then
        require_tool borg "Repository reachable" || return 0
        if borg info >/dev/null 2>&1; then
          record_pass "Repository reachable"
        else
          record_fail "Repository reachable" "remote repository unreachable"
        fi
      else
        record_pass "Repository reachable" "local"
      fi
      ;;
    directory | rsnapshot)
      if [[ -r "${BACKUP_DIR}" ]]; then
        record_pass "Backup directory reachable"
      else
        record_fail "Backup directory reachable" "${BACKUP_DIR} not readable"
      fi
      ;;
  esac
  return 0
}

# ── 2. Backup Status ─────────────────────────────────────────────────
# Checks the age of the newest backup against MAX_AGE_HOURS, then runs
# test_size and test_snapshot_count.
test_backup_status() {
  echo ""
  echo -e "${BOLD}Backup Status${RESET}"

  # 2a. Recent backup
  local last_ts="" reported=false
  local max_age_s=$((MAX_AGE_HOURS * 3600))

  case "${BACKUP_TYPE}" in
    restic)
      require_tool restic "Recent backup" || {
        test_size
        test_snapshot_count
        return 0
      }
      local latest time_str
      latest=$(restic snapshots --json --latest 1 2>/dev/null) || true
      if [[ -z "${latest}" || "${latest}" == "[]" || "${latest}" == "null" ]]; then
        record_fail "Recent backup" "no snapshots found"
        reported=true
      else
        # `|| true`: with pipefail a non-matching grep would otherwise
        # abort the script before the empty check below can run.
        time_str=$(grep -oP '"time"\s*:\s*"\K[^"]+' <<<"${latest}" | head -1) || true
        if [[ -z "${time_str}" ]]; then
          record_fail "Recent backup" "could not parse snapshot time"
          reported=true
        else
          last_ts=$(date -d "${time_str}" +%s 2>/dev/null) || true
        fi
      fi
      ;;
    borg)
      require_tool borg "Recent backup" || {
        test_size
        test_snapshot_count
        return 0
      }
      local borg_time
      borg_time=$(borg list --format '{time}{NL}' 2>/dev/null | tail -1) || true
      if [[ -z "${borg_time}" ]]; then
        record_fail "Recent backup" "no archives found"
        reported=true
      else
        last_ts=$(date -d "${borg_time}" +%s 2>/dev/null) || true
      fi
      ;;
    directory | rsnapshot)
      # Newest sub-directory by mtime; fall back to any entry when the
      # backup directory holds plain files only. `|| true` keeps a missing
      # BACKUP_DIR from aborting the script via pipefail.
      local newest
      newest=$(find "${BACKUP_DIR}" -maxdepth 1 -mindepth 1 -type d -printf '%T@\n' 2>/dev/null |
        sort -rn | head -1) || true
      if [[ -z "${newest}" ]]; then
        newest=$(find "${BACKUP_DIR}" -maxdepth 1 -mindepth 1 -printf '%T@\n' 2>/dev/null |
          sort -rn | head -1) || true
      fi
      if [[ -z "${newest}" ]]; then
        record_fail "Recent backup" "no backups found in ${BACKUP_DIR}"
        reported=true
      else
        last_ts="${newest%%.*}" # drop the fractional seconds
      fi
      ;;
  esac

  if [[ "${last_ts}" =~ ^[0-9]+$ ]]; then
    local now_ts age_s age_h
    now_ts=$(date +%s)
    age_s=$((now_ts - last_ts))
    age_h=$((age_s / 3600))
    if [[ ${age_s} -le ${max_age_s} ]]; then
      record_pass "Recent backup" "${age_h}h ago (max ${MAX_AGE_HOURS}h)"
    else
      record_fail "Recent backup" "${age_h}h ago (max ${MAX_AGE_HOURS}h)"
    fi
  elif [[ "${reported}" == "false" && -n "${BACKUP_TYPE}" ]]; then
    # A timestamp was found but `date` could not convert it; report that
    # instead of silently recording nothing.
    record_fail "Recent backup" "could not determine time of last backup"
  fi

  test_size
  test_snapshot_count
  return 0
}

# 2b. Backup size — compares the repository size in MB with MIN_SIZE_MB.
test_size() {
  local size_mb=0
  case "${BACKUP_TYPE}" in
    restic)
      require_tool restic "Backup size" || return 0
      local stats total_bytes
      stats=$(restic stats --json --mode raw-data 2>/dev/null) || true
      total_bytes=$(grep -oP '"total_size"\s*:\s*\K[0-9]+' <<<"${stats}" | head -1) || true
      if [[ -n "${total_bytes}" ]]; then
        size_mb=$((total_bytes / 1048576))
      fi
      ;;
    borg)
      require_tool borg "Backup size" || return 0
      # First size figure on the "All archives" line of `borg info`.
      local size_str factor
      size_str=$(borg info 2>/dev/null | grep -i "all archives" |
        grep -oP '[0-9.]+\s*(TB|GB|MB|kB)' | head -1) || true
      if [[ "${size_str}" =~ ^([0-9.]+)[[:space:]]*([A-Za-z]+)$ ]]; then
        case "${BASH_REMATCH[2]}" in
          TB) factor=1048576 ;;
          GB) factor=1024 ;;
          MB) factor=1 ;;
          *) factor=0 ;; # kB: rounds down to 0 MB
        esac
        # awk does the fractional arithmetic and truncates to whole MB.
        size_mb=$(awk -v n="${BASH_REMATCH[1]}" -v f="${factor}" \
          'BEGIN { printf "%d", n * f }') || size_mb=0
      fi
      ;;
    directory | rsnapshot)
      size_mb=$(du -sm "${BACKUP_DIR}" 2>/dev/null | awk '{print $1}') || size_mb=0
      ;;
  esac

  # Anything that is not a plain integer counts as 0 MB.
  if [[ ! "${size_mb}" =~ ^[0-9]+$ ]]; then
    size_mb=0
  fi

  if [[ ${size_mb} -ge ${MIN_SIZE_MB} ]]; then
    record_pass "Backup size" "${size_mb} MB (min ${MIN_SIZE_MB} MB)"
  else
    record_fail "Backup size" "${size_mb} MB < ${MIN_SIZE_MB} MB"
  fi
}

# 2c. Snapshot count — compares the number of snapshots/archives/entries
# with MIN_SNAPSHOTS.
test_snapshot_count() {
  local count=0
  case "${BACKUP_TYPE}" in
    restic)
      require_tool restic "Snapshot count" || return 0
      # Count occurrences, not lines: the JSON array may arrive on a single
      # line, in which case `grep -c` would never report more than 1.
      count=$(restic snapshots --json 2>/dev/null |
        grep -o '"time"[[:space:]]*:' | wc -l) || count=0
      ;;
    borg)
      require_tool borg "Snapshot count" || return 0
      count=$(borg list 2>/dev/null | wc -l) || count=0
      ;;
    directory)
      count=$(find "${BACKUP_DIR}" -maxdepth 1 -mindepth 1 | wc -l) || count=0
      ;;
    rsnapshot)
      count=$(find "${BACKUP_DIR}" -maxdepth 1 -mindepth 1 -type d | wc -l) || count=0
      ;;
  esac
  count=$((count)) # normalise any whitespace padding from wc

  if [[ ${count} -ge ${MIN_SNAPSHOTS} ]]; then
    record_pass "Snapshot count" "${count} (min ${MIN_SNAPSHOTS})"
  else
    record_fail "Snapshot count" "${count} < ${MIN_SNAPSHOTS}"
  fi
}

# ── 3. Integrity ─────────────────────────────────────────────────────
test_integrity_suite() {
  echo ""
  echo -e "${BOLD}Integrity${RESET}"

  # 3a. Integrity check
  # The tools' own output is discarded so it cannot end up inside the
  # TAP stream; only the exit status is used.
  if [[ "${SKIP_INTEGRITY}" == "true" ]]; then
    record_skip "Integrity check" "SKIP_INTEGRITY=true"
  else
    case "${BACKUP_TYPE}" in
      restic)
        if require_tool restic "Integrity check"; then
          if restic check >/dev/null 2>&1; then
            record_pass "Integrity check"
          else
            record_fail "Integrity check" "restic check failed"
          fi
        fi
        ;;
      borg)
        if require_tool borg "Integrity check"; then
          if borg check >/dev/null 2>&1; then
            record_pass "Integrity check"
          else
            record_fail "Integrity check" "borg check failed"
          fi
        fi
        ;;
      directory | rsnapshot)
        record_skip "Integrity check" "not applicable for ${BACKUP_TYPE}"
        ;;
    esac
  fi

  # 3b. Lock check
  case "${BACKUP_TYPE}" in
    restic)
      require_tool restic "Lock check" || return 0
      local lock_output
      lock_output=$(restic list locks 2>/dev/null) || true
      if [[ -z "${lock_output}" ]]; then
        record_pass "Lock check" "no stale locks"
      else
        record_fail "Lock check" "$(wc -l <<<"${lock_output}") lock(s) found"
      fi
      ;;
    borg)
      require_tool borg "Lock check" || return 0
      # NOTE(review): matches the word "lock" anywhere in the output, so a
      # repository path containing "lock" would also trigger this.
      local borg_out
      borg_out=$(borg info 2>&1) || true
      if grep -qi "lock" <<<"${borg_out}"; then
        record_fail "Lock check" "repository appears locked"
      else
        record_pass "Lock check" "no stale locks"
      fi
      ;;
    directory | rsnapshot)
      local lc
      lc=$(find "${BACKUP_DIR}" -maxdepth 1 \( -name "*.lock" -o -name ".lock" \) 2>/dev/null |
        wc -l) || lc=0
      if [[ ${lc} -eq 0 ]]; then
        record_pass "Lock check" "no stale locks"
      else
        record_fail "Lock check" "${lc} lock file(s) in ${BACKUP_DIR}"
      fi
      ;;
  esac
  return 0
}
# ── 4. Recovery ──────────────────────────────────────────────────────
# Restores (or copies) one file into a scratch directory kept in
# RESTORE_TMP. Always returns 0; the outcome is reported through record_*.
test_recovery() {
  echo ""
  echo -e "${BOLD}Recovery${RESET}"

  if [[ "${SKIP_RESTORE}" == "true" ]]; then
    record_skip "Test restore" "SKIP_RESTORE=true"
    return 0
  fi

  # Report a failed mktemp as a test failure rather than letting it abort
  # the whole run.
  if ! RESTORE_TMP=$(mktemp -d "${TMPDIR:-/tmp}/backup-smoke-test-XXXXXX"); then
    RESTORE_TMP=""
    record_fail "Test restore" "could not create temporary directory"
    return 0
  fi

  case "${BACKUP_TYPE}" in
    restic)
      require_tool restic "Test restore" || return 0
      restic_restore "${RESTORE_TEST_FILE}"
      ;;
    borg)
      require_tool borg "Test restore" || return 0
      borg_restore "${RESTORE_TEST_FILE}"
      ;;
    directory | rsnapshot)
      # With RESTORE_TEST_FILE unset the argument is empty and dir_restore
      # picks a file itself.
      dir_restore "${RESTORE_TEST_FILE:+${BACKUP_DIR}/${RESTORE_TEST_FILE}}"
      ;;
  esac
  return 0
}

# restic_restore [PATH] — restore PATH from the latest snapshot into
# RESTORE_TMP. Without PATH, a file from the snapshot listing is used.
restic_restore() {
  local target="${1:-}"
  if [[ -z "${target}" ]]; then
    # Prefer the JSON listing: it lets us pick an entry of type "file" and
    # skip the snapshot record. Paths that need JSON escaping (containing a
    # quote or backslash) are passed over because they cannot be reused
    # verbatim as an --include pattern.
    target=$(restic ls latest --json 2>/dev/null |
      grep -P '"type"\s*:\s*"file"' |
      grep -oP '"path"\s*:\s*"\K[^"\\]+(?=")' |
      head -1) || true
    if [[ -z "${target}" ]]; then
      # Fallback: plain listing, keeping only absolute paths so the
      # "snapshot … of …" header line is never taken as the target.
      target=$(restic ls latest 2>/dev/null | grep '^/' | head -1) || true
    fi
    if [[ -z "${target}" ]]; then
      record_skip "Test restore" "no files in latest snapshot"
      return 0
    fi
  fi

  if restic restore latest --target "${RESTORE_TMP}" --include "${target}" >/dev/null 2>&1 &&
    restore_ok; then
    record_pass "Test restore" "file restored successfully"
  else
    record_fail "Test restore" "restic restore failed"
  fi
}

# borg_restore [PATH] — extract PATH from the last archive listed by
# `borg list` into RESTORE_TMP. Without PATH, the first listed path that
# does not end in "/" is used.
borg_restore() {
  local archive target="${1:-}"
  archive=$(borg list --format '{archive}{NL}' 2>/dev/null | tail -1) || true
  if [[ -z "${archive}" ]]; then
    record_skip "Test restore" "no archives found"
    return 0
  fi

  if [[ -z "${target}" ]]; then
    target=$(borg list "::${archive}" --format '{path}{NL}' 2>/dev/null |
      grep -v '/$' | head -1) || true
    if [[ -z "${target}" ]]; then
      record_skip "Test restore" "no files in latest archive"
      return 0
    fi
  fi

  # borg extracts into the current directory, hence the subshell + cd.
  if (cd "${RESTORE_TMP}" && borg extract "::${archive}" "${target}" 2>/dev/null) &&
    restore_ok; then
    record_pass "Test restore" "file restored successfully"
  else
    record_fail "Test restore" "borg extract failed"
  fi
}

# dir_restore [FILE] — copy FILE into RESTORE_TMP. Without FILE, the first
# regular file found under BACKUP_DIR is used.
dir_restore() {
  local src_file="${1:-}"
  if [[ -z "${src_file}" ]]; then
    # `-print -quit` stops after the first match; a `find | head -1`
    # pipeline can die with SIGPIPE and, under pipefail + set -e, take the
    # script down with it.
    src_file=$(find "${BACKUP_DIR}" -type f -print -quit 2>/dev/null) || true
  fi
  if [[ -z "${src_file}" || ! -f "${src_file}" ]]; then
    record_skip "Test restore" "no files in backup directory"
    return 0
  fi

  local dest_file="${RESTORE_TMP}/${src_file##*/}"
  if cp -- "${src_file}" "${dest_file}" 2>/dev/null && [[ -f "${dest_file}" ]]; then
    record_pass "Test restore" "file copied successfully"
  else
    record_fail "Test restore" "copy failed"
  fi
}

# ══════════════════════════════════════════════════════════════════════
# OUTPUT
# ══════════════════════════════════════════════════════════════════════

# Print the human-readable totals and elapsed time.
print_summary() {
  local end_time duration
  end_time=$(date +%s)
  # An empty START_TIME yields a duration of 0 instead of a syntax error.
  duration=$((end_time - ${START_TIME:-end_time}))

  echo ""
  echo -e "${BOLD}────────────────────────────────────────${RESET}"
  echo -e "${BOLD}Summary${RESET} ${BACKUP_TYPE} ${BACKUP_REPO:-${BACKUP_DIR}}"
  echo -e "  ${GREEN}${PASS} passed${RESET}  ${RED}${FAIL} failed${RESET}  ${YELLOW}${SKIP} skipped${RESET}  (${duration}s)"
  echo -e "${BOLD}────────────────────────────────────────${RESET}"
  if [[ ${FAIL} -eq 0 ]]; then
    echo -e "${GREEN}${BOLD}All tests passed.${RESET}"
  else
    echo -e "${RED}${BOLD}${FAIL} test(s) failed.${RESET}"
  fi
}

# TAP output: version line first, plan and counters after the tests.
print_tap_header() {
  echo "TAP version 13"
}

print_tap_footer() {
  echo "1..${TOTAL}"
  echo "# pass ${PASS}"
  echo "# fail ${FAIL}"
  echo "# skip ${SKIP}"
}

# _xml_escape TEXT — print TEXT with &, <, > and " replaced by XML entities.
# The ampersand rule runs first so the entities themselves stay intact.
_xml_escape() {
  printf '%s' "$1" |
    sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/"/\&quot;/g'
}

# Write all recorded results to JUNIT_FILE as a JUnit-style XML report:
# one <testcase> per RESULTS entry, with <system-out> for a pass that has a
# detail, <failure> for a failure and <skipped> for a skip.
write_junit() {
  local end_time duration
  end_time=$(date +%s)
  duration=$((end_time - ${START_TIME:-end_time}))

  local result status name detail suite_class
  suite_class=$(_xml_escape "${BACKUP_TYPE:-backup}")

  # One redirection for the whole report instead of re-opening the file
  # for every line.
  {
    printf '<?xml version="1.0" encoding="UTF-8"?>\n'
    printf '<testsuites>\n'
    printf '  <testsuite name="backup-smoke-tests" tests="%s" failures="%s" skipped="%s" time="%s">\n' \
      "${TOTAL}" "${FAIL}" "${SKIP}" "${duration}"

    # The ${arr[@]+…} form keeps an empty RESULTS from tripping `set -u`
    # on older bash releases.
    for result in ${RESULTS[@]+"${RESULTS[@]}"}; do
      # Entries are "STATUS|name|detail"; any further "|" stays in detail.
      IFS='|' read -r status name detail <<<"${result}"
      name=$(_xml_escape "${name}")
      detail=$(_xml_escape "${detail}")

      printf '    <testcase name="%s" classname="%s">\n' "${name}" "${suite_class}"
      case "${status}" in
        PASS)
          if [[ -n "${detail}" ]]; then
            printf '      <system-out>%s</system-out>\n' "${detail}"
          fi
          ;;
        FAIL)
          printf '      <failure message="%s">FAILED: %s — %s</failure>\n' \
            "${detail}" "${name}" "${detail}"
          ;;
        SKIP)
          printf '      <skipped message="%s"/>\n' "${detail}"
          ;;
      esac
      printf '    </testcase>\n'
    done

    printf '  </testsuite>\n'
    printf '</testsuites>\n'
  } >"${JUNIT_FILE}"

  log "JUnit report written to ${JUNIT_FILE}"
}
══════════════════════════════════════════════════════════════════════ # MAIN # ══════════════════════════════════════════════════════════════════════ usage() { cat <