Files
linux-scripts/promstack-backup.sh
T
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

775 lines
31 KiB
Bash

#!/usr/bin/env bash
#########################################################################################
#### promstack-backup.sh — Backup/restore Prometheus, Grafana, Alertmanager, ####
#### and Blackbox Exporter. Exports dashboards, alert rules, datasources, and ####
#### configs for disaster recovery. (No TSDB snapshot is taken by this script.) ####
#### Requires: bash 4+, curl, sha256sum; optionally jq (dashboard export, restore, --list) ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version 1.11 ####
#### ####
#### Usage: ####
#### ./promstack-backup.sh --backup --output-dir ./backups ####
#### ####
#### See --help for all options. ####
#########################################################################################
set -euo pipefail
# ── Colors (pre-initialized) ─────────────────────────────────────────
# Every colour variable is defined as an empty string up front so it can be
# expanded under `set -u` even when setup_colors leaves colours disabled.
RED=""
GREEN=""
YELLOW=""
BLUE=""
CYAN=""
BOLD=""
DIM=""
RESET=""
# Populate the global colour variables according to $COLOR.
#   never  - leave every colour variable untouched (empty by default)
#   always - enable colours unconditionally
#   auto   - (default) enable colours only when stdout is a terminal
# The values are stored as real ESC bytes ($'...') rather than the literal
# text "\033[...": that way they render correctly everywhere they are
# expanded, including plain `cat <<EOF` heredocs that do no escape
# processing, while `echo -e`, printf formats and %b keep working unchanged.
setup_colors() {
  local mode="${COLOR:-auto}"
  if [[ "$mode" == "never" ]]; then
    return
  fi
  if [[ "$mode" == "always" ]] || [[ -t 1 ]]; then
    RED=$'\033[0;31m'
    GREEN=$'\033[0;32m'
    YELLOW=$'\033[0;33m'
    BLUE=$'\033[0;34m'
    CYAN=$'\033[0;36m'
    BOLD=$'\033[1m'
    DIM=$'\033[2m'
    RESET=$'\033[0m'
  fi
}
# ── Logging ───────────────────────────────────────────────────────────
# Logging helpers.
#   log     - informational message on stdout
#   warn    - warning on stderr
#   err     - error on stderr
#   verbose - debug message on stdout, only when VERBOSE is "true"
#   die     - print an error and terminate the script with status 1
# The colour variables are expanded through %b so both literal "\033[..."
# text and raw ESC bytes render as colours; the message itself goes through
# %s so backslashes in user-supplied text (paths, URLs) are printed verbatim
# instead of being interpreted as escape sequences.
log() { printf '%b[INFO]%b %s\n' "$BLUE" "$RESET" "$*"; }
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$RESET" "$*" >&2; }
err() { printf '%b[ERROR]%b %s\n' "$RED" "$RESET" "$*" >&2; }
verbose() {
  # An `if` (not `[[ ]] && ...`) keeps the return status 0 when debug output
  # is disabled, which matters under `set -e`.
  if [[ "${VERBOSE:-false}" == "true" ]]; then
    printf '%b[DEBUG]%b %s\n' "$DIM" "$RESET" "$*"
  fi
}
die() {
  err "$*"
  exit 1
}
# Print a section title ($1) in bold cyan, framed by one blank line above
# and one below.
section_header() {
  printf '\n%b\n\n' " ${BOLD}${CYAN}── $1 ──${RESET}"
}
# Print one "label value" row: $1 is the label (bold, left-aligned and padded
# to 22 columns), $2 is the value, printed verbatim.
field() {
  local label="$1" value="$2"
  printf ' %b%-22s%b %s\n' "$BOLD" "$label" "$RESET" "$value"
}
# Like field, but the value ($2) is expanded with %b so colour escape
# sequences embedded in it are rendered.
field_color() {
  local label="$1" value="$2"
  printf ' %b%-22s%b %b\n' "$BOLD" "$label" "$RESET" "$value"
}
# Print the number of whole seconds since START_TIME (epoch seconds),
# suffixed with "s".
elapsed() {
  local now
  now=$(date +%s)
  printf '%ss\n' "$((now - START_TIME))"
}
# ── Defaults ──────────────────────────────────────────────────────────
# Values that are only ever set from the command line.
RUN_MODE=""
RESTORE_DIR=""
# Values seeded from a differently-named environment variable.
OUTPUT_DIR="${PSB_OUTPUT_DIR:-./monitoring-backups}"
COMPONENTS="${PSB_COMPONENTS:-all}"
GRAFANA_TOKEN="${GRAFANA_API_KEY:-}"
# Values that keep a non-empty environment setting and otherwise fall back
# to the default after ":=".
: "${GRAFANA_URL:=http://localhost:3000}"
: "${GRAFANA_USER:=admin}"
: "${GRAFANA_PASS:=}"
: "${PROMETHEUS_URL:=http://localhost:9090}"
: "${ALERTMANAGER_URL:=http://localhost:9093}"
: "${AM_CONFIG_PATH:=/etc/alertmanager}"
: "${BLACKBOX_URL:=http://localhost:9115}"
: "${BLACKBOX_CONFIG_PATH:=/etc/blackbox_exporter}"
: "${PROM_CONFIG_PATH:=/etc/prometheus}"
: "${PROM_DATA_PATH:=/var/lib/prometheus}"
: "${VERBOSE:=false}"
: "${COLOR:=auto}"
# ── State ─────────────────────────────────────────────────────────────
# Script name for help output; the timers and counters below are filled in
# while a mode runs.
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
START_TIME=""
BACKUP_ID=""
BACKUP_COUNT=0
ERROR_COUNT=0
# ── API helpers ──────────────────────────────────────────────────────
# Call the Grafana HTTP API.
# Arguments: $1 - HTTP method, $2 - endpoint path appended to GRAFANA_URL,
#            remaining arguments are passed through to curl unchanged.
# Globals:   GRAFANA_URL, GRAFANA_TOKEN, GRAFANA_USER, GRAFANA_PASS (read).
# Outputs:   the response body on stdout.
# Returns:   curl's exit status. --fail makes HTTP error responses
#            (401, 404, 5xx, ...) a non-zero status, so callers do not mistake
#            an error page for a successful request or store it as a backup.
# Auth:      a bearer token wins over basic auth; with neither configured the
#            request is sent unauthenticated.
grafana_api() {
  local method="$1" endpoint="$2"
  shift 2
  local -a auth_args=()
  if [[ -n "$GRAFANA_TOKEN" ]]; then
    auth_args+=(-H "Authorization: Bearer ${GRAFANA_TOKEN}")
  elif [[ -n "$GRAFANA_PASS" ]]; then
    auth_args+=(-u "${GRAFANA_USER}:${GRAFANA_PASS}")
  fi
  # ${arr[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
  curl --fail -sS -X "$method" \
    ${auth_args[@]+"${auth_args[@]}"} \
    -H "Content-Type: application/json" \
    "${GRAFANA_URL}${endpoint}" "$@"
}
# Run a command and store its stdout as one backup artefact.
# Arguments: $1 - label shown to the user, $2 - output file,
#            remaining arguments - the command to run.
# The item counts as backed up only when the command succeeds AND the output
# file is non-empty; otherwise the file is removed, the item is reported as
# skipped and ERROR_COUNT is incremented. The command's stderr is discarded.
backup_item() {
  local title="$1" dest="$2"
  shift 2
  verbose "Backing up: ${title}"
  if ! "$@" > "$dest" 2>/dev/null || [[ ! -s "$dest" ]]; then
    ERROR_COUNT=$((ERROR_COUNT + 1))
    echo -e " ${YELLOW}${RESET} ${title} ${DIM}(skipped)${RESET}"
    rm -f "$dest"
    return
  fi
  BACKUP_COUNT=$((BACKUP_COUNT + 1))
  echo -e " ${GREEN}${RESET} ${title}"
}
# Succeed when component $1 is enabled by the COMPONENTS setting.
# COMPONENTS is either "all" or a comma-separated list of component names.
# Matching is case-insensitive for both "all" and the names, whitespace
# around list entries is ignored, and the name is compared literally (it is
# never treated as a pattern or regular expression).
component_selected() {
  local wanted="${1,,}"
  local selection="${COMPONENTS,,}"
  selection="${selection//[[:space:]]/}"
  [[ "$selection" == "all" ]] || [[ ",${selection}," == *",${wanted},"* ]]
}
# Find a config file: check primary path first, fall back to /etc/prometheus/
# Locate config file $1: look in directory $2 first, then in
# PROM_CONFIG_PATH. Prints the full path of the first regular file found;
# prints nothing (and still returns 0) when neither location has it.
find_config() {
  local name="$1" preferred="$2"
  local candidate="${preferred}/${name}"
  if [[ ! -f "$candidate" ]]; then
    candidate="${PROM_CONFIG_PATH}/${name}"
    [[ -f "$candidate" ]] || return 0
  fi
  printf '%s\n' "$candidate"
}
# ══════════════════════════════════════════════════════════════════════
# BACKUP
# ══════════════════════════════════════════════════════════════════════
# Back up Grafana through its HTTP API into <backup_dir>/grafana.
# Arguments: $1 - root directory of the current backup.
# Exports datasources, folders, every dashboard returned by the search API
# (one JSON file per uid; needs jq), alert rules and notification policies.
# Without jq only the raw dashboard search result is saved.
# Globals:   GRAFANA_URL (read); BACKUP_COUNT, ERROR_COUNT (updated).
# If the health endpoint cannot be reached the component is skipped and
# counted as one error. A failed dashboard listing and every dashboard that
# cannot be exported are reported and counted as errors as well, so an
# incomplete export is visible in the summary.
backup_grafana() {
  local backup_dir="$1"
  local grafana_dir="${backup_dir}/grafana"
  mkdir -p "${grafana_dir}/dashboards"
  section_header "Grafana Backup"
  field "URL:" "$GRAFANA_URL"
  echo ""

  # Test connectivity
  if ! grafana_api GET "/api/health" > /dev/null 2>&1; then
    warn "Cannot reach Grafana at ${GRAFANA_URL} — skipping"
    ((ERROR_COUNT++)) || true
    return
  fi

  # Export datasources
  backup_item "Datasources" "${grafana_dir}/datasources.json" \
    grafana_api GET "/api/datasources"

  # Export folders
  backup_item "Folders" "${grafana_dir}/folders.json" \
    grafana_api GET "/api/folders"

  # Export dashboards. The search is capped at 5000 results by its limit
  # parameter.
  local dashboard_list
  if ! dashboard_list=$(grafana_api GET "/api/search?type=dash-db&limit=5000" 2>/dev/null); then
    warn "Could not list Grafana dashboards"
    ((ERROR_COUNT++)) || true
    dashboard_list="[]"
  fi

  if command -v jq &>/dev/null; then
    local uids uid dash_file
    local dash_count=0 dash_failed=0
    if ! uids=$(jq -r '.[].uid // empty' <<< "$dashboard_list" 2>/dev/null); then
      warn "Unexpected dashboard search response — no dashboards exported"
      ((ERROR_COUNT++)) || true
      uids=""
    fi
    while IFS= read -r uid; do
      [[ -z "$uid" ]] && continue
      dash_file="${grafana_dir}/dashboards/${uid}.json"
      if grafana_api GET "/api/dashboards/uid/${uid}" > "$dash_file" 2>/dev/null \
        && [[ -s "$dash_file" ]]; then
        ((dash_count++)) || true
        ((BACKUP_COUNT++)) || true
      else
        # Never leave a partial or empty export behind.
        rm -f "$dash_file"
        ((dash_failed++)) || true
        ((ERROR_COUNT++)) || true
      fi
    done <<< "$uids"
    echo -e " ${GREEN}${RESET} Dashboards (${dash_count} exported)"
    if [[ "$dash_failed" -gt 0 ]]; then
      warn "${dash_failed} dashboard(s) could not be exported"
    fi
  else
    backup_item "Dashboard list" "${grafana_dir}/dashboard-list.json" \
      echo "$dashboard_list"
  fi

  # Export alert rules
  backup_item "Alert rules" "${grafana_dir}/alert-rules.json" \
    grafana_api GET "/api/v1/provisioning/alert-rules"

  # Export notification policies
  backup_item "Notification policies" "${grafana_dir}/notification-policies.json" \
    grafana_api GET "/api/v1/provisioning/policies"
}
# Back up Prometheus into <backup_dir>/prometheus.
# Arguments: $1 - root directory of the current backup.
# Copies prometheus.yml and the *.yml / *.yaml rule files found under
# PROM_CONFIG_PATH/rules (or, if that directory does not exist,
# PROM_CONFIG_PATH/rules.d), then exports active alerts, scrape targets and
# rule groups from the HTTP API.
# Globals:   PROM_CONFIG_PATH, PROMETHEUS_URL (read);
#            BACKUP_COUNT, ERROR_COUNT (updated).
# Note: rule files are searched recursively but copied into one flat
# directory, so files with the same name in different subdirectories
# overwrite each other.
backup_prometheus() {
  local backup_dir="$1"
  local prom_dir="${backup_dir}/prometheus"
  mkdir -p "${prom_dir}/rules"
  section_header "Prometheus Backup"
  field "Config path:" "$PROM_CONFIG_PATH"
  field "API:" "$PROMETHEUS_URL"
  echo ""

  # Copy prometheus.yml. backup_item captures the command's stdout, so the
  # file is streamed with cat (as the other components do) instead of
  # running cp against the very file backup_item is redirecting into.
  if [[ -f "${PROM_CONFIG_PATH}/prometheus.yml" ]]; then
    backup_item "prometheus.yml" "${prom_dir}/prometheus.yml" \
      cat "${PROM_CONFIG_PATH}/prometheus.yml"
  else
    warn "prometheus.yml not found at ${PROM_CONFIG_PATH}"
    ((ERROR_COUNT++)) || true
  fi

  # Copy alert/recording rules
  local rules_src=""
  if [[ -d "${PROM_CONFIG_PATH}/rules" ]]; then
    rules_src="${PROM_CONFIG_PATH}/rules"
  elif [[ -d "${PROM_CONFIG_PATH}/rules.d" ]]; then
    rules_src="${PROM_CONFIG_PATH}/rules.d"
  fi
  local rules_count=0 rf
  if [[ -n "$rules_src" ]]; then
    while IFS= read -r -d '' rf; do
      if cp "$rf" "${prom_dir}/rules/" 2>/dev/null; then
        ((rules_count++)) || true
      fi
    done < <(find "$rules_src" \( -name '*.yml' -o -name '*.yaml' \) -print0 2>/dev/null | sort -z)
  fi
  if [[ "$rules_count" -gt 0 ]]; then
    echo -e " ${GREEN}${RESET} Alert/recording rules (${rules_count} files)"
    ((BACKUP_COUNT++)) || true
  fi

  # API exports. -f turns an HTTP error response into a failure so that an
  # error page is never stored as if it were a valid export.
  backup_item "Active alerts" "${prom_dir}/active-alerts.json" \
    curl -fsS "${PROMETHEUS_URL}/api/v1/alerts"
  backup_item "Scrape targets" "${prom_dir}/targets.json" \
    curl -fsS "${PROMETHEUS_URL}/api/v1/targets"
  backup_item "Rule groups (API)" "${prom_dir}/rule-groups.json" \
    curl -fsS "${PROMETHEUS_URL}/api/v1/rules"
}
# Back up Alertmanager into <backup_dir>/alertmanager.
# Arguments: $1 - root directory of the current backup.
# Exports status, silences and active alerts from the v2 API, copies
# alertmanager.yml (looked up in AM_CONFIG_PATH, then PROM_CONFIG_PATH) and
# every file under the first "templates" directory found in those two
# locations.
# Globals:   ALERTMANAGER_URL, AM_CONFIG_PATH, PROM_CONFIG_PATH (read);
#            BACKUP_COUNT (updated; ERROR_COUNT via backup_item).
# Note: template files are searched recursively but copied into one flat
# directory.
backup_alertmanager() {
  local backup_dir="$1"
  local am_dir="${backup_dir}/alertmanager"
  mkdir -p "$am_dir"
  section_header "Alertmanager Backup"
  field "API:" "$ALERTMANAGER_URL"
  echo ""

  # API exports. -f turns an HTTP error response into a failure so that an
  # error page is never stored as if it were a valid export.
  backup_item "Config (via API)" "${am_dir}/status.json" \
    curl -fsS "${ALERTMANAGER_URL}/api/v2/status"
  backup_item "Silences" "${am_dir}/silences.json" \
    curl -fsS "${ALERTMANAGER_URL}/api/v2/silences"
  backup_item "Active alerts" "${am_dir}/alerts.json" \
    curl -fsS "${ALERTMANAGER_URL}/api/v2/alerts"

  # Copy config file — check canonical path, then /etc/prometheus/
  local am_conf
  am_conf=$(find_config "alertmanager.yml" "$AM_CONFIG_PATH")
  if [[ -n "$am_conf" ]]; then
    verbose "Found alertmanager.yml at ${am_conf}"
    backup_item "alertmanager.yml" "${am_dir}/alertmanager.yml" \
      cat "$am_conf"
  fi

  # Copy templates
  local tmpl_dir=""
  if [[ -d "${AM_CONFIG_PATH}/templates" ]]; then
    tmpl_dir="${AM_CONFIG_PATH}/templates"
  elif [[ -d "${PROM_CONFIG_PATH}/templates" ]]; then
    tmpl_dir="${PROM_CONFIG_PATH}/templates"
  fi
  if [[ -n "$tmpl_dir" ]]; then
    local tmpl_count=0 tf
    mkdir -p "${am_dir}/templates"
    while IFS= read -r -d '' tf; do
      if cp "$tf" "${am_dir}/templates/" 2>/dev/null; then
        ((tmpl_count++)) || true
      fi
    done < <(find "$tmpl_dir" -type f -print0 2>/dev/null | sort -z)
    if [[ "$tmpl_count" -gt 0 ]]; then
      echo -e " ${GREEN}${RESET} Templates (${tmpl_count} files)"
      ((BACKUP_COUNT++)) || true
    fi
  fi
}
# Back up Blackbox Exporter into <backup_dir>/blackbox.
# Arguments: $1 - root directory of the current backup.
# Probes the exporter's /-/healthy endpoint first and skips the whole
# component (counting one error) when it is unreachable or unhealthy. Then
# copies blackbox.yml or, failing that, config.yml (each looked up in
# BLACKBOX_CONFIG_PATH, then PROM_CONFIG_PATH) and saves the /config
# endpoint output.
# Globals:   BLACKBOX_URL, BLACKBOX_CONFIG_PATH, PROM_CONFIG_PATH (read);
#            ERROR_COUNT (updated; BACKUP_COUNT via backup_item).
backup_blackbox() {
  local backup_dir="$1"
  local bb_dir="${backup_dir}/blackbox"
  mkdir -p "$bb_dir"
  section_header "Blackbox Exporter Backup"
  field "Config path:" "$BLACKBOX_CONFIG_PATH"
  field "API:" "$BLACKBOX_URL"
  echo ""

  # Test connectivity. -f makes an HTTP error status count as unreachable
  # instead of passing the check on any response at all.
  if ! curl -fsS "${BLACKBOX_URL}/-/healthy" > /dev/null 2>&1; then
    warn "Cannot reach Blackbox Exporter at ${BLACKBOX_URL} — skipping"
    ((ERROR_COUNT++)) || true
    return
  fi

  # Copy blackbox.yml — check canonical path, then /etc/prometheus/
  local bb_conf
  bb_conf=$(find_config "blackbox.yml" "$BLACKBOX_CONFIG_PATH")
  if [[ -z "$bb_conf" ]]; then
    bb_conf=$(find_config "config.yml" "$BLACKBOX_CONFIG_PATH")
  fi
  if [[ -n "$bb_conf" ]]; then
    local bb_name
    bb_name=$(basename "$bb_conf")
    verbose "Found ${bb_name} at ${bb_conf}"
    backup_item "${bb_name}" "${bb_dir}/${bb_name}" \
      cat "$bb_conf"
  else
    warn "No blackbox config found at ${BLACKBOX_CONFIG_PATH} or ${PROM_CONFIG_PATH}"
    ((ERROR_COUNT++)) || true
  fi

  # Export probe config via API (an HTTP error is a failure, not a backup).
  backup_item "Config (via API)" "${bb_dir}/config-api.json" \
    curl -fsS "${BLACKBOX_URL}/config"
}
# Run a full backup into OUTPUT_DIR/<timestamp id>.
# Globals:   OUTPUT_DIR, COMPONENTS (read); BACKUP_ID (set);
#            BACKUP_COUNT, ERROR_COUNT (updated by the component backups).
# Steps: create the backup directory, run the backup function of each
# selected component, write manifest.json, write checksums.sha256 for every
# file in the backup, then print a summary.
# Checksum paths are recorded exactly as `find` prints them, i.e. prefixed
# with OUTPUT_DIR as it was given for this run.
do_backup() {
  BACKUP_ID="$(date +%Y%m%d-%H%M%S)"
  local backup_dir="${OUTPUT_DIR}/${BACKUP_ID}"
  mkdir -p "$backup_dir"
  log "Starting monitoring backup..."
  field "Backup ID:" "$BACKUP_ID"
  field "Output:" "$backup_dir"
  field "Components:" "$COMPONENTS"

  # Each component has a backup_<name> function taking the backup directory.
  local component
  for component in grafana prometheus alertmanager blackbox; do
    if component_selected "$component"; then
      "backup_${component}" "$backup_dir"
    fi
  done

  # Create manifest
  section_header "Finalizing"
  local total_size
  total_size=$(du -sh "$backup_dir" 2>/dev/null | awk '{print $1}' || echo "unknown")
  local file_count
  file_count=$(find "$backup_dir" -type f 2>/dev/null | wc -l || echo 0)
  # COMPONENTS is user input: escape backslashes and double quotes so the
  # manifest stays valid JSON whatever was passed to --components.
  local components_json
  components_json=$(printf '%s' "$COMPONENTS" | sed 's/[\\"]/\\&/g')
  printf '{"backup_id":"%s","timestamp":"%s","components":"%s","files":%s,"size":"%s"}\n' \
    "$BACKUP_ID" "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$components_json" \
    "$file_count" "$total_size" > "${backup_dir}/manifest.json"

  # Generate checksums. A failure (for example sha256sum not installed) is
  # reported and counted instead of leaving an empty checksum file behind
  # while claiming success.
  local checksum_file="${backup_dir}/checksums.sha256"
  if find "$backup_dir" -type f ! -name "checksums.sha256" -print0 2>/dev/null \
    | sort -z \
    | xargs -0 sha256sum 2>/dev/null > "$checksum_file" \
    && [[ -s "$checksum_file" ]]; then
    echo -e " ${GREEN}${RESET} Manifest and checksums created"
  else
    rm -f "$checksum_file"
    warn "Could not generate checksums (is sha256sum installed?) — backup integrity cannot be verified later"
    ((ERROR_COUNT++)) || true
    echo -e " ${GREEN}${RESET} Manifest created"
  fi

  section_header "Backup Summary"
  field "Backup ID:" "$BACKUP_ID"
  field "Location:" "$backup_dir"
  field_color "Files backed up:" "${GREEN}${BACKUP_COUNT}${RESET}"
  if [[ "$ERROR_COUNT" -gt 0 ]]; then
    field_color "Errors:" "${RED}${ERROR_COUNT}${RESET}"
  else
    field_color "Errors:" "${GREEN}0${RESET}"
  fi
  field "Total size:" "$total_size"
  field "Duration:" "$(elapsed)"
}
# ══════════════════════════════════════════════════════════════════════
# RESTORE
# ══════════════════════════════════════════════════════════════════════
# Restore the selected components from the backup directory RESTORE_DIR.
# Globals:   RESTORE_DIR, COMPONENTS (via component_selected), GRAFANA_*,
#            PROMETHEUS_URL, ALERTMANAGER_URL, BLACKBOX_URL, PROM_CONFIG_PATH,
#            AM_CONFIG_PATH, BLACKBOX_CONFIG_PATH (read).
# Exits (via die) when RESTORE_DIR is unset, missing, or has no manifest.
# Steps:
#   1. Verify checksums when checksums.sha256 exists; a mismatch only warns.
#   2. Grafana: POST each datasource and dashboard through the API (needs jq).
#   3. Prometheus / Alertmanager / Blackbox: copy config files back into
#      place and ask each service to reload through its /-/reload endpoint.
# All HTTP calls use curl's --fail, so an HTTP error response is reported as
# a failure instead of being printed as a success.
do_restore() {
  [[ -z "$RESTORE_DIR" ]] && die "No restore directory specified (--restore-dir)"
  [[ ! -d "$RESTORE_DIR" ]] && die "Restore directory not found: ${RESTORE_DIR}"
  [[ ! -f "${RESTORE_DIR}/manifest.json" ]] && die "No manifest.json in ${RESTORE_DIR}"
  log "Restoring from ${RESTORE_DIR}..."

  # Verify checksums first. The check runs from the current directory, so a
  # relative --restore-dir and the paths recorded in the checksum file (as
  # given at backup time, relative or absolute) still resolve; as a fallback
  # the file is also tried from inside the backup directory, for checksum
  # files whose paths are relative to it.
  if [[ -f "${RESTORE_DIR}/checksums.sha256" ]]; then
    log "Verifying backup integrity..."
    if sha256sum -c "${RESTORE_DIR}/checksums.sha256" > /dev/null 2>&1 \
      || (cd "$RESTORE_DIR" && sha256sum -c checksums.sha256 > /dev/null 2>&1); then
      echo -e " ${GREEN}${RESET} Checksums verified"
    else
      warn "Some checksums failed — proceed with caution"
    fi
  fi

  # Restore Grafana
  if [[ -d "${RESTORE_DIR}/grafana" ]] && component_selected "grafana"; then
    section_header "Restoring Grafana"
    if ! command -v jq &>/dev/null; then
      warn "jq not found — skipping Grafana datasources and dashboards"
    else
      # Restore datasources
      if [[ -f "${RESTORE_DIR}/grafana/datasources.json" ]]; then
        local ds ds_name ds_count=0
        while IFS= read -r ds; do
          [[ -z "$ds" ]] && continue
          ds_name=$(jq -r '.name // "unknown"' <<< "$ds" 2>/dev/null) || ds_name="unknown"
          if grafana_api POST "/api/datasources" --fail -d "$ds" > /dev/null 2>&1; then
            echo -e " ${GREEN}${RESET} Datasource: ${ds_name}"
            ((ds_count++)) || true
          else
            echo -e " ${YELLOW}${RESET} Datasource: ${ds_name} (may already exist)"
          fi
        done < <(jq -c '.[]' "${RESTORE_DIR}/grafana/datasources.json" 2>/dev/null)
        log "Restored ${ds_count} datasources"
      fi

      # Restore dashboards
      if [[ -d "${RESTORE_DIR}/grafana/dashboards" ]]; then
        local df payload dash_count=0 dash_failed=0
        for df in "${RESTORE_DIR}/grafana/dashboards"/*.json; do
          [[ ! -f "$df" ]] && continue
          # The numeric id belongs to the instance the backup came from;
          # clearing it lets Grafana match the dashboard by uid (or create
          # it) instead of looking up an id that may not exist here.
          payload=$(jq '{dashboard: (.dashboard | .id = null), overwrite: true}' "$df" 2>/dev/null || cat "$df")
          if grafana_api POST "/api/dashboards/db" --fail -d "$payload" > /dev/null 2>&1; then
            ((dash_count++)) || true
          else
            ((dash_failed++)) || true
          fi
        done
        echo -e " ${GREEN}${RESET} Dashboards (${dash_count} imported)"
        if [[ "$dash_failed" -gt 0 ]]; then
          warn "${dash_failed} dashboard(s) could not be imported"
        fi
      fi
    fi
  fi

  # Restore Prometheus config
  if [[ -d "${RESTORE_DIR}/prometheus" ]] && component_selected "prometheus"; then
    section_header "Restoring Prometheus"
    if [[ -f "${RESTORE_DIR}/prometheus/prometheus.yml" ]]; then
      if cp "${RESTORE_DIR}/prometheus/prometheus.yml" "${PROM_CONFIG_PATH}/prometheus.yml" 2>/dev/null; then
        echo -e " ${GREEN}${RESET} prometheus.yml restored"
      else
        echo -e " ${RED}${RESET} Failed to restore prometheus.yml (check permissions)"
      fi
    fi
    if [[ -d "${RESTORE_DIR}/prometheus/rules" ]]; then
      # The backup reads rules from "rules" or, failing that, "rules.d";
      # restore into whichever of the two this system already uses.
      local target_rules="${PROM_CONFIG_PATH}/rules"
      if [[ ! -d "$target_rules" && -d "${PROM_CONFIG_PATH}/rules.d" ]]; then
        target_rules="${PROM_CONFIG_PATH}/rules.d"
      fi
      mkdir -p "$target_rules" 2>/dev/null || true
      local rf rule_count=0
      for rf in "${RESTORE_DIR}/prometheus/rules"/*; do
        [[ ! -f "$rf" ]] && continue
        if cp "$rf" "$target_rules/" 2>/dev/null; then
          ((rule_count++)) || true
        fi
      done
      echo -e " ${GREEN}${RESET} Alert rules (${rule_count} files)"
    fi
    # Reload Prometheus
    if curl -fsS -X POST "${PROMETHEUS_URL}/-/reload" > /dev/null 2>&1; then
      echo -e " ${GREEN}${RESET} Prometheus reloaded"
    else
      warn "Could not reload Prometheus — restart manually"
    fi
  fi

  # Restore Alertmanager config
  if [[ -d "${RESTORE_DIR}/alertmanager" ]] && component_selected "alertmanager"; then
    section_header "Restoring Alertmanager"
    if [[ -f "${RESTORE_DIR}/alertmanager/alertmanager.yml" ]]; then
      # Detect where alertmanager.yml lives on this system
      local am_target="${AM_CONFIG_PATH}/alertmanager.yml"
      if [[ ! -d "$AM_CONFIG_PATH" ]] && [[ -f "${PROM_CONFIG_PATH}/alertmanager.yml" ]]; then
        am_target="${PROM_CONFIG_PATH}/alertmanager.yml"
      fi
      if cp "${RESTORE_DIR}/alertmanager/alertmanager.yml" "$am_target" 2>/dev/null; then
        echo -e " ${GREEN}${RESET} alertmanager.yml restored → ${am_target}"
      else
        echo -e " ${RED}${RESET} Failed to restore alertmanager.yml"
      fi
    fi
    if curl -fsS -X POST "${ALERTMANAGER_URL}/-/reload" > /dev/null 2>&1; then
      echo -e " ${GREEN}${RESET} Alertmanager reloaded"
    else
      warn "Could not reload Alertmanager — restart manually"
    fi
  fi

  # Restore Blackbox Exporter config
  if [[ -d "${RESTORE_DIR}/blackbox" ]] && component_selected "blackbox"; then
    section_header "Restoring Blackbox Exporter"
    local bb_conf=""
    if [[ -f "${RESTORE_DIR}/blackbox/blackbox.yml" ]]; then
      bb_conf="blackbox.yml"
    elif [[ -f "${RESTORE_DIR}/blackbox/config.yml" ]]; then
      bb_conf="config.yml"
    fi
    if [[ -n "$bb_conf" ]]; then
      # Detect where blackbox config lives on this system
      local bb_target="${BLACKBOX_CONFIG_PATH}/${bb_conf}"
      if [[ ! -d "$BLACKBOX_CONFIG_PATH" ]] && [[ -f "${PROM_CONFIG_PATH}/${bb_conf}" ]]; then
        bb_target="${PROM_CONFIG_PATH}/${bb_conf}"
      fi
      if cp "${RESTORE_DIR}/blackbox/${bb_conf}" "$bb_target" 2>/dev/null; then
        echo -e " ${GREEN}${RESET} ${bb_conf} restored → ${bb_target}"
      else
        echo -e " ${RED}${RESET} Failed to restore ${bb_conf}"
      fi
    fi
    if curl -fsS -X POST "${BLACKBOX_URL}/-/reload" > /dev/null 2>&1; then
      echo -e " ${GREEN}${RESET} Blackbox Exporter reloaded"
    else
      warn "Could not reload Blackbox Exporter — restart manually"
    fi
  fi

  section_header "Restore Summary"
  field "Restored from:" "$RESTORE_DIR"
  field "Duration:" "$(elapsed)"
  log "Restore complete — verify services are healthy"
}
# ══════════════════════════════════════════════════════════════════════
# VERIFY
# ══════════════════════════════════════════════════════════════════════
# Verify the backup directory given with --restore-dir (RESTORE_DIR).
# Checks that manifest.json exists, that checksums.sha256 (when present)
# matches the files, and lists the component directories found.
# Exits (via die) when the directory is unset or missing.
# Returns:  0 when no check failed, 1 otherwise, so callers and automation
#           can detect a bad backup from the exit status.
do_verify() {
  local verify_dir="${RESTORE_DIR:-}"
  [[ -z "$verify_dir" ]] && die "Specify backup directory with --restore-dir"
  [[ ! -d "$verify_dir" ]] && die "Directory not found: ${verify_dir}"
  section_header "Backup Verification"
  field "Directory:" "$verify_dir"
  echo ""
  local pass=0 fail=0

  # Check manifest
  if [[ -f "${verify_dir}/manifest.json" ]]; then
    echo -e " ${GREEN}${RESET} manifest.json present"
    ((pass++)) || true
  else
    echo -e " ${RED}${RESET} manifest.json missing"
    ((fail++)) || true
  fi

  # Check checksums. The check runs from the current directory, so a
  # relative directory argument and the paths recorded in the checksum file
  # (as given at backup time, relative or absolute) still resolve; as a
  # fallback the file is also tried from inside the backup directory, for
  # checksum files whose paths are relative to it.
  if [[ -f "${verify_dir}/checksums.sha256" ]]; then
    if sha256sum -c "${verify_dir}/checksums.sha256" > /dev/null 2>&1 \
      || (cd "$verify_dir" && sha256sum -c checksums.sha256 > /dev/null 2>&1); then
      echo -e " ${GREEN}${RESET} All checksums valid"
      ((pass++)) || true
    else
      echo -e " ${RED}${RESET} Checksum verification failed"
      ((fail++)) || true
    fi
  else
    echo -e " ${YELLOW}!${RESET} No checksums file"
  fi

  # Check components
  local comp fcount
  for comp in grafana prometheus alertmanager blackbox; do
    if [[ -d "${verify_dir}/${comp}" ]]; then
      fcount=$(find "${verify_dir}/${comp}" -type f 2>/dev/null | wc -l || echo 0)
      echo -e " ${GREEN}${RESET} ${comp}/ (${fcount} files)"
      ((pass++)) || true
    fi
  done

  echo ""
  field_color "Passed:" "${GREEN}${pass}${RESET}"
  if [[ "$fail" -gt 0 ]]; then
    field_color "Failed:" "${RED}${fail}${RESET}"
    return 1
  fi
  field_color "Failed:" "${GREEN}0${RESET}"
}
# ══════════════════════════════════════════════════════════════════════
# LIST
# ══════════════════════════════════════════════════════════════════════
# Print a table of the backups found directly under OUTPUT_DIR, newest id
# first. Only subdirectories containing a manifest.json are listed; the
# timestamp, components, size and file count come from the manifest via jq
# (placeholders are shown when jq is not installed).
# Exits (via die) when OUTPUT_DIR does not exist.
do_list() {
  [[ -d "$OUTPUT_DIR" ]] || die "Backup directory not found: ${OUTPUT_DIR}"
  section_header "Available Backups"
  printf ' %b%-20s %-22s %-16s %8s %6s%b\n' \
    "$BOLD" "BACKUP ID" "TIMESTAMP" "COMPONENTS" "SIZE" "FILES" "$RESET"
  # Horizontal rule: 76 box-drawing characters.
  local rule
  printf -v rule '%76s' ''
  printf ' %s\n' "${rule// /─}"

  local -a backup_dirs=()
  mapfile -t backup_dirs < <(find "$OUTPUT_DIR" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | sort -r)

  local total=0 dir manifest_path
  local id stamp parts size files
  for dir in ${backup_dirs[@]+"${backup_dirs[@]}"}; do
    manifest_path="${dir}/manifest.json"
    [[ -f "$manifest_path" ]] || continue
    id=$(basename "$dir")
    if command -v jq &>/dev/null; then
      stamp=$(jq -r '.timestamp // "unknown"' "$manifest_path" 2>/dev/null || echo "unknown")
      parts=$(jq -r '.components // "unknown"' "$manifest_path" 2>/dev/null || echo "unknown")
      size=$(jq -r '.size // "?"' "$manifest_path" 2>/dev/null || echo "?")
      files=$(jq -r '.files // 0' "$manifest_path" 2>/dev/null || echo 0)
    else
      stamp="(jq required)"
      parts="?"
      size="?"
      files="?"
    fi
    printf ' %-20s %-22s %-16s %8s %6s\n' "$id" "${stamp:0:20}" "${parts:0:14}" "$size" "$files"
    total=$((total + 1))
  done

  echo ""
  field "Total backups:" "$total"
  if [[ "$total" -eq 0 ]]; then
    warn "No backups found in ${OUTPUT_DIR}"
  fi
}
# ══════════════════════════════════════════════════════════════════════
# HELP
# ══════════════════════════════════════════════════════════════════════
# Print the usage text (modes, options, environment variables, examples and
# exit codes) to stdout.
# The heredoc delimiter is unquoted, so ${BOLD}, ${RESET} and ${SCRIPT_NAME}
# are expanded; "\$TOKEN" is escaped so it is printed as the literal text
# $TOKEN. cat performs no escape processing: the colour variables show up as
# colours only when they hold real ESC bytes, and are printed verbatim when
# they hold the literal text "\033[...".
show_help() {
cat <<EOF
${BOLD}${SCRIPT_NAME}${RESET} — PromStack Backup
Backup and restore Prometheus, Grafana, Alertmanager, and Blackbox Exporter
configurations and data for disaster recovery.
${BOLD}MODES${RESET}
--backup Backup monitoring stack components
--restore Restore from a backup directory
--verify Verify backup integrity
--list List available backups
${BOLD}OPTIONS${RESET}
--output-dir DIR Backup output directory (default: ./monitoring-backups)
--restore-dir DIR Directory to restore from
--components LIST Components: all, grafana, prometheus, alertmanager, blackbox (default: all)
--grafana-url URL Grafana URL (default: http://localhost:3000)
--grafana-token TOK Grafana API key for authentication
--prometheus-url URL Prometheus URL (default: http://localhost:9090)
--alertmanager-url URL Alertmanager URL (default: http://localhost:9093)
--am-config DIR Alertmanager config directory (default: /etc/alertmanager)
--blackbox-url URL Blackbox Exporter URL (default: http://localhost:9115)
--blackbox-config DIR Blackbox Exporter config directory (default: /etc/blackbox_exporter)
--prom-config DIR Prometheus config directory (default: /etc/prometheus)
--prom-data DIR Prometheus data directory (default: /var/lib/prometheus)
--verbose Debug output
--no-color Disable colored output
--help Show this help message
${BOLD}ENVIRONMENT VARIABLES${RESET}
PSB_OUTPUT_DIR Default backup directory
PSB_COMPONENTS Default components to backup
GRAFANA_URL Grafana server URL
GRAFANA_API_KEY Grafana API key
GRAFANA_USER Grafana username (default: admin)
GRAFANA_PASS Grafana password
PROMETHEUS_URL Prometheus server URL
ALERTMANAGER_URL Alertmanager server URL
AM_CONFIG_PATH Alertmanager config directory
BLACKBOX_URL Blackbox Exporter server URL
BLACKBOX_CONFIG_PATH Blackbox Exporter config directory
PROM_CONFIG_PATH Prometheus config directory
PROM_DATA_PATH Prometheus data directory
VERBOSE Enable verbose output (true/false)
COLOR Color mode: auto, always, never
${BOLD}EXAMPLES${RESET}
# Backup all components
${SCRIPT_NAME} --backup
# Backup Grafana only
${SCRIPT_NAME} --backup --components grafana --grafana-token \$TOKEN
# Backup with custom URLs
${SCRIPT_NAME} --backup --grafana-url http://grafana:3000 --prometheus-url http://prom:9090
# List available backups
${SCRIPT_NAME} --list
# Verify a backup
${SCRIPT_NAME} --verify --restore-dir ./monitoring-backups/20260410-143000
# Restore from backup
${SCRIPT_NAME} --restore --restore-dir ./monitoring-backups/20260410-143000
# Restore Grafana only
${SCRIPT_NAME} --restore --restore-dir ./backups/20260410-143000 --components grafana
${BOLD}EXIT CODES${RESET}
0 Success
1 Runtime error
EOF
}
# ══════════════════════════════════════════════════════════════════════
# PARSE ARGS
# ══════════════════════════════════════════════════════════════════════
# Parse the command line into the global settings.
# Mode flags set RUN_MODE; value options store their argument and abort with
# the message after ":?" when the argument is missing or empty; --help prints
# the usage text and exits 0; anything else is rejected through die.
parse_args() {
  local flag
  while (($# > 0)); do
    flag="$1"
    case "$flag" in
      # Mode flags: the mode name is the flag without its leading dashes.
      --backup | --restore | --verify | --list)
        RUN_MODE="${flag#--}"
        ;;
      --verbose)
        VERBOSE="true"
        ;;
      --no-color)
        COLOR="never"
        ;;
      --help | -h)
        setup_colors
        show_help
        exit 0
        ;;
      # Value options: consume the value here, the flag itself below.
      --output-dir)
        OUTPUT_DIR="${2:?--output-dir requires a path}"
        shift
        ;;
      --restore-dir)
        RESTORE_DIR="${2:?--restore-dir requires a path}"
        shift
        ;;
      --components)
        COMPONENTS="${2:?--components requires a value}"
        shift
        ;;
      --grafana-url)
        GRAFANA_URL="${2:?--grafana-url requires a URL}"
        shift
        ;;
      --grafana-token)
        GRAFANA_TOKEN="${2:?--grafana-token requires a value}"
        shift
        ;;
      --prometheus-url)
        PROMETHEUS_URL="${2:?--prometheus-url requires a URL}"
        shift
        ;;
      --alertmanager-url)
        ALERTMANAGER_URL="${2:?--alertmanager-url requires a URL}"
        shift
        ;;
      --am-config)
        AM_CONFIG_PATH="${2:?--am-config requires a path}"
        shift
        ;;
      --blackbox-url)
        BLACKBOX_URL="${2:?--blackbox-url requires a URL}"
        shift
        ;;
      --blackbox-config)
        BLACKBOX_CONFIG_PATH="${2:?--blackbox-config requires a path}"
        shift
        ;;
      --prom-config)
        PROM_CONFIG_PATH="${2:?--prom-config requires a path}"
        shift
        ;;
      --prom-data)
        PROM_DATA_PATH="${2:?--prom-data requires a path}"
        shift
        ;;
      *)
        die "Unknown option: $1 (see --help)"
        ;;
    esac
    shift
  done
}
# ══════════════════════════════════════════════════════════════════════
# MAIN
# ══════════════════════════════════════════════════════════════════════
# Script entry point: parse the arguments, set up colours, then run the
# handler of the requested mode. Without a mode it prints an error followed
# by the usage text and exits 1. START_TIME is recorded right before the
# handler runs so elapsed can report the duration.
main() {
  parse_args "$@"
  setup_colors
  [[ -n "$RUN_MODE" ]] || {
    err "No mode specified"
    echo ""
    show_help
    exit 1
  }
  START_TIME=$(date +%s)
  case "$RUN_MODE" in
    # Each mode is implemented by the function named do_<mode>.
    backup | restore | verify | list) "do_${RUN_MODE}" ;;
    *) die "Unknown mode: ${RUN_MODE}" ;;
  esac
}
# Run the script: hand every command-line argument to main unchanged.
main "$@"