a1a17e81a1
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
775 lines
31 KiB
Bash
775 lines
31 KiB
Bash
#!/usr/bin/env bash
|
|
|
|
#########################################################################################
|
|
#### promstack-backup.sh — Backup/restore Prometheus, Grafana, Alertmanager, ####
|
|
#### and Blackbox Exporter. Export dashboards, alert rules, datasources, configs, ####
|
|
#### and TSDB snapshots for disaster recovery. ####
|
|
#### Requires: bash 4+, curl, optionally jq for dashboard export ####
|
|
#### ####
|
|
#### Author: Phil Connor ####
|
|
#### Contact: contact@mylinux.work ####
|
|
#### License: MIT ####
|
|
#### Version 1.11 ####
|
|
#### ####
|
|
#### Usage: ####
|
|
#### ./promstack-backup.sh --backup --output-dir ./backups ####
|
|
#### ####
|
|
#### See --help for all options. ####
|
|
#########################################################################################
|
|
|
|
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
|
|
|
|
# ── Colors (pre-initialized) ─────────────────────────────────────────
|
|
# Every color variable starts out empty so that output stays plain (and
# `set -u` stays happy) until setup_colors decides to enable ANSI codes.
RED=""
GREEN=""
YELLOW=""
BLUE=""
CYAN=""
BOLD=""
DIM=""
RESET=""
|
|
|
|
#######################################
# Populate the color variables with ANSI escape sequences when coloring is
# wanted; otherwise leave them untouched.
# Globals:   COLOR (read)  - "never" disables, "always" forces, anything else
#                            (default "auto") enables only when stdout is a TTY
#            RED GREEN YELLOW BLUE CYAN BOLD DIM RESET (written)
# Returns:   0 always
#######################################
setup_colors() {
    local mode="${COLOR:-auto}"

    case "$mode" in
        never) return 0 ;;
        always) ;;
        *) [[ -t 1 ]] || return 0 ;;
    esac

    # Stored as backslash sequences (not raw ESC bytes); they are expanded
    # later by `echo -e` / printf.
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    DIM='\033[2m'
    RESET='\033[0m'
}
|
|
|
|
# ── Logging ───────────────────────────────────────────────────────────
|
|
# Print an informational message (all arguments joined by spaces) to stdout.
log() { printf '%b\n' "${BLUE}[INFO]${RESET} $*"; }
|
|
# Print a warning message to stderr.
warn() { printf '%b\n' "${YELLOW}[WARN]${RESET} $*" >&2; }
|
|
# Print an error message to stderr (does not exit; see die).
err() { printf '%b\n' "${RED}[ERROR]${RESET} $*" >&2; }
|
|
# Print a debug message to stdout, but only when VERBOSE is exactly "true".
verbose() { [[ "$VERBOSE" != "true" ]] || printf '%b\n' "${DIM}[DEBUG]${RESET} $*"; }
|
|
# Report a fatal error through err and terminate the script with status 1.
die() {
    err "$*"
    exit 1
}
|
|
|
|
#######################################
# Print a highlighted section title surrounded by blank lines.
# Globals:   BOLD CYAN RESET (read)
# Arguments: $1 - section title
# Outputs:   blank line, decorated title, blank line on stdout
#######################################
section_header() {
    printf '\n%b\n\n' " ${BOLD}${CYAN}── $1 ──${RESET}"
}
|
|
|
|
#######################################
# Print a "label  value" row with the label left-aligned in 22 columns.
# Globals:   BOLD RESET (read)
# Arguments: $1 - label, $2 - value (printed verbatim, no escape expansion)
# Outputs:   one formatted line on stdout
#######################################
field() {
    # Colors go through %b instead of being spliced into the format string,
    # so a stray '%' in a color variable can never corrupt the format.
    printf ' %b%-22s%b %s\n' "$BOLD" "$1" "$RESET" "$2"
}
|
|
|
|
#######################################
# Like field, but the value may contain backslash color sequences, which are
# expanded when printed.
# Globals:   BOLD RESET (read)
# Arguments: $1 - label, $2 - value (backslash escapes are interpreted)
# Outputs:   one formatted line on stdout
#######################################
field_color() {
    # Colors are passed as %b arguments rather than embedded in the format.
    printf ' %b%-22s%b %b\n' "$BOLD" "$1" "$RESET" "$2"
}
|
|
|
|
#######################################
# Print the number of seconds since START_TIME, suffixed with "s".
# Globals:   START_TIME (read) - epoch seconds; when empty or unset the
#            elapsed time is reported as 0s instead of raising an
#            arithmetic syntax error
# Outputs:   e.g. "12s" on stdout
#######################################
elapsed() {
    local now
    now=$(date +%s)
    # An empty START_TIME falls back to the name "now", which arithmetic
    # expansion resolves to the current timestamp (difference 0).
    echo "$(( now - ${START_TIME:-now} ))s"
}
|
|
|
|
# ── Defaults ──────────────────────────────────────────────────────────
|
|
# Mode selected on the command line (backup|restore|verify|list); empty until
# parse_args sets it.
RUN_MODE=""

# Where backups are written / listed from, and the directory to restore from.
OUTPUT_DIR="${PSB_OUTPUT_DIR:-./monitoring-backups}"
RESTORE_DIR=""

# Grafana endpoint and credentials. Note the token is read from
# GRAFANA_API_KEY, not from a GRAFANA_TOKEN environment variable.
GRAFANA_URL="${GRAFANA_URL:-http://localhost:3000}"
GRAFANA_TOKEN="${GRAFANA_API_KEY:-}"
GRAFANA_USER="${GRAFANA_USER:-admin}"
GRAFANA_PASS="${GRAFANA_PASS:-}"

# Service endpoints and on-disk configuration directories.
PROMETHEUS_URL="${PROMETHEUS_URL:-http://localhost:9090}"
ALERTMANAGER_URL="${ALERTMANAGER_URL:-http://localhost:9093}"
AM_CONFIG_PATH="${AM_CONFIG_PATH:-/etc/alertmanager}"
BLACKBOX_URL="${BLACKBOX_URL:-http://localhost:9115}"
BLACKBOX_CONFIG_PATH="${BLACKBOX_CONFIG_PATH:-/etc/blackbox_exporter}"
PROM_CONFIG_PATH="${PROM_CONFIG_PATH:-/etc/prometheus}"
PROM_DATA_PATH="${PROM_DATA_PATH:-/var/lib/prometheus}"

# Comma-separated component list ("all" selects everything), and output knobs.
COMPONENTS="${PSB_COMPONENTS:-all}"
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"
|
|
|
|
# ── State ─────────────────────────────────────────────────────────────
|
|
# Script name for help output. Parameter expansion is used instead of
# `basename "$0"` so a $0 beginning with "-" cannot be mistaken for an option
# (which would abort the script under `set -e`).
SCRIPT_NAME="${0##*/}"
readonly SCRIPT_NAME

# Run state: epoch start time (set in main), backup id (set in do_backup), and
# running success/error counters updated by the backup functions.
START_TIME=""
BACKUP_ID=""
BACKUP_COUNT=0
ERROR_COUNT=0
|
|
|
|
# ── API helpers ──────────────────────────────────────────────────────
|
|
#######################################
# Issue a JSON request against the Grafana HTTP API.
# Globals:   GRAFANA_URL (read), GRAFANA_TOKEN (read; bearer token, takes
#            precedence), GRAFANA_USER / GRAFANA_PASS (read; basic auth used
#            only when no token is set)
# Arguments: $1 - HTTP method, $2 - endpoint path appended to GRAFANA_URL,
#            remaining arguments are passed to curl unchanged
# Outputs:   response body on stdout
# Returns:   curl's exit status; non-zero on transport errors and, because of
#            --fail (-f), also on HTTP status >= 400
#######################################
grafana_api() {
    local method="$1" endpoint="$2"
    shift 2

    # Build one argument array that is never empty: expanding an empty array
    # under `set -u` is an "unbound variable" error on bash older than 4.4.
    # -f makes HTTP errors (401, 404, 409, ...) visible to callers instead of
    # letting an error document pass for a successful response.
    local -a curl_args=(-sS -f -X "$method")

    if [[ -n "$GRAFANA_TOKEN" ]]; then
        curl_args+=(-H "Authorization: Bearer ${GRAFANA_TOKEN}")
    elif [[ -n "$GRAFANA_PASS" ]]; then
        curl_args+=(-u "${GRAFANA_USER}:${GRAFANA_PASS}")
    fi

    curl_args+=(-H "Content-Type: application/json" "${GRAFANA_URL}${endpoint}")

    curl "${curl_args[@]}" "$@"
}
|
|
|
|
#######################################
# Run a command, capture its stdout into a file, and record the outcome.
# The item counts as backed up only when the command succeeds AND the file is
# non-empty; otherwise the file is removed and the item is counted as an error.
# Globals:   BACKUP_COUNT ERROR_COUNT (written), VERBOSE GREEN YELLOW DIM
#            RESET (read)
# Arguments: $1 - label shown to the user
#            $2 - output file path
#            remaining - command (and its arguments) producing the content
# Outputs:   one status line on stdout
# Returns:   0 always
#######################################
backup_item() {
    local label="$1" outfile="$2"
    shift 2

    verbose "Backing up: ${label}"

    # The command's stderr is normally discarded to keep the report tidy; in
    # verbose mode it is let through so the reason for a skip is visible.
    local err_sink=/dev/null
    if [[ "${VERBOSE:-false}" == "true" ]]; then
        err_sink=/dev/stderr
    fi

    if "$@" > "$outfile" 2> "$err_sink" && [[ -s "$outfile" ]]; then
        BACKUP_COUNT=$((BACKUP_COUNT + 1))
        echo -e " ${GREEN}✓${RESET} ${label}"
    else
        ERROR_COUNT=$((ERROR_COUNT + 1))
        echo -e " ${YELLOW}⊘${RESET} ${label} ${DIM}(skipped)${RESET}"
        rm -f -- "$outfile"
    fi
}
|
|
|
|
#######################################
# Tell whether a component is enabled by the COMPONENTS list.
# Matching is case-insensitive, ignores whitespace around list entries, and
# treats the name literally (not as a regular expression). The entry "all"
# enables every component.
# Globals:   COMPONENTS (read) - comma-separated list
# Arguments: $1 - component name, e.g. "grafana"
# Returns:   0 when selected, 1 otherwise
#######################################
component_selected() {
    local wanted="${1,,}"
    local list="${COMPONENTS,,}"

    # "grafana, prometheus" should behave like "grafana,prometheus".
    list="${list//[[:space:]]/}"

    [[ ",${list}," == *",all,"* || ",${list}," == *",${wanted},"* ]]
}
|
|
|
|
#######################################
# Locate a config file: look in the component's own directory first, then fall
# back to the Prometheus config directory (PROM_CONFIG_PATH).
# Globals:   PROM_CONFIG_PATH (read)
# Arguments: $1 - file name, $2 - primary directory
# Outputs:   full path of the first match on stdout; nothing when not found
# Returns:   0 always (callers test for empty output)
#######################################
find_config() {
    local filename="$1" primary_dir="$2"
    local dir

    for dir in "$primary_dir" "$PROM_CONFIG_PATH"; do
        if [[ -f "${dir}/${filename}" ]]; then
            printf '%s\n' "${dir}/${filename}"
            return 0
        fi
    done

    return 0
}
|
|
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
# BACKUP
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
#######################################
# Export Grafana datasources, folders, dashboards, alert rules and
# notification policies into <backup_dir>/grafana.
# Individual dashboards are exported only when jq is available; without jq
# just the dashboard search listing is saved.
# Globals:   GRAFANA_URL GREEN RESET (read), BACKUP_COUNT ERROR_COUNT (written)
# Arguments: $1 - backup root directory
# Returns:   0; problems are reported and counted in ERROR_COUNT
#######################################
backup_grafana() {
    local backup_dir="$1"
    local grafana_dir="${backup_dir}/grafana"
    mkdir -p "${grafana_dir}/dashboards"

    section_header "Grafana Backup"
    field "URL:" "$GRAFANA_URL"
    echo ""

    # Test connectivity
    if ! grafana_api GET "/api/health" > /dev/null 2>&1; then
        warn "Cannot reach Grafana at ${GRAFANA_URL} — skipping"
        ERROR_COUNT=$((ERROR_COUNT + 1))
        return 0
    fi

    # Export datasources
    backup_item "Datasources" "${grafana_dir}/datasources.json" \
        grafana_api GET "/api/datasources"

    # Export folders
    backup_item "Folders" "${grafana_dir}/folders.json" \
        grafana_api GET "/api/folders"

    # Export dashboards. A failed listing is reported and counted rather than
    # silently turning into "0 exported".
    local dashboard_list
    if ! dashboard_list=$(grafana_api GET "/api/search?type=dash-db&limit=5000" 2>/dev/null); then
        warn "Could not list Grafana dashboards"
        ERROR_COUNT=$((ERROR_COUNT + 1))
        dashboard_list="[]"
    fi

    if command -v jq &>/dev/null; then
        local uid dash_file
        local dash_count=0 dash_failed=0

        while IFS= read -r uid; do
            [[ -n "$uid" ]] || continue
            dash_file="${grafana_dir}/dashboards/${uid}.json"
            if grafana_api GET "/api/dashboards/uid/${uid}" > "$dash_file" 2>/dev/null \
                && [[ -s "$dash_file" ]]; then
                dash_count=$((dash_count + 1))
                BACKUP_COUNT=$((BACKUP_COUNT + 1))
            else
                # A dashboard that could not be fetched must not vanish
                # unnoticed from a disaster-recovery backup.
                dash_failed=$((dash_failed + 1))
                ERROR_COUNT=$((ERROR_COUNT + 1))
                rm -f -- "$dash_file"
            fi
        done < <(jq -r '.[].uid // empty' <<< "$dashboard_list" 2>/dev/null || true)

        echo -e " ${GREEN}✓${RESET} Dashboards (${dash_count} exported)"
        if [[ "$dash_failed" -gt 0 ]]; then
            warn "${dash_failed} dashboard(s) could not be exported"
        fi
    else
        backup_item "Dashboard list" "${grafana_dir}/dashboard-list.json" \
            echo "$dashboard_list"
    fi

    # Export alert rules
    backup_item "Alert rules" "${grafana_dir}/alert-rules.json" \
        grafana_api GET "/api/v1/provisioning/alert-rules"

    # Export notification policies
    backup_item "Notification policies" "${grafana_dir}/notification-policies.json" \
        grafana_api GET "/api/v1/provisioning/policies"
}
|
|
|
|
#######################################
# Back up prometheus.yml, rule files, and API snapshots of alerts, targets
# and rule groups into <backup_dir>/prometheus.
# Rule files (*.yml / *.yaml) are taken from PROM_CONFIG_PATH/rules, or from
# PROM_CONFIG_PATH/rules.d when the former does not exist, and are copied
# flat into prometheus/rules/.
# Globals:   PROM_CONFIG_PATH PROMETHEUS_URL GREEN RESET (read),
#            BACKUP_COUNT ERROR_COUNT (written)
# Arguments: $1 - backup root directory
# Returns:   0; problems are reported and counted in ERROR_COUNT
#######################################
backup_prometheus() {
    local backup_dir="$1"
    local prom_dir="${backup_dir}/prometheus"
    mkdir -p "${prom_dir}/rules"

    section_header "Prometheus Backup"
    field "Config path:" "$PROM_CONFIG_PATH"
    field "API:" "$PROMETHEUS_URL"
    echo ""

    # Copy prometheus.yml. backup_item captures the command's stdout, so the
    # file is streamed with cat (as the other components do) rather than
    # running cp through the redirect and copying a second time afterwards.
    if [[ -f "${PROM_CONFIG_PATH}/prometheus.yml" ]]; then
        backup_item "prometheus.yml" "${prom_dir}/prometheus.yml" \
            cat "${PROM_CONFIG_PATH}/prometheus.yml"
    else
        warn "prometheus.yml not found at ${PROM_CONFIG_PATH}"
        ERROR_COUNT=$((ERROR_COUNT + 1))
    fi

    # Copy alert/recording rules from the first rules directory that exists.
    local rules_src=""
    if [[ -d "${PROM_CONFIG_PATH}/rules" ]]; then
        rules_src="${PROM_CONFIG_PATH}/rules"
    elif [[ -d "${PROM_CONFIG_PATH}/rules.d" ]]; then
        rules_src="${PROM_CONFIG_PATH}/rules.d"
    fi

    local rules_count=0 rf
    if [[ -n "$rules_src" ]]; then
        while IFS= read -r -d '' rf; do
            if cp -- "$rf" "${prom_dir}/rules/" 2>/dev/null; then
                rules_count=$((rules_count + 1))
            else
                warn "Could not copy rule file ${rf}"
                ERROR_COUNT=$((ERROR_COUNT + 1))
            fi
        done < <(find "$rules_src" \( -name '*.yml' -o -name '*.yaml' \) -print0 2>/dev/null | sort -z)
    fi

    if [[ "$rules_count" -gt 0 ]]; then
        echo -e " ${GREEN}✓${RESET} Alert/recording rules (${rules_count} files)"
        # Count every file, matching how dashboards are counted.
        BACKUP_COUNT=$((BACKUP_COUNT + rules_count))
    fi

    # API exports. -f turns HTTP errors into failures so an error page is
    # never stored as if it were a valid export.
    backup_item "Active alerts" "${prom_dir}/active-alerts.json" \
        curl -sS -f "${PROMETHEUS_URL}/api/v1/alerts"

    backup_item "Scrape targets" "${prom_dir}/targets.json" \
        curl -sS -f "${PROMETHEUS_URL}/api/v1/targets"

    backup_item "Rule groups (API)" "${prom_dir}/rule-groups.json" \
        curl -sS -f "${PROMETHEUS_URL}/api/v1/rules"
}
|
|
|
|
#######################################
# Back up Alertmanager status (which includes the running config), silences,
# active alerts, the on-disk alertmanager.yml and notification templates into
# <backup_dir>/alertmanager.
# Config and templates are searched in AM_CONFIG_PATH first and then in
# PROM_CONFIG_PATH.
# Globals:   ALERTMANAGER_URL AM_CONFIG_PATH PROM_CONFIG_PATH GREEN RESET
#            (read), BACKUP_COUNT ERROR_COUNT (written)
# Arguments: $1 - backup root directory
# Returns:   0; problems are reported and counted in ERROR_COUNT
#######################################
backup_alertmanager() {
    local backup_dir="$1"
    local am_dir="${backup_dir}/alertmanager"
    mkdir -p "$am_dir"

    section_header "Alertmanager Backup"
    field "API:" "$ALERTMANAGER_URL"
    echo ""

    # API exports. -f makes HTTP errors fail instead of being saved as data.
    backup_item "Config (via API)" "${am_dir}/status.json" \
        curl -sS -f "${ALERTMANAGER_URL}/api/v2/status"

    backup_item "Silences" "${am_dir}/silences.json" \
        curl -sS -f "${ALERTMANAGER_URL}/api/v2/silences"

    backup_item "Active alerts" "${am_dir}/alerts.json" \
        curl -sS -f "${ALERTMANAGER_URL}/api/v2/alerts"

    # Copy config file — check canonical path, then the Prometheus config dir
    local am_conf
    am_conf=$(find_config "alertmanager.yml" "$AM_CONFIG_PATH")
    if [[ -n "$am_conf" ]]; then
        verbose "Found alertmanager.yml at ${am_conf}"
        backup_item "alertmanager.yml" "${am_dir}/alertmanager.yml" \
            cat "$am_conf"
    else
        verbose "alertmanager.yml not found in ${AM_CONFIG_PATH} or ${PROM_CONFIG_PATH}"
    fi

    # Copy templates from the first templates directory that exists.
    local tmpl_dir=""
    if [[ -d "${AM_CONFIG_PATH}/templates" ]]; then
        tmpl_dir="${AM_CONFIG_PATH}/templates"
    elif [[ -d "${PROM_CONFIG_PATH}/templates" ]]; then
        tmpl_dir="${PROM_CONFIG_PATH}/templates"
    fi

    if [[ -n "$tmpl_dir" ]]; then
        local tmpl_count=0 tf
        mkdir -p "${am_dir}/templates"
        while IFS= read -r -d '' tf; do
            if cp -- "$tf" "${am_dir}/templates/" 2>/dev/null; then
                tmpl_count=$((tmpl_count + 1))
            else
                warn "Could not copy template ${tf}"
                ERROR_COUNT=$((ERROR_COUNT + 1))
            fi
        done < <(find "$tmpl_dir" -type f -print0 2>/dev/null | sort -z)

        if [[ "$tmpl_count" -gt 0 ]]; then
            echo -e " ${GREEN}✓${RESET} Templates (${tmpl_count} files)"
            # Count every file, matching how dashboards are counted.
            BACKUP_COUNT=$((BACKUP_COUNT + tmpl_count))
        fi
    fi
}
|
|
|
|
#######################################
# Back up the Blackbox Exporter config file (blackbox.yml, else config.yml)
# and the running config served by the exporter into <backup_dir>/blackbox.
# The on-disk file is backed up even when the exporter cannot be reached;
# only the API export depends on connectivity.
# Globals:   BLACKBOX_URL BLACKBOX_CONFIG_PATH PROM_CONFIG_PATH (read),
#            ERROR_COUNT (written)
# Arguments: $1 - backup root directory
# Returns:   0; problems are reported and counted in ERROR_COUNT
#######################################
backup_blackbox() {
    local backup_dir="$1"
    local bb_dir="${backup_dir}/blackbox"
    mkdir -p "$bb_dir"

    section_header "Blackbox Exporter Backup"
    field "Config path:" "$BLACKBOX_CONFIG_PATH"
    field "API:" "$BLACKBOX_URL"
    echo ""

    # Copy blackbox.yml — check canonical path, then the Prometheus config
    # dir. Done before the connectivity test so that a stopped exporter does
    # not prevent its configuration from being saved.
    local bb_conf
    bb_conf=$(find_config "blackbox.yml" "$BLACKBOX_CONFIG_PATH")
    if [[ -z "$bb_conf" ]]; then
        bb_conf=$(find_config "config.yml" "$BLACKBOX_CONFIG_PATH")
    fi

    if [[ -n "$bb_conf" ]]; then
        local bb_name="${bb_conf##*/}"
        verbose "Found ${bb_name} at ${bb_conf}"
        backup_item "${bb_name}" "${bb_dir}/${bb_name}" \
            cat "$bb_conf"
    else
        warn "No blackbox config found at ${BLACKBOX_CONFIG_PATH} or ${PROM_CONFIG_PATH}"
        ERROR_COUNT=$((ERROR_COUNT + 1))
    fi

    # Test connectivity; -f treats HTTP errors as "not healthy".
    if ! curl -sS -f "${BLACKBOX_URL}/-/healthy" > /dev/null 2>&1; then
        warn "Cannot reach Blackbox Exporter at ${BLACKBOX_URL} — skipping API export"
        ERROR_COUNT=$((ERROR_COUNT + 1))
        return 0
    fi

    # Export probe config via API
    backup_item "Config (via API)" "${bb_dir}/config-api.json" \
        curl -sS -f "${BLACKBOX_URL}/config"
}
|
|
|
|
#######################################
# Run a full backup: create OUTPUT_DIR/<timestamp>, back up every selected
# component, then write manifest.json and checksums.sha256 and print a
# summary.
# Globals:   OUTPUT_DIR COMPONENTS GREEN RED RESET (read),
#            BACKUP_ID (written), BACKUP_COUNT ERROR_COUNT (read/written)
# Returns:   0; component problems are reported and counted in ERROR_COUNT
#######################################
do_backup() {
    BACKUP_ID="$(date +%Y%m%d-%H%M%S)"
    local backup_dir="${OUTPUT_DIR}/${BACKUP_ID}"
    mkdir -p "$backup_dir"

    # Resolve to an absolute path. The checksum file records the paths it is
    # given; with a relative OUTPUT_DIR (the default) those entries could only
    # be checked from the directory the backup happened to be started in.
    backup_dir=$(CDPATH= cd -- "$backup_dir" && pwd)

    log "Starting monitoring backup..."
    field "Backup ID:" "$BACKUP_ID"
    field "Output:" "$backup_dir"
    field "Components:" "$COMPONENTS"

    if component_selected "grafana"; then
        backup_grafana "$backup_dir"
    fi

    if component_selected "prometheus"; then
        backup_prometheus "$backup_dir"
    fi

    if component_selected "alertmanager"; then
        backup_alertmanager "$backup_dir"
    fi

    if component_selected "blackbox"; then
        backup_blackbox "$backup_dir"
    fi

    # Create manifest
    section_header "Finalizing"

    local total_size
    total_size=$(du -sh "$backup_dir" 2>/dev/null | awk '{print $1}' || true)
    [[ -n "$total_size" ]] || total_size="unknown"

    # Counted before the manifest and checksum files are written. Some wc
    # implementations pad the number with spaces, so those are stripped.
    local file_count
    file_count=$(find "$backup_dir" -type f 2>/dev/null | wc -l || true)
    file_count="${file_count//[[:space:]]/}"
    [[ -n "$file_count" ]] || file_count=0

    # Escape backslashes and double quotes so a user-supplied component list
    # cannot produce an invalid manifest.
    local components_json="${COMPONENTS//\\/\\\\}"
    components_json="${components_json//\"/\\\"}"

    printf '{"backup_id":"%s","timestamp":"%s","components":"%s","files":%s,"size":"%s"}\n' \
        "$BACKUP_ID" "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$components_json" \
        "$file_count" "$total_size" > "${backup_dir}/manifest.json"

    # Generate checksums (the checksum file itself is excluded). A failure is
    # reported instead of being hidden behind a success tick.
    if find "$backup_dir" -type f ! -name "checksums.sha256" -print0 2>/dev/null \
        | sort -z \
        | xargs -0 sha256sum > "${backup_dir}/checksums.sha256" 2>/dev/null; then
        echo -e " ${GREEN}✓${RESET} Manifest and checksums created"
    else
        warn "Could not generate checksums (is sha256sum installed?)"
        ERROR_COUNT=$((ERROR_COUNT + 1))
    fi

    section_header "Backup Summary"
    field "Backup ID:" "$BACKUP_ID"
    field "Location:" "$backup_dir"
    field_color "Files backed up:" "${GREEN}${BACKUP_COUNT}${RESET}"
    if [[ "$ERROR_COUNT" -gt 0 ]]; then
        field_color "Errors:" "${RED}${ERROR_COUNT}${RESET}"
    else
        field_color "Errors:" "${GREEN}0${RESET}"
    fi
    field "Total size:" "$total_size"
    field "Duration:" "$(elapsed)"
}
|
|
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
# RESTORE
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
# Check the backup's checksum file, if there is one, and warn on mismatch.
# Recorded paths may be absolute, relative to the current directory, or
# relative to the backup directory, so each base is tried in turn ("/" last,
# which is what the check historically used).
_psb_restore_check_integrity() {
    [[ -f "${RESTORE_DIR}/checksums.sha256" ]] || return 0

    log "Verifying backup integrity..."

    local abs_dir base sums_ok=false
    abs_dir=$(CDPATH= cd -- "$RESTORE_DIR" && pwd)
    for base in "$PWD" "$abs_dir" /; do
        if (cd "$base" && sha256sum -c "${abs_dir}/checksums.sha256" > /dev/null 2>&1); then
            sums_ok=true
            break
        fi
    done

    if [[ "$sums_ok" == true ]]; then
        echo -e " ${GREEN}✓${RESET} Checksums verified"
    else
        warn "Some checksums failed — proceed with caution"
    fi
}

# POST to a service's /-/reload endpoint and report the outcome.
# $1 - display name, $2 - base URL. -f makes an HTTP error (for example a
# disabled lifecycle API) count as a failed reload.
_psb_restore_reload() {
    local label="$1" url="$2"

    if curl -sS -f -X POST "${url}/-/reload" > /dev/null 2>&1; then
        echo -e " ${GREEN}✓${RESET} ${label} reloaded"
    else
        warn "Could not reload ${label} — restart manually"
    fi
}

# Copy RESTORE_DIR/<$1>/<$2> to <$3>/<$2>; when directory $3 does not exist
# but the file already lives in PROM_CONFIG_PATH, restore it there instead.
_psb_restore_config_file() {
    local component_dir="$1" filename="$2" primary_dir="$3"
    local target="${primary_dir}/${filename}"

    if [[ ! -d "$primary_dir" ]] && [[ -f "${PROM_CONFIG_PATH}/${filename}" ]]; then
        target="${PROM_CONFIG_PATH}/${filename}"
    fi

    if cp -- "${RESTORE_DIR}/${component_dir}/${filename}" "$target" 2>/dev/null; then
        echo -e " ${GREEN}✓${RESET} ${filename} restored → ${target}"
    else
        echo -e " ${RED}✗${RESET} Failed to restore ${filename}"
    fi
}

# Re-create Grafana datasources and dashboards through the API (needs jq).
_psb_restore_grafana() {
    section_header "Restoring Grafana"

    local have_jq=false
    if command -v jq &>/dev/null; then
        have_jq=true
    fi

    # Restore datasources
    local ds_file="${RESTORE_DIR}/grafana/datasources.json"
    if [[ -f "$ds_file" ]]; then
        if [[ "$have_jq" == true ]]; then
            local ds ds_name ds_count=0
            while IFS= read -r ds; do
                [[ -n "$ds" ]] || continue
                ds_name=$(jq -r '.name // "unknown"' <<< "$ds" 2>/dev/null) || ds_name="unknown"
                if grafana_api POST "/api/datasources" -d "$ds" > /dev/null 2>&1; then
                    echo -e " ${GREEN}✓${RESET} Datasource: ${ds_name}"
                    ds_count=$((ds_count + 1))
                else
                    echo -e " ${YELLOW}⊘${RESET} Datasource: ${ds_name} (may already exist)"
                fi
            done < <(jq -c '.[]' "$ds_file" 2>/dev/null)
            log "Restored ${ds_count} datasources"
        else
            warn "jq not found — skipping datasource restore"
        fi
    fi

    # Restore dashboards
    local dash_dir="${RESTORE_DIR}/grafana/dashboards"
    if [[ -d "$dash_dir" ]]; then
        if [[ "$have_jq" == true ]]; then
            local df payload dash_count=0
            for df in "$dash_dir"/*.json; do
                [[ -f "$df" ]] || continue
                # The numeric id belongs to the instance the backup came
                # from; it is nulled so a fresh instance creates the
                # dashboard (matched by uid) instead of rejecting it.
                payload=$(jq '{dashboard: (.dashboard | .id = null), overwrite: true}' "$df" 2>/dev/null || cat "$df")
                # Sent on stdin: a large dashboard can exceed the maximum
                # size of a single command-line argument.
                if grafana_api POST "/api/dashboards/db" --data-binary @- <<< "$payload" > /dev/null 2>&1; then
                    dash_count=$((dash_count + 1))
                fi
            done
            echo -e " ${GREEN}✓${RESET} Dashboards (${dash_count} imported)"
        else
            warn "jq not found — skipping dashboard restore"
        fi
    fi
}

# Restore prometheus.yml and rule files, then ask Prometheus to reload.
_psb_restore_prometheus() {
    section_header "Restoring Prometheus"

    if [[ -f "${RESTORE_DIR}/prometheus/prometheus.yml" ]]; then
        if cp -- "${RESTORE_DIR}/prometheus/prometheus.yml" "${PROM_CONFIG_PATH}/prometheus.yml" 2>/dev/null; then
            echo -e " ${GREEN}✓${RESET} prometheus.yml restored"
        else
            echo -e " ${RED}✗${RESET} Failed to restore prometheus.yml (check permissions)"
        fi
    fi

    if [[ -d "${RESTORE_DIR}/prometheus/rules" ]]; then
        local target_rules="${PROM_CONFIG_PATH}/rules"
        local rf rule_count=0
        # Best effort: when the directory cannot be created the copies below
        # fail and the reported count stays at 0.
        mkdir -p "$target_rules" 2>/dev/null || true
        for rf in "${RESTORE_DIR}/prometheus/rules"/*; do
            [[ -f "$rf" ]] || continue
            if cp -- "$rf" "$target_rules/" 2>/dev/null; then
                rule_count=$((rule_count + 1))
            fi
        done
        echo -e " ${GREEN}✓${RESET} Alert rules (${rule_count} files)"
    fi

    _psb_restore_reload "Prometheus" "$PROMETHEUS_URL"
}

# Restore alertmanager.yml, then ask Alertmanager to reload.
_psb_restore_alertmanager() {
    section_header "Restoring Alertmanager"

    if [[ -f "${RESTORE_DIR}/alertmanager/alertmanager.yml" ]]; then
        _psb_restore_config_file "alertmanager" "alertmanager.yml" "$AM_CONFIG_PATH"
    fi

    _psb_restore_reload "Alertmanager" "$ALERTMANAGER_URL"
}

# Restore blackbox.yml (or config.yml), then ask the exporter to reload.
_psb_restore_blackbox() {
    section_header "Restoring Blackbox Exporter"

    local bb_conf=""
    if [[ -f "${RESTORE_DIR}/blackbox/blackbox.yml" ]]; then
        bb_conf="blackbox.yml"
    elif [[ -f "${RESTORE_DIR}/blackbox/config.yml" ]]; then
        bb_conf="config.yml"
    fi

    if [[ -n "$bb_conf" ]]; then
        _psb_restore_config_file "blackbox" "$bb_conf" "$BLACKBOX_CONFIG_PATH"
    fi

    _psb_restore_reload "Blackbox Exporter" "$BLACKBOX_URL"
}

#######################################
# Restore the selected components from the backup in RESTORE_DIR.
# A component is restored only when its directory exists in the backup AND it
# is enabled by COMPONENTS. Per-item failures are reported but do not abort
# the restore.
# Globals:   RESTORE_DIR COMPONENTS PROM_CONFIG_PATH AM_CONFIG_PATH
#            BLACKBOX_CONFIG_PATH PROMETHEUS_URL ALERTMANAGER_URL BLACKBOX_URL
#            GREEN RED YELLOW RESET (read)
# Returns:   0; exits 1 via die when RESTORE_DIR is empty, is not a
#            directory, or contains no manifest.json
#######################################
do_restore() {
    [[ -n "$RESTORE_DIR" ]] || die "No restore directory specified (--restore-dir)"
    [[ -d "$RESTORE_DIR" ]] || die "Restore directory not found: ${RESTORE_DIR}"
    [[ -f "${RESTORE_DIR}/manifest.json" ]] || die "No manifest.json in ${RESTORE_DIR}"

    log "Restoring from ${RESTORE_DIR}..."

    # Verify checksums first
    _psb_restore_check_integrity

    if [[ -d "${RESTORE_DIR}/grafana" ]] && component_selected "grafana"; then
        _psb_restore_grafana
    fi

    if [[ -d "${RESTORE_DIR}/prometheus" ]] && component_selected "prometheus"; then
        _psb_restore_prometheus
    fi

    if [[ -d "${RESTORE_DIR}/alertmanager" ]] && component_selected "alertmanager"; then
        _psb_restore_alertmanager
    fi

    if [[ -d "${RESTORE_DIR}/blackbox" ]] && component_selected "blackbox"; then
        _psb_restore_blackbox
    fi

    section_header "Restore Summary"
    field "Restored from:" "$RESTORE_DIR"
    field "Duration:" "$(elapsed)"
    log "Restore complete — verify services are healthy"
}
|
|
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
# VERIFY
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
#######################################
# Verify a backup directory: manifest present, checksums valid, and report
# which component directories it contains.
# Globals:   RESTORE_DIR (read) - directory to verify
#            GREEN RED YELLOW RESET (read)
# Outputs:   a check list and pass/fail totals on stdout
# Returns:   0 when no check failed, 1 otherwise; exits 1 via die when the
#            directory is missing or unspecified
#######################################
do_verify() {
    local verify_dir="${RESTORE_DIR:-}"
    [[ -n "$verify_dir" ]] || die "Specify backup directory with --restore-dir"
    [[ -d "$verify_dir" ]] || die "Directory not found: ${verify_dir}"

    section_header "Backup Verification"
    field "Directory:" "$verify_dir"
    echo ""

    local pass=0 fail=0

    # Check manifest
    if [[ -f "${verify_dir}/manifest.json" ]]; then
        echo -e " ${GREEN}✓${RESET} manifest.json present"
        pass=$((pass + 1))
    else
        echo -e " ${RED}✗${RESET} manifest.json missing"
        fail=$((fail + 1))
    fi

    # Check checksums. The recorded paths may be absolute, relative to the
    # current directory, or relative to the backup directory, so each base is
    # tried in turn ("/" last, which is what this check historically used).
    # The checksum file is always addressed by absolute path so a relative
    # --restore-dir keeps working after the cd.
    if [[ -f "${verify_dir}/checksums.sha256" ]]; then
        local abs_dir base sums_ok=false
        abs_dir=$(CDPATH= cd -- "$verify_dir" && pwd)
        for base in "$PWD" "$abs_dir" /; do
            if (cd "$base" && sha256sum -c "${abs_dir}/checksums.sha256" > /dev/null 2>&1); then
                sums_ok=true
                break
            fi
        done

        if [[ "$sums_ok" == true ]]; then
            echo -e " ${GREEN}✓${RESET} All checksums valid"
            pass=$((pass + 1))
        else
            echo -e " ${RED}✗${RESET} Checksum verification failed"
            fail=$((fail + 1))
        fi
    else
        echo -e " ${YELLOW}!${RESET} No checksums file"
    fi

    # Check components
    local comp fcount
    for comp in grafana prometheus alertmanager blackbox; do
        if [[ -d "${verify_dir}/${comp}" ]]; then
            fcount=$(find "${verify_dir}/${comp}" -type f 2>/dev/null | wc -l || true)
            fcount="${fcount//[[:space:]]/}"
            echo -e " ${GREEN}✓${RESET} ${comp}/ (${fcount:-0} files)"
            pass=$((pass + 1))
        fi
    done

    echo ""
    field_color "Passed:" "${GREEN}${pass}${RESET}"
    if [[ "$fail" -gt 0 ]]; then
        field_color "Failed:" "${RED}${fail}${RESET}"
    else
        field_color "Failed:" "${GREEN}0${RESET}"
    fi

    # Let the exit status reflect the result so automation can rely on it.
    [[ "$fail" -eq 0 ]]
}
|
|
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
# LIST
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
#######################################
# Print a table of the backups found directly under OUTPUT_DIR, newest first
# (reverse name order). Only sub-directories containing a manifest.json are
# listed; manifest details are shown when jq is available.
# Globals:   OUTPUT_DIR BOLD RESET (read)
# Outputs:   table and total on stdout; a warning on stderr when none found
# Returns:   0; exits 1 via die when OUTPUT_DIR is not a directory
#######################################
do_list() {
    [[ -d "$OUTPUT_DIR" ]] || die "Backup directory not found: ${OUTPUT_DIR}"

    section_header "Available Backups"

    # Colors are passed as %b arguments rather than embedded in the format.
    printf ' %b%-20s %-22s %-16s %8s %6s%b\n' \
        "$BOLD" "BACKUP ID" "TIMESTAMP" "COMPONENTS" "SIZE" "FILES" "$RESET"
    printf " %s\n" "$(printf '%.0s─' {1..76})"

    local have_jq=false
    if command -v jq &>/dev/null; then
        have_jq=true
    fi

    local count=0
    local d manifest bid ts comp sz fc
    while IFS= read -r d; do
        manifest="${d}/manifest.json"
        [[ -f "$manifest" ]] || continue

        bid="${d##*/}"
        if [[ "$have_jq" == true ]]; then
            ts=$(jq -r '.timestamp // "unknown"' "$manifest" 2>/dev/null || echo "unknown")
            comp=$(jq -r '.components // "unknown"' "$manifest" 2>/dev/null || echo "unknown")
            sz=$(jq -r '.size // "?"' "$manifest" 2>/dev/null || echo "?")
            fc=$(jq -r '.files // 0' "$manifest" 2>/dev/null || echo 0)
        else
            ts="(jq required)"
            comp="?" sz="?" fc="?"
        fi

        # Timestamp and component list are truncated to fit their columns.
        printf " %-20s %-22s %-16s %8s %6s\n" "$bid" "${ts:0:20}" "${comp:0:14}" "$sz" "$fc"
        count=$((count + 1))
    done < <(find "$OUTPUT_DIR" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | sort -r)

    echo ""
    field "Total backups:" "$count"

    if [[ "$count" -eq 0 ]]; then
        warn "No backups found in ${OUTPUT_DIR}"
    fi
}
|
|
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
# HELP
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
#######################################
# Print the usage text.
# Globals:   BOLD RESET SCRIPT_NAME (read)
# Outputs:   help text on stdout
#######################################
show_help() {
    # BOLD/RESET hold backslash sequences such as \033[1m. A here-document
    # does not interpret those, so they are expanded into real escape
    # characters first; otherwise the raw text "\033[1m" would be printed.
    local bold reset
    printf -v bold '%b' "$BOLD"
    printf -v reset '%b' "$RESET"

    cat <<EOF
${bold}${SCRIPT_NAME}${reset} — PromStack Backup

Backup and restore Prometheus, Grafana, Alertmanager, and Blackbox Exporter
configurations and data for disaster recovery.

${bold}MODES${reset}
--backup Backup monitoring stack components
--restore Restore from a backup directory
--verify Verify backup integrity
--list List available backups

${bold}OPTIONS${reset}
--output-dir DIR Backup output directory (default: ./monitoring-backups)
--restore-dir DIR Directory to restore from
--components LIST Components: all, grafana, prometheus, alertmanager, blackbox (default: all)
--grafana-url URL Grafana URL (default: http://localhost:3000)
--grafana-token TOK Grafana API key for authentication
--prometheus-url URL Prometheus URL (default: http://localhost:9090)
--alertmanager-url URL Alertmanager URL (default: http://localhost:9093)
--am-config DIR Alertmanager config directory (default: /etc/alertmanager)
--blackbox-url URL Blackbox Exporter URL (default: http://localhost:9115)
--blackbox-config DIR Blackbox Exporter config directory (default: /etc/blackbox_exporter)
--prom-config DIR Prometheus config directory (default: /etc/prometheus)
--prom-data DIR Prometheus data directory (default: /var/lib/prometheus)
--verbose Debug output
--no-color Disable colored output
--help Show this help message

${bold}ENVIRONMENT VARIABLES${reset}
PSB_OUTPUT_DIR Default backup directory
PSB_COMPONENTS Default components to backup
GRAFANA_URL Grafana server URL
GRAFANA_API_KEY Grafana API key
GRAFANA_USER Grafana username (default: admin)
GRAFANA_PASS Grafana password
PROMETHEUS_URL Prometheus server URL
ALERTMANAGER_URL Alertmanager server URL
AM_CONFIG_PATH Alertmanager config directory
BLACKBOX_URL Blackbox Exporter server URL
BLACKBOX_CONFIG_PATH Blackbox Exporter config directory
PROM_CONFIG_PATH Prometheus config directory
PROM_DATA_PATH Prometheus data directory
VERBOSE Enable verbose output (true/false)
COLOR Color mode: auto, always, never

${bold}EXAMPLES${reset}
# Backup all components
${SCRIPT_NAME} --backup

# Backup Grafana only
${SCRIPT_NAME} --backup --components grafana --grafana-token \$TOKEN

# Backup with custom URLs
${SCRIPT_NAME} --backup --grafana-url http://grafana:3000 --prometheus-url http://prom:9090

# List available backups
${SCRIPT_NAME} --list

# Verify a backup
${SCRIPT_NAME} --verify --restore-dir ./monitoring-backups/20260410-143000

# Restore from backup
${SCRIPT_NAME} --restore --restore-dir ./monitoring-backups/20260410-143000

# Restore Grafana only
${SCRIPT_NAME} --restore --restore-dir ./backups/20260410-143000 --components grafana

${bold}EXIT CODES${reset}
0 Success
1 Runtime error
EOF
}
|
|
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
# PARSE ARGS
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
#######################################
# Parse command-line options into the global settings.
# Options taking a value accept both "--option VALUE" and "--option=VALUE".
# When several mode flags are given, the last one wins.
# Globals:   RUN_MODE OUTPUT_DIR RESTORE_DIR COMPONENTS GRAFANA_URL
#            GRAFANA_TOKEN PROMETHEUS_URL ALERTMANAGER_URL AM_CONFIG_PATH
#            BLACKBOX_URL BLACKBOX_CONFIG_PATH PROM_CONFIG_PATH PROM_DATA_PATH
#            VERBOSE COLOR (written)
# Arguments: the script's command-line arguments
# Returns:   0; exits 0 after --help, exits non-zero on an unknown option or
#            a missing/empty option value
#######################################
parse_args() {
    while [[ $# -gt 0 ]]; do
        # Split "--option=value" into "--option" "value" so the case arms
        # below handle both spellings. Only the first "=" separates, which
        # keeps URLs containing "=" intact.
        if [[ "$1" == --*=* ]]; then
            set -- "${1%%=*}" "${1#*=}" "${@:2}"
        fi

        case "$1" in
            --backup) RUN_MODE="backup"; shift ;;
            --restore) RUN_MODE="restore"; shift ;;
            --verify) RUN_MODE="verify"; shift ;;
            --list) RUN_MODE="list"; shift ;;
            --output-dir) OUTPUT_DIR="${2:?--output-dir requires a path}"; shift 2 ;;
            --restore-dir) RESTORE_DIR="${2:?--restore-dir requires a path}"; shift 2 ;;
            --components) COMPONENTS="${2:?--components requires a value}"; shift 2 ;;
            --grafana-url) GRAFANA_URL="${2:?--grafana-url requires a URL}"; shift 2 ;;
            --grafana-token) GRAFANA_TOKEN="${2:?--grafana-token requires a value}"; shift 2 ;;
            --prometheus-url) PROMETHEUS_URL="${2:?--prometheus-url requires a URL}"; shift 2 ;;
            --alertmanager-url) ALERTMANAGER_URL="${2:?--alertmanager-url requires a URL}"; shift 2 ;;
            --am-config) AM_CONFIG_PATH="${2:?--am-config requires a path}"; shift 2 ;;
            --blackbox-url) BLACKBOX_URL="${2:?--blackbox-url requires a URL}"; shift 2 ;;
            --blackbox-config) BLACKBOX_CONFIG_PATH="${2:?--blackbox-config requires a path}"; shift 2 ;;
            --prom-config) PROM_CONFIG_PATH="${2:?--prom-config requires a path}"; shift 2 ;;
            --prom-data) PROM_DATA_PATH="${2:?--prom-data requires a path}"; shift 2 ;;
            --verbose) VERBOSE="true"; shift ;;
            --no-color) COLOR="never"; shift ;;
            --help|-h) setup_colors; show_help; exit 0 ;;
            *) die "Unknown option: $1 (see --help)" ;;
        esac
    done
}
|
|
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
# MAIN
|
|
# ══════════════════════════════════════════════════════════════════════
|
|
#######################################
# Program entry point: parse options, set up colors, then dispatch to the
# selected mode.
# Globals:   RUN_MODE (read), START_TIME (written)
# Arguments: the script's command-line arguments
# Returns:   status of the selected mode; exits 1 (after printing the help
#            text) when no mode was given
#######################################
main() {
    parse_args "$@"
    # Colors are set up after parsing so --no-color is honoured.
    setup_colors

    if [[ -z "$RUN_MODE" ]]; then
        err "No mode specified"
        echo ""
        show_help
        exit 1
    fi

    START_TIME=$(date +%s)

    case "$RUN_MODE" in
        backup) do_backup ;;
        restore) do_restore ;;
        verify) do_verify ;;
        list) do_list ;;
        *) die "Unknown mode: ${RUN_MODE}" ;;
    esac
}
|
|
|
|
# Entry point: hand every command-line argument to main unchanged.
main "$@"
|