#!/usr/bin/env bash
#########################################################################################
#### gcp-snapshot-manager.sh — Create, rotate, list, audit, and restore GCP ####
#### persistent disk snapshots via gcloud CLI. Automated retention and fleet ops ####
#### Requires: bash 4+, gcloud CLI, jq ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version 1.01 ####
#### ####
#### Usage: ####
#### ./gcp-snapshot-manager.sh --snapshot --all ####
#### ####
#### See --help for all options. ####
#########################################################################################

# The shebang and header must sit on their own lines: when everything shares one
# physical line, bash treats all of it as a single comment and neither strict
# mode nor any of the helpers below is ever defined.
set -euo pipefail

# ── Colors (pre-initialized) ─────────────────────────────────────────
# Empty by default so output stays plain until setup_colors enables escapes.
RED=""
GREEN=""
YELLOW=""
BLUE=""
CYAN=""
BOLD=""
DIM=""
RESET=""

#######################################
# Populate the color variables with ANSI escape sequences.
# Globals:   COLOR (read; "never", "always", anything else = auto-detect),
#            RED GREEN YELLOW BLUE CYAN BOLD DIM RESET (written)
# Arguments: none
# Returns:   0
#######################################
setup_colors() {
  local mode="${COLOR:-auto}"

  if [[ "$mode" == "never" ]]; then
    return 0
  fi

  # Auto mode only colors when stdout is a terminal.
  if [[ "$mode" == "always" ]] || [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    DIM='\033[2m'
    RESET='\033[0m'
  fi
}

# ── Logging ───────────────────────────────────────────────────────────
# log goes to stdout; warn and err go to stderr.
log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }

# Print a debug line only when VERBOSE is "true". The default expansion keeps
# `set -u` from aborting if verbose is called before VERBOSE is assigned.
verbose() {
  if [[ "${VERBOSE:-false}" == "true" ]]; then
    echo -e "${DIM}[DEBUG]${RESET} $*"
  fi
}

# Print an error message and exit with status 1.
die() {
  err "$*"
  exit 1
}

# Print a section title ($1) surrounded by blank lines.
section_header() {
  echo ""
  echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
  echo ""
}

# Print a "label value" row; $1 is the label (padded to 22 columns), $2 the
# value, printed literally.
field() {
  printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
}

# Same as field, but backslash escapes in the value ($2) are interpreted (%b),
# so the value may carry color sequences.
field_color() {
  printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
}

#######################################
# Print the seconds elapsed since START_TIME, followed by "s".
# Globals:   START_TIME (read; epoch seconds)
# Outputs:   e.g. "12s"; "0s" when START_TIME is unset or empty, instead of
#            the raw epoch an empty value would produce in the subtraction.
#######################################
elapsed() {
  local end_time start_time
  end_time=$(date +%s)
  start_time="${START_TIME:-$end_time}"
  echo "$(( end_time - start_time ))s"
}

# ── Defaults ──────────────────────────────────────────────────────────
RUN_MODE=""
ALSO_ROTATE="false"
INSTANCE_NAME=""
ZONE=""
TARGET_ALL="false"
SNAPSHOT_NAME=""
KEEP="${GSM_KEEP:-3}"
PREFIX="${GSM_PREFIX:-auto}"
MAX_AGE="${GSM_MAX_AGE:-7}"
OUTPUT_FORMAT="${GSM_FORMAT:-text}"
DRY_RUN="true"
FORCE="false"
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"
GCP_PROJECT=""

# ── State ─────────────────────────────────────────────────────────────
# These must be on their own lines: on a shared line, everything after the
# first "#" is a comment and none of the definitions below would exist.
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
START_TIME=""
SNAP_CREATED=0
SNAP_DELETED=0
SNAP_ERRORS=0

# ── Dependency and credential checks ────────────────────────────────

# Abort via die unless both gcloud and jq are on PATH.
check_deps() {
  command -v gcloud &>/dev/null || die "gcloud CLI is required"
  command -v jq &>/dev/null || die "jq is required"
}

#######################################
# Verify there is an active gcloud account and settle on a project.
# Globals:   GCP_PROJECT (read; written from gcloud config when empty)
# Outputs:   logs the selected project
# Returns:   exits via die when no account or no project is available
#######################################
check_credentials() {
  local account

  # A failing gcloud must not trip `set -e` on the assignment itself; fall
  # through to the explicit check so the user sees the message below.
  account=$(gcloud auth list --filter="status:ACTIVE" --format="value(account)" 2>/dev/null) \
    || account=""
  if [[ -z "$account" ]]; then
    die "No active gcloud credentials — run 'gcloud auth login'"
  fi

  if [[ -n "$GCP_PROJECT" ]]; then
    gcloud config set project "$GCP_PROJECT" --quiet 2>/dev/null \
      || die "Cannot set project: ${GCP_PROJECT}"
  else
    GCP_PROJECT=$(gcloud config get-value project 2>/dev/null) || GCP_PROJECT=""
    if [[ -z "$GCP_PROJECT" || "$GCP_PROJECT" == "(unset)" ]]; then
      die "No project set — use --project or 'gcloud config set project'"
    fi
  fi

  log "Project: ${GCP_PROJECT}"
}

# ── Instance helpers ─────────────────────────────────────────────────

# Print every instance in GCP_PROJECT as a JSON array. gcloud's own stderr is
# suppressed, so report the failure explicitly instead of exiting silently.
get_all_instances() {
  gcloud compute instances list --project "$GCP_PROJECT" --format=json 2>/dev/null \
    || die "Failed to list instances in project '${GCP_PROJECT}'"
}

#######################################
# Print the name of an instance's boot disk (last path component of the
# boot disk's "source" URL).
# Globals:   GCP_PROJECT (read)
# Arguments: $1 - instance name, $2 - zone
# Outputs:   disk name on stdout; nothing when the lookup fails
# Returns:   0 — callers test for an empty result, so a failed lookup must
#            not abort them through `set -e` / pipefail.
#######################################
get_boot_disk() {
  local instance="$1" zone="$2"
  local sources line

  sources=$(
    gcloud compute instances describe "$instance" --zone "$zone" --project "$GCP_PROJECT" \
      --format='json(disks)' 2>/dev/null \
      | jq -r '.disks[] | select(.boot == true) | .source' 2>/dev/null
  ) || sources=""

  while IFS= read -r line; do
    if [[ -n "$line" ]]; then
      printf '%s\n' "${line##*/}"
    fi
  done <<<"$sources"
}

#######################################
# Print the zone name (last path component of ".zone") for one instance.
# Arguments: $1 - JSON object describing the instance
# Outputs:   zone name on stdout; an empty line when .zone is missing
#######################################
get_instance_zone() {
  local instance_json="$1" zone_url

  zone_url=$(jq -r '.zone // empty' <<<"$instance_json") || zone_url=""
  printf '%s\n' "${zone_url##*/}"
}

# ── Snapshot helpers ─────────────────────────────────────────────────

# Print every snapshot in GCP_PROJECT as a JSON array.
list_snapshots() {
  gcloud compute snapshots list --project "$GCP_PROJECT" --format=json 2>/dev/null \
    || die "Failed to list snapshots in project '${GCP_PROJECT}'"
}

# Print the subset of snapshots whose name starts with PREFIX, as a JSON array.
managed_snapshots() {
  list_snapshots | jq --arg pfx "$PREFIX" \
    '[.[] | select(.name | startswith($pfx))]'
}

# ══════════════════════════════════════════════════════════════════════
# SNAPSHOT
#
# NOTE(review): From the SNAPSHOT section to the end of the visible file the
# original line breaks appear to have been lost. Statements run together on a
# few very long lines (e.g. "local instances_json instances_json=... local
# instances if ..."), and string literals such as "Rotating Snapshots" are
# split across physical lines. The first token below is the closing rule of the
# SNAPSHOT banner. The code is left exactly as found — restore it from version
# control rather than re-typing it.
#
# Points to verify once the formatting is restored:
#   * do_snapshot and do_rotate increment SNAP_CREATED, SNAP_ERRORS and
#     SNAP_DELETED inside "... | while read" loops. Bash runs such loops in a
#     subshell unless lastpipe is active, so the "Created:", "Errors:" and
#     "Deleted:" totals printed after the loops presumably stay at 0 — confirm.
#   * do_audit and do_status treat an instance as OK / protected when the
#     newest snapshot's timestamp is empty or cannot be parsed by "date -d".
#   * do_restore passes the result of get_boot_disk to detach-disk without
#     checking that it is non-empty, and the final "instances start" call has
#     no "|| die" before the success message.
#   * do_status: the text between "cat <" and "${MAX_AGE}d):" looks truncated
#     (the here-document and the start of the text report seem to be missing),
#     and show_help stops at "cat <".
══════════════════════════════════════════════════════════════════════ do_snapshot() { local instances_json instances_json=$(get_all_instances) local instances if [[ "$TARGET_ALL" == "true" ]]; then instances="$instances_json" elif [[ -n "$INSTANCE_NAME" ]]; then instances=$(echo "$instances_json" | jq --arg n "$INSTANCE_NAME" '[.[] | select(.name == $n)]') else die "Specify --instance NAME or --all" fi local count count=$(echo "$instances" | jq 'length') [[ "$count" -eq 0 ]] && die "No instances found" local target_label="$INSTANCE_NAME" [[ "$TARGET_ALL" == "true" ]] && target_label="all (${count} instances)" section_header "Creating Snapshots" field "Target:" "$target_label" field "Prefix:" "$PREFIX" echo "" echo "$instances" | jq -c '.[]' | while IFS= read -r inst; do local name zone disk_name snap_name name=$(echo "$inst" | jq -r '.name') zone=$(get_instance_zone "$inst") disk_name=$(get_boot_disk "$name" "$zone") snap_name="${PREFIX}-${name}-$(date +%Y%m%d-%H%M%S)" if [[ -z "$disk_name" ]]; then echo -e " ${RED}✗${RESET} ${name} (${zone}) no boot disk found" ((SNAP_ERRORS++)) || true continue fi verbose "Snapshotting ${name} disk ${disk_name} in ${zone}" if gcloud compute snapshots create "$snap_name" \ --source-disk="$disk_name" \ --source-disk-zone="$zone" \ --project "$GCP_PROJECT" \ --labels="managed-by=gcp-snapshot-manager,source-instance=${name}" \ --quiet 2>/dev/null; then echo -e " ${GREEN}✓${RESET} ${name} (${zone}) ${snap_name}" ((SNAP_CREATED++)) || true else echo -e " ${RED}✗${RESET} ${name} (${zone}) failed" ((SNAP_ERRORS++)) || true fi sleep 1 done echo "" field_color "Created:" "${GREEN}${SNAP_CREATED}${RESET}" if [[ "$SNAP_ERRORS" -gt 0 ]]; then field_color "Errors:" "${RED}${SNAP_ERRORS}${RESET}" fi if [[ "$ALSO_ROTATE" == "true" ]]; then do_rotate fi } # ══════════════════════════════════════════════════════════════════════ # ROTATE # ══════════════════════════════════════════════════════════════════════ do_rotate() { section_header "Rotating 
Snapshots" field "Keep:" "$KEEP per instance" field "Prefix:" "$PREFIX" if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then field "Mode:" "DRY RUN (use --force to delete)" else field "Mode:" "LIVE — deletions are permanent" fi echo "" local snaps snaps=$(managed_snapshots) local instance_names instance_names=$(echo "$snaps" | jq -r '.[].labels["source-instance"] // empty' | sort -u) if [[ -z "$instance_names" ]]; then log "No managed snapshots found matching prefix '${PREFIX}'" return fi while IFS= read -r inst; do [[ -z "$inst" ]] && continue local inst_snaps inst_snaps=$(echo "$snaps" | jq --arg inst "$inst" \ '[.[] | select(.labels["source-instance"] == $inst)] | sort_by(.creationTimestamp) | reverse') local total total=$(echo "$inst_snaps" | jq 'length') if (( total <= KEEP )); then verbose "${inst}: ${total} snapshots, keeping all" continue fi local to_delete to_delete=$(echo "$inst_snaps" | jq --argjson k "$KEEP" '.[$k:]') local del_count del_count=$(echo "$to_delete" | jq 'length') echo "$to_delete" | jq -c '.[]' | while IFS= read -r snap; do local sname sname=$(echo "$snap" | jq -r '.name') if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then echo -e " ${DIM}[DRY RUN]${RESET} would delete ${sname}" else if gcloud compute snapshots delete "$sname" \ --project "$GCP_PROJECT" --quiet 2>/dev/null; then echo -e " ${YELLOW}✓${RESET} deleted ${sname}" ((SNAP_DELETED++)) || true else echo -e " ${RED}✗${RESET} failed to delete ${sname}" ((SNAP_ERRORS++)) || true fi fi done log "${inst}: ${total} total, keeping ${KEEP}, removing ${del_count}" done <<< "$instance_names" echo "" field_color "Deleted:" "${YELLOW}${SNAP_DELETED}${RESET}" } # ══════════════════════════════════════════════════════════════════════ # LIST # ══════════════════════════════════════════════════════════════════════ do_list() { section_header "All Snapshots" local snaps snaps=$(list_snapshots) local count count=$(echo "$snaps" | jq 'length') if [[ "$count" -eq 0 ]]; then log "No 
snapshots found" return fi printf " %-40s %-10s %-12s %-16s %s\n" \ "NAME" "SIZE_GB" "AGE" "SOURCE_DISK" "SOURCE_INSTANCE" printf " %s\n" "$(printf '%.0s─' {1..100})" local now now=$(date +%s) echo "$snaps" | jq -c '.[]' | while IFS= read -r snap; do local name size_gb created source_disk source_inst age_str name=$(echo "$snap" | jq -r '.name') size_gb=$(echo "$snap" | jq -r '.diskSizeGb // 0') created=$(echo "$snap" | jq -r '.creationTimestamp // ""') source_disk=$(echo "$snap" | jq -r '.sourceDisk // ""' | rev | cut -d/ -f1 | rev) source_inst=$(echo "$snap" | jq -r '.labels["source-instance"] // "manual"') if [[ -n "$created" ]]; then local snap_epoch snap_epoch=$(date -d "$created" +%s 2>/dev/null || echo 0) if [[ "$snap_epoch" -gt 0 ]]; then local age_days=$(( (now - snap_epoch) / 86400 )) age_str="${age_days}d" else age_str="unknown" fi else age_str="unknown" fi printf " %-40s %-10s %-12s %-16s %s\n" \ "${name:0:39}" "$size_gb" "$age_str" "${source_disk:0:15}" "${source_inst:0:20}" done echo "" field "Total snapshots:" "$count" } # ══════════════════════════════════════════════════════════════════════ # AUDIT # ══════════════════════════════════════════════════════════════════════ do_audit() { section_header "Snapshot Audit" local instances_json instances_json=$(get_all_instances) local snaps snaps=$(list_snapshots) local now now=$(date +%s) printf " %-24s %-14s %-24s %-8s %-8s %s\n" \ "INSTANCE" "ZONE" "LATEST_SNAPSHOT" "AGE" "COUNT" "STATUS" printf " %s\n" "$(printf '%.0s─' {1..100})" echo "$instances_json" | jq -c '.[]' | while IFS= read -r inst; do local name zone name=$(echo "$inst" | jq -r '.name') zone=$(get_instance_zone "$inst") local inst_snaps snap_count inst_snaps=$(echo "$snaps" | jq --arg inst "$name" \ '[.[] | select(.labels["source-instance"] == $inst)]') snap_count=$(echo "$inst_snaps" | jq 'length') if [[ "$snap_count" -eq 0 ]]; then printf " %-24s %-14s %-24s %-8s %-8s %b%s%b\n" \ "${name:0:23}" "${zone:0:13}" "(none)" "—" "0" \ "$RED" "✗ 
Unprotected" "$RESET" continue fi local latest_name latest_date age_str status color latest_name=$(echo "$inst_snaps" | jq -r 'sort_by(.creationTimestamp) | last | .name // ""') latest_date=$(echo "$inst_snaps" | jq -r 'sort_by(.creationTimestamp) | last | .creationTimestamp // ""') if [[ -n "$latest_date" ]]; then local snap_epoch snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || echo 0) if [[ "$snap_epoch" -gt 0 ]]; then local age_days=$(( (now - snap_epoch) / 86400 )) age_str="${age_days}d" if (( age_days > MAX_AGE )); then status="⚠ Stale"; color="$YELLOW" else status="✓ OK"; color="$GREEN" fi else age_str="unknown"; status="✓ OK"; color="$GREEN" fi else age_str="unknown"; status="✓ OK"; color="$GREEN" fi printf " %-24s %-14s %-24s %-8s %-8s %b%s%b\n" \ "${name:0:23}" "${zone:0:13}" "${latest_name:0:23}" \ "$age_str" "$snap_count" "$color" "$status" "$RESET" done echo "" } # ══════════════════════════════════════════════════════════════════════ # RESTORE # ══════════════════════════════════════════════════════════════════════ do_restore() { [[ -z "$INSTANCE_NAME" ]] && die "--restore requires --instance NAME" [[ -z "$SNAPSHOT_NAME" ]] && die "--restore requires --snapshot-name NAME" [[ -z "$ZONE" ]] && die "--restore requires --zone ZONE" section_header "Restore from Snapshot" field "Instance:" "$INSTANCE_NAME" field "Snapshot:" "$SNAPSHOT_NAME" field "Zone:" "$ZONE" echo "" if [[ "$FORCE" != "true" ]]; then warn "This will stop the instance and replace its boot disk. Use --force to confirm." return fi log "Creating disk from snapshot..." local disk_name="restored-${INSTANCE_NAME}-$(date +%Y%m%d-%H%M%S)" if gcloud compute disks create "$disk_name" \ --source-snapshot="$SNAPSHOT_NAME" \ --zone="$ZONE" \ --project "$GCP_PROJECT" \ --quiet 2>/dev/null; then echo -e " ${GREEN}✓${RESET} Disk created: ${disk_name}" else die "Failed to create disk from snapshot" fi log "Stopping instance..." 
gcloud compute instances stop "$INSTANCE_NAME" \ --zone="$ZONE" --project "$GCP_PROJECT" --quiet 2>/dev/null \ || die "Failed to stop instance" local old_disk old_disk=$(get_boot_disk "$INSTANCE_NAME" "$ZONE") log "Detaching old boot disk..." gcloud compute instances detach-disk "$INSTANCE_NAME" \ --disk="$old_disk" --zone="$ZONE" --project "$GCP_PROJECT" \ --quiet 2>/dev/null || die "Failed to detach old disk" log "Attaching restored disk..." gcloud compute instances attach-disk "$INSTANCE_NAME" \ --disk="$disk_name" --zone="$ZONE" --boot \ --project "$GCP_PROJECT" --quiet 2>/dev/null \ || die "Failed to attach restored disk" log "Starting instance..." gcloud compute instances start "$INSTANCE_NAME" \ --zone="$ZONE" --project "$GCP_PROJECT" --quiet 2>/dev/null echo -e " ${GREEN}✓${RESET} Instance started with restored disk" } # ══════════════════════════════════════════════════════════════════════ # STATUS # ══════════════════════════════════════════════════════════════════════ do_status() { local instances_json instances_json=$(get_all_instances) local snaps snaps=$(list_snapshots) local now now=$(date +%s) local total_instances=0 total_snaps=0 total_gb=0 local protected=0 stale=0 unprotected=0 while IFS= read -r inst; do [[ -z "$inst" ]] && continue ((total_instances++)) || true local name name=$(echo "$inst" | jq -r '.name') local inst_snaps snap_count inst_snaps=$(echo "$snaps" | jq --arg inst "$name" \ '[.[] | select(.labels["source-instance"] == $inst)]') snap_count=$(echo "$inst_snaps" | jq 'length') total_snaps=$(( total_snaps + snap_count )) local gb gb=$(echo "$inst_snaps" | jq '[.[].diskSizeGb // 0 | tonumber] | add // 0') total_gb=$(( total_gb + gb )) if [[ "$snap_count" -eq 0 ]]; then ((unprotected++)) || true continue fi local latest_date latest_date=$(echo "$inst_snaps" | jq -r \ 'sort_by(.creationTimestamp) | last | .creationTimestamp // ""') if [[ -n "$latest_date" ]]; then local snap_epoch snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || 
echo 0) if [[ "$snap_epoch" -gt 0 ]]; then local age_days=$(( (now - snap_epoch) / 86400 )) if (( age_days > MAX_AGE )); then ((stale++)) || true else ((protected++)) || true fi else ((protected++)) || true fi else ((protected++)) || true fi done < <(echo "$instances_json" | jq -c '.[]') if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then cat <${MAX_AGE}d):" "${YELLOW}${stale}${RESET}" else field_color "Stale (>${MAX_AGE}d):" "${GREEN}0${RESET}" fi if [[ "$unprotected" -gt 0 ]]; then field_color "Unprotected:" "${RED}${unprotected}${RESET}" else field_color "Unprotected:" "${GREEN}0${RESET}" fi } # ══════════════════════════════════════════════════════════════════════ # HELP # ══════════════════════════════════════════════════════════════════════ show_help() { cat <