#!/usr/bin/env bash
#########################################################################################
#### azure-snapshot-manager.sh — Create, rotate, list, audit, and restore Azure
#### managed disk snapshots via az CLI. Automated retention and fleet-wide ops
#### Requires: bash 4+, az CLI, jq
####
#### Author: Phil Connor
#### Contact: contact@mylinux.work
#### License: MIT
#### Version 1.01
####
#### Usage:
#### ./azure-snapshot-manager.sh --snapshot --all
####
#### See --help for all options.
#########################################################################################

set -euo pipefail

# ── Colors (pre-initialized) ─────────────────────────────────────────
# Empty by default so that output stays plain unless setup_colors enables them.
RED=""
GREEN=""
YELLOW=""
BLUE=""
CYAN=""
BOLD=""
DIM=""
RESET=""

# Populate the color variables with ANSI escape sequences.
# Globals: COLOR (read; "never" | "always" | anything else = auto-detect),
#          RED GREEN YELLOW BLUE CYAN BOLD DIM RESET (written)
# Colors are enabled when COLOR is "always" or stdout is a terminal.
setup_colors() {
  if [[ "${COLOR:-auto}" == "never" ]]; then
    return
  fi
  if [[ "${COLOR:-auto}" == "always" ]] || [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    DIM='\033[2m'
    RESET='\033[0m'
  fi
}

# ── Logging ───────────────────────────────────────────────────────────
# log writes to stdout; warn, err and verbose write to stderr.
log() { echo -e "${BLUE}[INFO]${RESET} $*"; }
warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; }

# Print a debug line when VERBOSE is "true".
# Goes to stderr: az_cmd calls this from inside command substitutions, so a
# debug line on stdout would be captured together with the az output.
verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    echo -e "${DIM}[DEBUG]${RESET} $*" >&2
  fi
}

# Print an error and terminate the script with status 1.
die() {
  err "$*"
  exit 1
}

# Print a titled section separator surrounded by blank lines.
# Arguments: $1 - section title
section_header() {
  echo ""
  echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
  echo ""
}

# Print a "label value" row; the value is printed literally (%s).
# Arguments: $1 - label, $2 - value
field() {
  printf " ${BOLD}%-22s${RESET} %s\n" "$1" "$2"
}

# Same as field, but the value is printed with %b so embedded color escapes
# are interpreted.
# Arguments: $1 - label, $2 - value (may contain escape sequences)
field_color() {
  printf " ${BOLD}%-22s${RESET} %b\n" "$1" "$2"
}

# Print the number of seconds since START_TIME, suffixed with "s".
# Globals: START_TIME (read; epoch seconds — it is initialized to "" below,
#          presumably assigned by code outside this view; an empty value
#          evaluates as 0 in the arithmetic expansion)
elapsed() {
  local end_time
  end_time=$(date +%s)
  echo "$((end_time - START_TIME))s"
}

# ── Defaults ──────────────────────────────────────────────────────────
RUN_MODE=""
ALSO_ROTATE="false"
VM_NAME=""
RESOURCE_GROUP=""
TARGET_ALL="false"
SNAPSHOT_ID=""
KEEP="${ASM_KEEP:-3}"
PREFIX="${ASM_PREFIX:-auto}"
MAX_AGE="${ASM_MAX_AGE:-7}"
OUTPUT_FORMAT="${ASM_FORMAT:-text}"
DRY_RUN="true"
FORCE="false"
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"
SUBSCRIPTION=""

# ── State ─────────────────────────────────────────────────────────────
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
START_TIME=""
SNAP_CREATED=0
SNAP_DELETED=0
SNAP_ERRORS=0

# ── Dependency and credential checks ────────────────────────────────
# Abort unless both az and jq are on PATH.
check_deps() {
  command -v az &>/dev/null \
    || die "az CLI is required (install: https://aka.ms/InstallAzureCLIDeb)"
  command -v jq &>/dev/null || die "jq is required"
}

# Verify that az has a logged-in account, report it, and switch to
# SUBSCRIPTION when one was requested.
# Globals: SUBSCRIPTION (read)
check_credentials() {
  local acct
  # stderr is discarded rather than merged: anything az prints there would
  # otherwise end up inside the JSON handed to jq below.
  acct=$(az account show --output json 2>/dev/null) \
    || die "Azure credentials not configured — run 'az login'"

  local sub_name sub_id
  sub_name=$(jq -r '.name' <<<"$acct")
  sub_id=$(jq -r '.id' <<<"$acct")
  verbose "Subscription: ${sub_name} (${sub_id})"
  log "Subscription: ${sub_name}"

  if [[ -n "$SUBSCRIPTION" ]]; then
    az account set --subscription "$SUBSCRIPTION" 2>/dev/null \
      || die "Cannot switch to subscription: ${SUBSCRIPTION}"
    log "Switched to subscription: ${SUBSCRIPTION}"
  fi
}

# ── Azure CLI wrapper ────────────────────────────────────────────────
# Run az with the given arguments, appending --subscription when
# SUBSCRIPTION is set. Returns the exit status of az.
# Note: callers that redirect this function's stderr to /dev/null also
# silence the debug trace line for that call.
az_cmd() {
  local args=("$@")
  if [[ -n "$SUBSCRIPTION" ]]; then
    args+=(--subscription "$SUBSCRIPTION")
  fi
  verbose "az ${args[*]}"
  az "${args[@]}"
}

# ── VM helpers ───────────────────────────────────────────────────────
# Print the JSON array of VMs, limited to RESOURCE_GROUP when it is set.
get_all_vms() {
  local args=(vm list --output json)
  if [[ -n "$RESOURCE_GROUP" ]]; then
    args+=(--resource-group "$RESOURCE_GROUP")
  fi
  az_cmd "${args[@]}" 2>/dev/null
}

# Print the managed OS disk id of a VM (empty when the query yields nothing).
# Arguments: $1 - VM name, $2 - resource group
get_vm_os_disk_id() {
  local vm_name="$1" rg="$2"
  az_cmd vm show --name "$vm_name" --resource-group "$rg" \
    --query 'storageProfile.osDisk.managedDisk.id' --output tsv 2>/dev/null
}

# Print the resourceGroup field of a single VM JSON object.
# Arguments: $1 - VM JSON object
get_vm_rg() {
  local vm_json="$1"
  echo "$vm_json" | jq -r '.resourceGroup'
}

# ── Snapshot helpers ─────────────────────────────────────────────────
# Print the JSON array of snapshots, limited to RESOURCE_GROUP when set.
list_snapshots() {
  local args=(snapshot list --output json)
  if [[ -n "$RESOURCE_GROUP" ]]; then
    args+=(--resource-group "$RESOURCE_GROUP")
  fi
  az_cmd "${args[@]}" 2>/dev/null
}

# Print the JSON array of snapshots whose name starts with PREFIX.
managed_snapshots() {
  list_snapshots | jq --arg pfx "$PREFIX" \
    '[.[] | select(.name | startswith($pfx))]'
}

# ══════════════════════════════════════════════════════════════════════
# SNAPSHOT
# ══════════════════════════════════════════════════════════════════════

# Snapshot the OS disk of one VM (--vm) or of every listed VM (--all).
# Globals: TARGET_ALL VM_NAME PREFIX SCRIPT_NAME ALSO_ROTATE (read),
#          SNAP_CREATED SNAP_ERRORS (written)
# Each snapshot is named "<PREFIX>-<vm>-<timestamp>" and tagged with
# managed-by and source-vm. Runs do_rotate afterwards when ALSO_ROTATE is
# "true".
do_snapshot() {
  local vm_json
  # Explicit message: get_all_vms discards az's stderr, so without this a
  # failed listing would end the script under `set -e` with no output at all.
  vm_json=$(get_all_vms) || die "Failed to list VMs"

  local vms
  if [[ "$TARGET_ALL" == "true" ]]; then
    vms="$vm_json"
  elif [[ -n "$VM_NAME" ]]; then
    vms=$(jq --arg n "$VM_NAME" '[.[] | select(.name == $n)]' <<<"$vm_json")
  else
    die "Specify --vm NAME or --all"
  fi

  local count
  count=$(jq 'length' <<<"$vms")
  if [[ "$count" -eq 0 ]]; then
    die "No VMs found"
  fi

  local target_label="$VM_NAME"
  if [[ "$TARGET_ALL" == "true" ]]; then
    target_label="all (${count} VMs)"
  fi

  section_header "Creating Snapshots"
  field "Target:" "$target_label"
  field "Prefix:" "$PREFIX"
  echo ""

  # Load the VM objects into an array and iterate in the current shell.
  # Piping into `while` would run the loop in a subshell and discard every
  # SNAP_CREATED / SNAP_ERRORS update before the summary below is printed.
  local -a vm_rows=()
  mapfile -t vm_rows < <(jq -c '.[]' <<<"$vms")

  local vm name rg disk_id snap_name
  for vm in "${vm_rows[@]}"; do
    name=$(jq -r '.name' <<<"$vm")
    rg=$(jq -r '.resourceGroup' <<<"$vm")
    # A failed lookup is recorded as an error for this VM instead of
    # terminating the whole run via `set -e`.
    disk_id=$(get_vm_os_disk_id "$name" "$rg") || disk_id=""
    snap_name="${PREFIX}-${name}-$(date +%Y%m%d-%H%M%S)"

    if [[ -z "$disk_id" ]]; then
      echo -e " ${RED}✗${RESET} ${name} (${rg}) no OS disk found"
      SNAP_ERRORS=$((SNAP_ERRORS + 1))
      continue
    fi

    verbose "Snapshotting ${name} disk ${disk_id}"

    if az_cmd snapshot create \
      --resource-group "$rg" \
      --name "$snap_name" \
      --source "$disk_id" \
      --tags "managed-by=${SCRIPT_NAME}" "source-vm=${name}" \
      --output none 2>/dev/null; then
      echo -e " ${GREEN}✓${RESET} ${name} (${rg}) ${snap_name}"
      SNAP_CREATED=$((SNAP_CREATED + 1))
    else
      echo -e " ${RED}✗${RESET} ${name} (${rg}) failed"
      SNAP_ERRORS=$((SNAP_ERRORS + 1))
    fi

    sleep 1
  done

  echo ""
  field_color "Created:" "${GREEN}${SNAP_CREATED}${RESET}"
  if [[ "$SNAP_ERRORS" -gt 0 ]]; then
    field_color "Errors:" "${RED}${SNAP_ERRORS}${RESET}"
  fi

  if [[ "$ALSO_ROTATE" == "true" ]]; then
    do_rotate
  fi
}

# ══════════════════════════════════════════════════════════════════════
# ROTATE
# ══════════════════════════════════════════════════════════════════════

# Delete all but the newest KEEP managed snapshots of each source VM.
# Globals: KEEP PREFIX DRY_RUN FORCE (read), SNAP_DELETED SNAP_ERRORS (written)
# Snapshots are grouped by their "source-vm" tag and ordered by timeCreated.
# Nothing is deleted while DRY_RUN is "true" unless FORCE is "true".
do_rotate() {
  section_header "Rotating Snapshots"
  field "Keep:" "$KEEP per VM"
  field "Prefix:" "$PREFIX"

  local dry_run="false"
  if [[ "$DRY_RUN" == "true" && "$FORCE" != "true" ]]; then
    dry_run="true"
    field "Mode:" "DRY RUN (use --force to delete)"
  else
    field "Mode:" "LIVE — deletions are permanent"
  fi
  echo ""

  local snaps vm_names
  snaps=$(managed_snapshots)
  vm_names=$(jq -r '.[].tags["source-vm"] // empty' <<<"$snaps" | sort -u)

  if [[ -z "$vm_names" ]]; then
    log "No managed snapshots found matching prefix '${PREFIX}'"
    return
  fi

  local vm vm_snaps total snap sname srg
  local -a doomed=()
  while IFS= read -r vm; do
    [[ -z "$vm" ]] && continue

    # Newest first, so everything from index KEEP onward is surplus.
    vm_snaps=$(jq --arg vm "$vm" \
      '[.[] | select(.tags["source-vm"] == $vm)] | sort_by(.timeCreated) | reverse' \
      <<<"$snaps")
    total=$(jq 'length' <<<"$vm_snaps")

    if ((total <= KEEP)); then
      verbose "${vm}: ${total} snapshots, keeping all"
      continue
    fi

    # Iterate over an array in the current shell: a `| while` pipeline would
    # run in a subshell and lose the SNAP_DELETED / SNAP_ERRORS updates.
    mapfile -t doomed < <(jq -c --argjson k "$KEEP" '.[$k:][]' <<<"$vm_snaps")

    for snap in "${doomed[@]}"; do
      sname=$(jq -r '.name' <<<"$snap")
      srg=$(jq -r '.resourceGroup' <<<"$snap")

      if [[ "$dry_run" == "true" ]]; then
        echo -e " ${DIM}[DRY RUN]${RESET} would delete ${sname} (${srg})"
        continue
      fi

      # stdin is detached so the command cannot consume the VM-name list
      # feeding the enclosing while loop.
      if az_cmd snapshot delete --name "$sname" --resource-group "$srg" \
        --output none 2>/dev/null </dev/null; then
        echo -e " ${YELLOW}✓${RESET} deleted ${sname}"
        SNAP_DELETED=$((SNAP_DELETED + 1))
      else
        echo -e " ${RED}✗${RESET} failed to delete ${sname}"
        SNAP_ERRORS=$((SNAP_ERRORS + 1))
      fi
    done

    log "${vm}: ${total} total, keeping ${KEEP}, removing ${#doomed[@]}"
  done <<<"$vm_names"

  echo ""
  field_color "Deleted:" "${YELLOW}${SNAP_DELETED}${RESET}"
}

# Print the whole days between a timestamp and a reference epoch.
# Arguments: $1 - timestamp string understood by `date -d` (may be empty)
#            $2 - reference time in epoch seconds
# Outputs:   the day count, or nothing when $1 is empty or cannot be parsed
_snapshot_age_days() {
  local created="$1" now="$2" epoch
  [[ -n "$created" ]] || return 0
  epoch=$(date -d "$created" +%s 2>/dev/null || echo 0)
  if [[ "$epoch" -gt 0 ]]; then
    echo $(((now - epoch) / 86400))
  fi
}

# ══════════════════════════════════════════════════════════════════════
# LIST
# ══════════════════════════════════════════════════════════════════════

# Print a table of every snapshot returned by list_snapshots: name, resource
# group, size, age in days and the source-vm tag ("manual" when absent).
do_list() {
  section_header "All Snapshots"

  local snaps count
  snaps=$(list_snapshots)
  count=$(jq 'length' <<<"$snaps")

  if [[ "$count" -eq 0 ]]; then
    log "No snapshots found"
    return
  fi

  printf " %-36s %-16s %-8s %-12s %s\n" \
    "NAME" "RESOURCE_GROUP" "SIZE_GB" "AGE" "SOURCE_VM"
  printf " %s\n" "$(printf '%.0s─' {1..90})"

  local now
  now=$(date +%s)

  local snap name rg size_gb created source_vm age_days age_str
  while IFS= read -r snap; do
    name=$(jq -r '.name' <<<"$snap")
    rg=$(jq -r '.resourceGroup' <<<"$snap")
    size_gb=$(jq -r '.diskSizeGb // 0' <<<"$snap")
    created=$(jq -r '.timeCreated // ""' <<<"$snap")
    source_vm=$(jq -r '.tags["source-vm"] // "manual"' <<<"$snap")

    age_days=$(_snapshot_age_days "$created" "$now")
    if [[ -n "$age_days" ]]; then
      age_str="${age_days}d"
    else
      age_str="unknown"
    fi

    # Columns are truncated so long names do not break the table layout.
    printf " %-36s %-16s %-8s %-12s %s\n" \
      "${name:0:35}" "${rg:0:15}" "$size_gb" "$age_str" "${source_vm:0:20}"
  done < <(jq -c '.[]' <<<"$snaps")

  echo ""
  field "Total snapshots:" "$count"
}

# ══════════════════════════════════════════════════════════════════════
# AUDIT
# ══════════════════════════════════════════════════════════════════════

# Print, per VM, its latest tagged snapshot, that snapshot's age, the
# snapshot count and a status: Unprotected (no snapshots), Stale (latest is
# older than MAX_AGE days) or OK (also used when the age cannot be computed).
# Globals: MAX_AGE (read)
do_audit() {
  section_header "Snapshot Audit"

  local vm_json snaps now
  vm_json=$(get_all_vms)
  snaps=$(list_snapshots)
  now=$(date +%s)

  printf " %-24s %-16s %-20s %-8s %-8s %s\n" \
    "VM_NAME" "RESOURCE_GROUP" "LATEST_SNAPSHOT" "AGE" "COUNT" "STATUS"
  printf " %s\n" "$(printf '%.0s─' {1..95})"

  local vm name rg vm_snaps snap_count
  local latest_name latest_date age_days age_str status color
  while IFS= read -r vm; do
    name=$(jq -r '.name' <<<"$vm")
    rg=$(jq -r '.resourceGroup' <<<"$vm")

    vm_snaps=$(jq --arg vm "$name" \
      '[.[] | select(.tags["source-vm"] == $vm)]' <<<"$snaps")
    snap_count=$(jq 'length' <<<"$vm_snaps")

    if [[ "$snap_count" -eq 0 ]]; then
      printf " %-24s %-16s %-20s %-8s %-8s %b%s%b\n" \
        "${name:0:23}" "${rg:0:15}" "(none)" "—" "0" \
        "$RED" "✗ Unprotected" "$RESET"
      continue
    fi

    latest_name=$(jq -r 'sort_by(.timeCreated) | last | .name // ""' <<<"$vm_snaps")
    latest_date=$(jq -r 'sort_by(.timeCreated) | last | .timeCreated // ""' <<<"$vm_snaps")

    # Default outcome; only a computable age above MAX_AGE downgrades it.
    age_str="unknown"
    status="✓ OK"
    color="$GREEN"

    age_days=$(_snapshot_age_days "$latest_date" "$now")
    if [[ -n "$age_days" ]]; then
      age_str="${age_days}d"
      if ((age_days > MAX_AGE)); then
        status="⚠ Stale"
        color="$YELLOW"
      fi
    fi

    printf " %-24s %-16s %-20s %-8s %-8s %b%s%b\n" \
      "${name:0:23}" "${rg:0:15}" "${latest_name:0:19}" \
      "$age_str" "$snap_count" "$color" "$status" "$RESET"
  done < <(jq -c '.[]' <<<"$vm_json")

  echo ""
}

# ══════════════════════════════════════════════════════════════════════
# RESTORE
# ══════════════════════════════════════════════════════════════════════

# Replace a VM's OS disk with a new disk created from a snapshot.
# Globals: VM_NAME SNAPSHOT_ID RESOURCE_GROUP FORCE (read)
# Without FORCE="true" only the plan and a warning are printed.
# Steps: create disk from snapshot, look up its id, deallocate the VM, swap
# the OS disk, start the VM. Any failing step terminates via die.
do_restore() {
  [[ -n "$VM_NAME" ]] || die "--restore requires --vm NAME"
  [[ -n "$SNAPSHOT_ID" ]] || die "--restore requires --snapshot-id NAME"
  [[ -n "$RESOURCE_GROUP" ]] || die "--restore requires --resource-group RG"

  section_header "Restore from Snapshot"
  field "VM:" "$VM_NAME"
  field "Snapshot:" "$SNAPSHOT_ID"
  field "Resource Group:" "$RESOURCE_GROUP"
  echo ""

  if [[ "$FORCE" != "true" ]]; then
    warn "This will replace the VM's OS disk. Use --force to confirm."
    return
  fi

  log "Creating disk from snapshot..."
  local disk_name
  disk_name="restored-${VM_NAME}-$(date +%Y%m%d-%H%M%S)"

  local snap_id
  snap_id=$(az_cmd snapshot show --name "$SNAPSHOT_ID" --resource-group "$RESOURCE_GROUP" \
    --query 'id' --output tsv 2>/dev/null) || die "Snapshot not found: ${SNAPSHOT_ID}"
  [[ -n "$snap_id" ]] || die "Snapshot not found: ${SNAPSHOT_ID}"

  if az_cmd disk create \
    --resource-group "$RESOURCE_GROUP" \
    --name "$disk_name" \
    --source "$snap_id" \
    --output none 2>/dev/null; then
    echo -e " ${GREEN}✓${RESET} Disk created: ${disk_name}"
  else
    die "Failed to create disk from snapshot"
  fi

  # Resolve the new disk id before touching the VM, so a failed lookup
  # cannot leave the VM deallocated with nothing to swap in.
  local new_disk_id
  new_disk_id=$(az_cmd disk show --name "$disk_name" --resource-group "$RESOURCE_GROUP" \
    --query 'id' --output tsv 2>/dev/null) || die "Failed to look up restored disk: ${disk_name}"
  [[ -n "$new_disk_id" ]] || die "Failed to look up restored disk: ${disk_name}"

  log "Deallocating VM..."
  az_cmd vm deallocate --name "$VM_NAME" --resource-group "$RESOURCE_GROUP" \
    --output none 2>/dev/null || die "Failed to deallocate VM"

  log "Swapping OS disk..."
  if az_cmd vm update --name "$VM_NAME" --resource-group "$RESOURCE_GROUP" \
    --os-disk "$new_disk_id" --output none 2>/dev/null; then
    echo -e " ${GREEN}✓${RESET} OS disk swapped"
  else
    die "Failed to swap OS disk"
  fi

  log "Starting VM..."
  # Checked explicitly: stderr is discarded, so an unchecked failure would
  # end the script under `set -e` without any message.
  az_cmd vm start --name "$VM_NAME" --resource-group "$RESOURCE_GROUP" \
    --output none 2>/dev/null || die "Failed to start VM"
  echo -e " ${GREEN}✓${RESET} VM started"
}

# NOTE(review): the remainder of this span — do_status and the beginning of
# show_help — is damaged in the text under review: part of do_status (from
# the prometheus `cat <` up to the "Stale" field) is missing and show_help is
# cut off after `cat <`. It is kept verbatim on the single comment line below
# rather than reconstructed by guesswork; restore it from a pristine copy.
# ══════════════════════════════════════════════════════════════════════ # STATUS # ══════════════════════════════════════════════════════════════════════ do_status() { local vm_json vm_json=$(get_all_vms) local snaps snaps=$(list_snapshots) local now now=$(date +%s) local total_vms=0 total_snaps=0 total_gb=0 local protected=0 stale=0 unprotected=0 while IFS= read -r vm; do [[ -z "$vm" ]] && continue ((total_vms++)) || true local name name=$(echo "$vm" | jq -r '.name') local vm_snaps snap_count vm_snaps=$(echo "$snaps" | jq --arg vm "$name" \ '[.[] | select(.tags["source-vm"] == $vm)]') snap_count=$(echo "$vm_snaps" | jq 'length') total_snaps=$(( total_snaps + snap_count )) local gb gb=$(echo "$vm_snaps" | jq '[.[].diskSizeGb // 0] | add // 0') total_gb=$(( total_gb + gb )) if [[ "$snap_count" -eq 0 ]]; then ((unprotected++)) || true continue fi local latest_date latest_date=$(echo "$vm_snaps" | jq -r \ 'sort_by(.timeCreated) | last | .timeCreated // ""') if [[ -n "$latest_date" ]]; then local snap_epoch snap_epoch=$(date -d "$latest_date" +%s 2>/dev/null || echo 0) if [[ "$snap_epoch" -gt 0 ]]; then local age_days=$(( (now - snap_epoch) / 86400 )) if (( age_days > MAX_AGE )); then ((stale++)) || true else ((protected++)) || true fi else ((protected++)) || true fi else ((protected++)) || true fi done < <(echo "$vm_json" | jq -c '.[]') if [[ "$OUTPUT_FORMAT" == "prometheus" ]]; then cat <${MAX_AGE}d):" "${YELLOW}${stale}${RESET}" else field_color "Stale (>${MAX_AGE}d):" "${GREEN}0${RESET}" fi if [[ "$unprotected" -gt 0 ]]; then field_color "Unprotected:" "${RED}${unprotected}${RESET}" else field_color "Unprotected:" "${GREEN}0${RESET}" fi } # ══════════════════════════════════════════════════════════════════════ # HELP # ══════════════════════════════════════════════════════════════════════ show_help() { cat <