#!/usr/bin/env bash ######################################################################################### #### ami-lifecycle-manager.sh — AWS AMI lifecycle management #### #### Create, tag, retain, and deregister AMIs with orphan snapshot cleanup #### #### Requires: bash 4+, aws-cli v2, jq #### #### #### #### Author: Phil Connor #### #### Contact: contact@mylinux.work #### #### License: MIT #### #### Version 1.00 #### #### #### #### Usage: #### #### ./ami-lifecycle-manager.sh --create --instance-id i-1234567890abcdef0 #### #### ./ami-lifecycle-manager.sh --enforce --retention-days 30 #### #### #### #### See --help for all options. #### ######################################################################################### set -euo pipefail # ── Defaults ────────────────────────────────────────────────────────── AWS_REGION="${AWS_REGION:-}" AWS_PROFILE_OPT="${AWS_PROFILE:-}" AMI_RETENTION_DAYS="${AMI_RETENTION_DAYS:-30}" AMI_NAME_PREFIX="${AMI_NAME_PREFIX:-ami-lifecycle}" VERBOSE="${VERBOSE:-false}" COLOR="${COLOR:-auto}" DRY_RUN="false" OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}" # ── State ───────────────────────────────────────────────────────────── SCRIPT_NAME="$(basename "$0")" readonly SCRIPT_NAME RUN_MODE="" INSTANCE_ID="" RETENTION_DAYS="$AMI_RETENTION_DAYS" START_TIME="" MANAGER_TAG="ami-lifecycle-manager" # ── Colors ──────────────────────────────────────────────────────────── RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET="" setup_colors() { if [[ "$COLOR" == "never" ]]; then RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET="" return fi if [[ "$COLOR" == "auto" && ! 
-t 1 ]]; then RED="" GREEN="" YELLOW="" BLUE="" BOLD="" DIM="" RESET="" return fi RED="\033[0;31m" GREEN="\033[0;32m" YELLOW="\033[0;33m" BLUE="\033[0;34m" BOLD="\033[1m" DIM="\033[2m" RESET="\033[0m" } # ── Logging ─────────────────────────────────────────────────────────── log_info() { printf "${GREEN}[INFO]${RESET} %s\n" "$*"; } log_warn() { printf "${YELLOW}[WARN]${RESET} %s\n" "$*" >&2; } log_error() { printf "${RED}[ERROR]${RESET} %s\n" "$*" >&2; } log_verbose() { [[ "$VERBOSE" == "true" ]] && printf "${DIM}[DEBUG] %s${RESET}\n" "$*"; } # ── Helpers ─────────────────────────────────────────────────────────── die() { log_error "$@"; exit 1; } today_utc() { date -u +%Y-%m-%d; } epoch_from_date() { local d="$1" if date --version >/dev/null 2>&1; then date -d "$d" +%s else date -j -f "%Y-%m-%d" "$d" +%s fi } days_since() { local created="$1" local now now=$(date -u +%s) local then then=$(epoch_from_date "$created") echo $(( (now - then) / 86400 )) } date_offset_days() { local base="$1" offset="$2" if date --version >/dev/null 2>&1; then date -d "${base} +${offset} days" +%Y-%m-%d else date -j -v+"${offset}d" -f "%Y-%m-%d" "$base" +%Y-%m-%d fi } # ── AWS CLI wrapper ─────────────────────────────────────────────────── aws_cmd() { local args=("$@") [[ -n "$AWS_REGION" ]] && args+=(--region "$AWS_REGION") [[ -n "$AWS_PROFILE_OPT" ]] && args+=(--profile "$AWS_PROFILE_OPT") log_verbose "aws ${args[*]}" aws "${args[@]}" } # ── Dependency check ───────────────────────────────────────────────── check_deps() { local missing=() command -v aws >/dev/null 2>&1 || missing+=("aws-cli") command -v jq >/dev/null 2>&1 || missing+=("jq") if (( ${#missing[@]} > 0 )); then die "Missing required tools: ${missing[*]}" fi local bash_major="${BASH_VERSINFO[0]}" if (( bash_major < 4 )); then die "Requires bash 4+, found ${BASH_VERSION}" fi # Verify AWS credentials if ! 
aws_cmd sts get-caller-identity --output text >/dev/null 2>&1; then die "AWS credentials not configured or expired" fi # Determine region if [[ -z "$AWS_REGION" ]]; then AWS_REGION=$(aws configure get region 2>/dev/null || echo "") if [[ -z "$AWS_REGION" ]]; then die "AWS_REGION is required (set via env var, --region, or aws configure)" fi fi } # ── Header ──────────────────────────────────────────────────────────── print_header() { local account_id account_id=$(aws_cmd sts get-caller-identity --query Account --output text 2>/dev/null || echo "unknown") echo "AMI Lifecycle Manager" echo "Account: $account_id" echo "Region: $AWS_REGION" echo "Mode: $RUN_MODE" echo "Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)" echo "" } # ── Usage ───────────────────────────────────────────────────────────── usage() { cat < 0 )); do case "$1" in --create) RUN_MODE="create"; shift ;; --enforce) RUN_MODE="enforce"; shift ;; --clean-snapshots) RUN_MODE="clean-snapshots"; shift ;; --inventory) RUN_MODE="inventory"; shift ;; --instance-id) [[ $# -lt 2 ]] && die "--instance-id requires a value" INSTANCE_ID="$2"; shift 2 ;; --retention-days) [[ $# -lt 2 ]] && die "--retention-days requires a value" RETENTION_DAYS="$2"; shift 2 ;; --dry-run) DRY_RUN="true"; shift ;; --format) [[ $# -lt 2 ]] && die "--format requires a value" OUTPUT_FORMAT="$2"; shift 2 ;; --profile) [[ $# -lt 2 ]] && die "--profile requires a value" AWS_PROFILE_OPT="$2"; shift 2 ;; --region) [[ $# -lt 2 ]] && die "--region requires a value" AWS_REGION="$2"; shift 2 ;; --verbose) VERBOSE="true"; shift ;; --no-color) COLOR="never"; shift ;; --help|-h) usage ;; *) die "Unknown option: $1 (see --help)" ;; esac done if [[ -z "$RUN_MODE" ]]; then log_error "No mode specified" echo "" usage fi if [[ "$RUN_MODE" == "create" && -z "$INSTANCE_ID" ]]; then die "--create requires --instance-id" fi case "$OUTPUT_FORMAT" in text|csv|json) ;; *) die "Invalid --format: $OUTPUT_FORMAT (expected text, csv, json)" ;; esac if ! 
[[ "$RETENTION_DAYS" =~ ^[0-9]+$ ]]; then die "--retention-days must be a positive integer" fi } # ── Get instance name ───────────────────────────────────────────────── get_instance_name() { local iid="$1" aws_cmd ec2 describe-instances \ --instance-ids "$iid" \ --query 'Reservations[0].Instances[0].Tags[?Key==`Name`].Value | [0]' \ --output text 2>/dev/null || echo "N/A" } # ── Create AMI ──────────────────────────────────────────────────────── create_ami() { local instance_id="$INSTANCE_ID" local today today="$(today_utc)" log_info "Creating AMI from instance ${instance_id}..." # Get instance name local instance_name instance_name=$(get_instance_name "$instance_id") if [[ "$instance_name" == "None" || -z "$instance_name" ]]; then instance_name="unnamed" fi log_info "Instance name: ${instance_name}" # Build AMI name local ami_name="${AMI_NAME_PREFIX}-${instance_name}-${today}" local ami_description="AMI created by ${MANAGER_TAG} from ${instance_id} (${instance_name}) on ${today}" # Calculate expiry date local expires expires=$(date_offset_days "$today" "$RETENTION_DAYS") # Create the AMI (no-reboot to avoid downtime) local ami_id ami_id=$(aws_cmd ec2 create-image \ --instance-id "$instance_id" \ --name "$ami_name" \ --description "$ami_description" \ --no-reboot \ --query 'ImageId' \ --output text 2>/dev/null) || die "Failed to create AMI from ${instance_id}" log_info "AMI created: ${ami_id}" log_info "Name: ${ami_name}" # Tag the AMI aws_cmd ec2 create-tags \ --resources "$ami_id" \ --tags \ "Key=Name,Value=${ami_name}" \ "Key=managed-by,Value=${MANAGER_TAG}" \ "Key=source-instance,Value=${instance_id}" \ "Key=source-name,Value=${instance_name}" \ "Key=created-date,Value=${today}" \ "Key=retention-days,Value=${RETENTION_DAYS}" \ "Key=expires,Value=${expires}" \ >/dev/null 2>&1 || log_warn "Failed to tag AMI ${ami_id}" log_info "Tags applied:" printf " %-16s = %s\n" "managed-by" "$MANAGER_TAG" printf " %-16s = %s\n" "source-instance" "$instance_id" printf " %-16s 
= %s\n" "source-name" "$instance_name" printf " %-16s = %s\n" "created-date" "$today" printf " %-16s = %s\n" "retention-days" "$RETENTION_DAYS" printf " %-16s = %s\n" "expires" "$expires" # Wait briefly for snapshots to appear, then tag them too log_verbose "Waiting for AMI snapshots to register..." local retries=0 local snap_ids="" while (( retries < 12 )); do snap_ids=$(aws_cmd ec2 describe-images \ --image-ids "$ami_id" \ --query 'Images[0].BlockDeviceMappings[*].Ebs.SnapshotId' \ --output text 2>/dev/null || echo "") if [[ -n "$snap_ids" && "$snap_ids" != "None" ]]; then break fi sleep 5 ((retries++)) || true done if [[ -n "$snap_ids" && "$snap_ids" != "None" ]]; then for snap_id in $snap_ids; do aws_cmd ec2 create-tags \ --resources "$snap_id" \ --tags \ "Key=managed-by,Value=${MANAGER_TAG}" \ "Key=source-ami,Value=${ami_id}" \ "Key=source-instance,Value=${instance_id}" \ "Key=created-date,Value=${today}" \ >/dev/null 2>&1 || log_warn "Failed to tag snapshot ${snap_id}" log_verbose "Tagged snapshot ${snap_id}" done fi } # ── Get managed AMIs ────────────────────────────────────────────────── get_managed_amis() { local account_id account_id=$(aws_cmd sts get-caller-identity --query Account --output text 2>/dev/null) aws_cmd ec2 describe-images \ --owners "$account_id" \ --filters "Name=tag:managed-by,Values=${MANAGER_TAG}" \ --query 'Images[*]' \ --output json 2>/dev/null || echo "[]" } # ── Enforce retention ───────────────────────────────────────────────── enforce_retention() { log_info "Enforcing retention policy (${RETENTION_DAYS} days)..." 
if [[ "$DRY_RUN" == "true" ]]; then log_info "DRY RUN — no AMIs will be deregistered" fi local amis_json amis_json=$(get_managed_amis) local total total=$(echo "$amis_json" | jq 'length') log_info "Found ${total} managed AMI(s)" if (( total == 0 )); then log_info "No managed AMIs found — nothing to do" return fi local today today=$(today_utc) local active=0 expired=0 deregistered=0 # Print table header for text output if [[ "$OUTPUT_FORMAT" == "text" ]]; then echo "" printf " %-24s %-42s %-6s %-11s %s\n" "AMI" "NAME" "AGE" "RETENTION" "STATUS" echo " ──────────────────────────────────────────────────────────────────────────────────────" fi local csv_lines=() local json_items=() while IFS=$'\t' read -r ami_id ami_name created_date retention_tag; do [[ -z "$ami_id" || "$ami_id" == "null" ]] && continue # Use tag retention or default local ret="${retention_tag}" if [[ -z "$ret" || "$ret" == "null" || "$ret" == "None" ]]; then ret="$RETENTION_DAYS" fi local age=0 if [[ -n "$created_date" && "$created_date" != "null" && "$created_date" != "None" ]]; then age=$(days_since "$created_date") fi local status="active" if (( age > ret )); then status="expired" ((expired++)) || true else ((active++)) || true fi case "$OUTPUT_FORMAT" in text) local status_icon="✓ active" if [[ "$status" == "expired" ]]; then status_icon="✗ expired" fi printf " %-24s %-42s %3dd %3dd %s\n" \ "$ami_id" "$ami_name" "$age" "$ret" "$status_icon" ;; csv) csv_lines+=("\"${ami_id}\",\"${ami_name}\",${age},${ret},\"${status}\"") ;; json) json_items+=("{\"ami_id\":\"${ami_id}\",\"name\":\"${ami_name}\",\"age_days\":${age},\"retention_days\":${ret},\"status\":\"${status}\"}") ;; esac # Deregister expired AMIs if [[ "$status" == "expired" ]]; then if [[ "$DRY_RUN" == "true" ]]; then log_info "[DRY RUN] Would deregister ${ami_id} (${age}d old, retention ${ret}d)" else log_info "Deregistering ${ami_id} (${age}d old, retention ${ret}d)..." 
if aws_cmd ec2 deregister-image --image-id "$ami_id" >/dev/null 2>&1; then ((deregistered++)) || true else log_warn "Failed to deregister ${ami_id}" fi fi fi done < <(echo "$amis_json" | jq -r '.[] | [.ImageId, (.Tags // [] | map(select(.Key == "Name")) | .[0].Value // "N/A"), (.Tags // [] | map(select(.Key == "created-date")) | .[0].Value // ""), (.Tags // [] | map(select(.Key == "retention-days")) | .[0].Value // "")] | @tsv') echo "" case "$OUTPUT_FORMAT" in text) echo "Summary" printf " Total managed AMIs: %d\n" "$total" printf " Active: %d\n" "$active" printf " Expired: %d\n" "$expired" if [[ "$DRY_RUN" == "true" ]]; then printf " Would deregister: %d\n" "$expired" else printf " Deregistered: %d\n" "$deregistered" fi ;; csv) echo "ami_id,name,age_days,retention_days,status" for line in "${csv_lines[@]}"; do echo "$line" done ;; json) local joined joined=$(printf ",%s" "${json_items[@]}") joined="${joined:1}" printf '{"mode":"enforce","retention_days":%d,"dry_run":%s,"total":%d,"active":%d,"expired":%d,"items":[%s]}\n' \ "$RETENTION_DAYS" "$DRY_RUN" "$total" "$active" "$expired" "$joined" ;; esac } # ── Clean orphan snapshots ──────────────────────────────────────────── clean_orphan_snapshots() { log_info "Searching for orphaned AMI snapshots..." 
if [[ "$DRY_RUN" == "true" ]]; then log_info "DRY RUN — no snapshots will be deleted" fi local account_id account_id=$(aws_cmd sts get-caller-identity --query Account --output text 2>/dev/null) # Get all snapshots tagged as managed by us local snaps_json snaps_json=$(aws_cmd ec2 describe-snapshots \ --owner-ids "$account_id" \ --filters "Name=tag:managed-by,Values=${MANAGER_TAG}" \ --query 'Snapshots[*]' \ --output json 2>/dev/null) || die "Failed to describe snapshots" local total_snaps total_snaps=$(echo "$snaps_json" | jq 'length') log_info "Found ${total_snaps} managed snapshot(s)" if (( total_snaps == 0 )); then log_info "No managed snapshots found — nothing to do" return fi # Get all currently registered AMI IDs local registered_amis registered_amis=$(aws_cmd ec2 describe-images \ --owners "$account_id" \ --query 'Images[*].ImageId' \ --output text 2>/dev/null) || die "Failed to describe images" local orphan_count=0 local deleted_count=0 local total_size=0 while IFS=$'\t' read -r snap_id source_ami snap_size; do [[ -z "$snap_id" || "$snap_id" == "null" ]] && continue # Check if the source AMI still exists local is_orphan="false" if [[ -z "$source_ami" || "$source_ami" == "null" || "$source_ami" == "None" ]]; then is_orphan="true" elif ! echo "$registered_amis" | grep -qw "$source_ami" 2>/dev/null; then is_orphan="true" fi if [[ "$is_orphan" == "true" ]]; then ((orphan_count++)) || true local size_gb="${snap_size:-0}" ((total_size += size_gb)) || true if [[ "$DRY_RUN" == "true" ]]; then log_info "[DRY RUN] Would delete orphan snapshot ${snap_id} (${size_gb} GiB, source AMI: ${source_ami:-unknown})" else log_info "Deleting orphan snapshot ${snap_id} (${size_gb} GiB)..." 
if aws_cmd ec2 delete-snapshot --snapshot-id "$snap_id" >/dev/null 2>&1; then ((deleted_count++)) || true else log_warn "Failed to delete snapshot ${snap_id}" fi fi fi done < <(echo "$snaps_json" | jq -r '.[] | [.SnapshotId, (.Tags // [] | map(select(.Key == "source-ami")) | .[0].Value // ""), (.VolumeSize // 0 | tostring)] | @tsv') echo "" echo "Summary" printf " Total managed snapshots: %d\n" "$total_snaps" printf " Orphaned: %d\n" "$orphan_count" if [[ "$DRY_RUN" == "true" ]]; then printf " Would delete: %d\n" "$orphan_count" else printf " Deleted: %d\n" "$deleted_count" fi printf " Storage reclaimed: %d GiB\n" "$total_size" } # ── Inventory report ────────────────────────────────────────────────── inventory_report() { log_info "Generating AMI inventory report..." local amis_json amis_json=$(get_managed_amis) local total total=$(echo "$amis_json" | jq 'length') log_info "Found ${total} managed AMI(s)" if (( total == 0 )); then log_info "No managed AMIs found" return fi local account_id account_id=$(aws_cmd sts get-caller-identity --query Account --output text 2>/dev/null) case "$OUTPUT_FORMAT" in text) echo "" printf " %-24s %-30s %-12s %-6s %-11s %s\n" \ "AMI" "SOURCE INSTANCE" "CREATED" "AGE" "RETENTION" "SNAPSHOTS" echo " ────────────────────────────────────────────────────────────────────────────────────────────────" ;; csv) echo "ami_id,name,source_instance,source_name,created_date,age_days,retention_days,expires,snapshot_count" ;; esac local json_items=() while IFS=$'\t' read -r ami_id ami_name source_instance source_name created_date retention_tag expires_tag snap_count; do [[ -z "$ami_id" || "$ami_id" == "null" ]] && continue # Defaults for missing tags [[ "$source_instance" == "null" || -z "$source_instance" ]] && source_instance="N/A" [[ "$source_name" == "null" || -z "$source_name" ]] && source_name="" [[ "$created_date" == "null" || -z "$created_date" ]] && created_date="unknown" [[ "$retention_tag" == "null" || -z "$retention_tag" ]] && 
retention_tag="$RETENTION_DAYS" [[ "$expires_tag" == "null" || -z "$expires_tag" ]] && expires_tag="N/A" [[ "$snap_count" == "null" || -z "$snap_count" ]] && snap_count="0" local age=0 if [[ "$created_date" != "unknown" ]]; then age=$(days_since "$created_date") fi local instance_display="$source_instance" if [[ -n "$source_name" && "$source_name" != "N/A" ]]; then instance_display="${source_instance} (${source_name})" fi case "$OUTPUT_FORMAT" in text) printf " %-24s %-30s %-12s %3dd %3dd %s\n" \ "$ami_id" "$instance_display" "$created_date" "$age" "$retention_tag" "$snap_count" ;; csv) echo "\"${ami_id}\",\"${ami_name}\",\"${source_instance}\",\"${source_name}\",\"${created_date}\",${age},${retention_tag},\"${expires_tag}\",${snap_count}" ;; json) json_items+=("{\"ami_id\":\"${ami_id}\",\"name\":\"${ami_name}\",\"source_instance\":\"${source_instance}\",\"source_name\":\"${source_name}\",\"created_date\":\"${created_date}\",\"age_days\":${age},\"retention_days\":${retention_tag},\"expires\":\"${expires_tag}\",\"snapshot_count\":${snap_count}}") ;; esac done < <(echo "$amis_json" | jq -r '.[] | [ .ImageId, (.Tags // [] | map(select(.Key == "Name")) | .[0].Value // "N/A"), (.Tags // [] | map(select(.Key == "source-instance")) | .[0].Value // ""), (.Tags // [] | map(select(.Key == "source-name")) | .[0].Value // ""), (.Tags // [] | map(select(.Key == "created-date")) | .[0].Value // ""), (.Tags // [] | map(select(.Key == "retention-days")) | .[0].Value // ""), (.Tags // [] | map(select(.Key == "expires")) | .[0].Value // ""), (.BlockDeviceMappings // [] | map(select(.Ebs.SnapshotId)) | length | tostring) ] | @tsv') if [[ "$OUTPUT_FORMAT" == "json" ]]; then local joined joined=$(printf ",%s" "${json_items[@]}") joined="${joined:1}" printf '{"mode":"inventory","total":%d,"items":[%s]}\n' "$total" "$joined" fi if [[ "$OUTPUT_FORMAT" == "text" ]]; then echo "" printf " Total: %d managed AMI(s)\n" "$total" fi } # ── Main 
# Script entry point: parse options, prepare the environment, print the
# banner, run the selected mode, then report the elapsed wall-clock time.
# Arguments: the script's command-line arguments.
main() {
  parse_args "$@"
  setup_colors
  check_deps

  START_TIME=$(date +%s)
  print_header

  case "$RUN_MODE" in
    create)
      create_ami
      ;;
    enforce)
      enforce_retention
      ;;
    clean-snapshots)
      clean_orphan_snapshots
      ;;
    inventory)
      inventory_report
      ;;
    *)
      die "Unknown mode: $RUN_MODE"
      ;;
  esac

  local finished_at
  finished_at=$(date +%s)
  local elapsed=$(( finished_at - START_TIME ))
  log_info "Completed in ${elapsed}s"
}

main "$@"