#!/usr/bin/env bash ######################################################################################### #### s3-bucket-manager.sh — Copy, sync, delete, audit, and manage AWS S3 buckets #### #### Works with any AWS credential method — SSO, assume-role, env vars, profiles #### #### Requires: bash 4+, aws-cli v2, jq #### #### #### #### Author: Phil Connor #### #### Contact: contact@mylinux.work #### #### License: MIT #### #### Version 1.01 #### #### #### #### Usage: #### #### export AWS_PROFILE="production" #### #### ./s3-bucket-manager.sh --list-buckets #### #### ./s3-bucket-manager.sh --copy s3://src-bucket s3://dst-bucket #### #### #### #### See --help for all options. #### ######################################################################################### set -euo pipefail # ── Defaults ────────────────────────────────────────────────────────── AWS_REGION="${AWS_REGION:-}" DRY_RUN="${DRY_RUN:-false}" DELETE_OLDER_THAN="${DELETE_OLDER_THAN:-}" DELETE_PREFIX="${DELETE_PREFIX:-}" INCLUDE_PATTERN="${INCLUDE_PATTERN:-}" EXCLUDE_PATTERN="${EXCLUDE_PATTERN:-}" STORAGE_CLASS="${STORAGE_CLASS:-}" ACL="${ACL:-}" SSE="${SSE:-}" VERBOSE="${VERBOSE:-false}" COLOR="${COLOR:-auto}" PARALLEL="${PARALLEL:-true}" # ── State ───────────────────────────────────────────────────────────── SCRIPT_NAME="$(basename "$0")" readonly SCRIPT_NAME RUN_MODE="" SOURCE_PATH="" DEST_PATH="" TARGET_BUCKET="" START_TIME="" WARNINGS=0 # ── Colors ──────────────────────────────────────────────────────────── setup_colors() { if [[ "$COLOR" == "never" ]]; then RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET="" return fi if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m' BLUE='\033[0;34m' BOLD='\033[1m' RESET='\033[0m' else RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET="" fi } # ── Logging ─────────────────────────────────────────────────────────── log() { echo -e "${BLUE}[INFO]${RESET} $*"; } warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; ((WARNINGS++)) || true; } err() { echo -e "${RED}[ERROR]${RESET} $*" >&2; } verbose() { if [[ "$VERBOSE" == "true" ]]; then echo -e "${BLUE}[DEBUG]${RESET} $*"; fi; } # ── AWS CLI wrapper ─────────────────────────────────────────────────── aws_cmd() { local args=("$@") [[ -n "$AWS_REGION" ]] && args+=(--region "$AWS_REGION") verbose "aws ${args[*]}" aws "${args[@]}" } # ── Credential check ───────────────────────────────────────────────── check_deps() { for cmd in aws jq; do if ! command -v "$cmd" &>/dev/null; then err "${cmd} is required but not installed" exit 1 fi done # Verify credentials are valid local identity identity=$(aws sts get-caller-identity 2>&1) || { err "AWS credentials not configured, expired, or invalid" echo "" >&2 echo "Supported credential methods:" >&2 echo " • AWS_PROFILE — named profile from ~/.aws/credentials" >&2 echo " • AWS SSO — run 'aws sso login --profile your-profile'" >&2 echo " • Environment vars — AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY + AWS_SESSION_TOKEN" >&2 echo " • Instance profile — automatic on EC2/ECS" >&2 echo " • AWS_ROLE_ARN — assume role via STS" >&2 exit 1 } local account arn account=$(echo "$identity" | jq -r '.Account') arn=$(echo "$identity" | jq -r '.Arn') verbose "Account: ${account}" verbose "Identity: ${arn}" # Check for session expiry (if using temporary credentials) if [[ -n "${AWS_SESSION_TOKEN:-}" ]]; then verbose "Using temporary credentials (session token present)" fi # Determine region if not set if [[ -z "$AWS_REGION" ]]; then AWS_REGION=$(aws configure get region 2>/dev/null || echo "") if [[ -n "$AWS_REGION" ]]; then verbose "Using region from config: ${AWS_REGION}" fi fi log "Authenticated as ${arn}" } # ── Human-readable sizes ───────────────────────────────────────────── human_size() { local bytes="$1" if [[ "$bytes" -ge 1099511627776 ]]; then printf "%.1f TiB" "$(echo "$bytes / 1099511627776" | bc -l)" elif [[ "$bytes" -ge 1073741824 ]]; then printf "%.1f GiB" "$(echo "$bytes / 1073741824" | bc -l)" elif [[ "$bytes" -ge 1048576 ]]; then printf "%.1f MiB" "$(echo "$bytes / 1048576" | bc -l)" elif [[ "$bytes" -ge 1024 ]]; then printf "%.1f KiB" "$(echo "$bytes / 1024" | bc -l)" else echo "${bytes} B" fi } # ══════════════════════════════════════════════════════════════════════ # LIST BUCKETS # ══════════════════════════════════════════════════════════════════════ do_list_buckets() { log "Listing S3 buckets..." local buckets_json buckets_json=$(aws_cmd s3api list-buckets --output json) local bucket_count bucket_count=$(echo "$buckets_json" | jq '.Buckets | length') if [[ "$bucket_count" -eq 0 ]]; then log "No buckets found" return fi echo "" printf " %-40s %-22s %s\n" "BUCKET" "CREATED" "REGION" echo " $(printf '%.0s─' {1..80})" echo "$buckets_json" | jq -c '.Buckets[]' | while IFS= read -r bucket; do local name created region name=$(echo "$bucket" | jq -r '.Name') created=$(echo "$bucket" | jq -r '.CreationDate' | cut -c1-19) region=$(aws_cmd s3api get-bucket-location \ --bucket "$name" \ --query 'LocationConstraint' \ --output text 2>/dev/null) || region="error" [[ "$region" == "None" || "$region" == "null" ]] && region="us-east-1" printf " %-40s %-22s %s\n" "$name" "$created" "$region" done echo "" log "Total: ${bucket_count} bucket(s)" } # ══════════════════════════════════════════════════════════════════════ # LIST OBJECTS # ══════════════════════════════════════════════════════════════════════ do_list_objects() { if [[ -z "$TARGET_BUCKET" ]]; then err "Bucket required. Use --list s3://bucket-name[/prefix]" exit 1 fi local bucket prefix bucket="${TARGET_BUCKET#s3://}" prefix="" if [[ "$bucket" == */* ]]; then prefix="${bucket#*/}" bucket="${bucket%%/*}" fi log "Listing objects in s3://${bucket}/${prefix}..." local list_args=(s3api list-objects-v2 --bucket "$bucket" --output json) [[ -n "$prefix" ]] && list_args+=(--prefix "$prefix") local objects_json objects_json=$(aws_cmd "${list_args[@]}" 2>/dev/null) || { err "Failed to list objects in s3://${bucket}/${prefix}" exit 1 } local obj_count obj_count=$(echo "$objects_json" | jq '.Contents // [] | length') if [[ "$obj_count" -eq 0 ]]; then log "No objects found" return fi echo "" printf " %12s %-22s %s\n" "SIZE" "MODIFIED" "KEY" echo " $(printf '%.0s─' {1..90})" echo "$objects_json" | jq -c '.Contents[]' | while IFS= read -r obj; do local key size modified key=$(echo "$obj" | jq -r '.Key') size=$(echo "$obj" | jq -r '.Size') modified=$(echo "$obj" | jq -r '.LastModified' | cut -c1-19) printf " %12s %-22s %s\n" "$(human_size "$size")" "$modified" "$key" done local total_size total_size=$(echo "$objects_json" | jq '[.Contents[].Size] | add // 0') echo "" log "Objects: ${obj_count}, Total size: $(human_size "$total_size")" } # ══════════════════════════════════════════════════════════════════════ # COPY # ══════════════════════════════════════════════════════════════════════ do_copy() { if [[ -z "$SOURCE_PATH" || -z "$DEST_PATH" ]]; then err "Source and destination required. Use --copy s3://src s3://dst" exit 1 fi log "Copying ${SOURCE_PATH} → ${DEST_PATH}..." local cp_args=(s3 cp "$SOURCE_PATH" "$DEST_PATH" --recursive) [[ "$DRY_RUN" == "true" ]] && cp_args+=(--dryrun) [[ -n "$INCLUDE_PATTERN" ]] && cp_args+=(--include "$INCLUDE_PATTERN") [[ -n "$EXCLUDE_PATTERN" ]] && cp_args+=(--exclude "$EXCLUDE_PATTERN") [[ -n "$STORAGE_CLASS" ]] && cp_args+=(--storage-class "$STORAGE_CLASS") [[ -n "$ACL" ]] && cp_args+=(--acl "$ACL") [[ -n "$SSE" ]] && cp_args+=(--sse "$SSE") if [[ "$DRY_RUN" == "true" ]]; then log "${YELLOW}DRY RUN${RESET} — no objects will be copied" fi aws_cmd "${cp_args[@]}" if [[ "$DRY_RUN" != "true" ]]; then echo "" echo -e " ${GREEN}✓${RESET} Copy complete" fi } # ══════════════════════════════════════════════════════════════════════ # SYNC # ══════════════════════════════════════════════════════════════════════ do_sync() { if [[ -z "$SOURCE_PATH" || -z "$DEST_PATH" ]]; then err "Source and destination required. Use --sync s3://src s3://dst" exit 1 fi log "Syncing ${SOURCE_PATH} → ${DEST_PATH}..." local sync_args=(s3 sync "$SOURCE_PATH" "$DEST_PATH") [[ "$DRY_RUN" == "true" ]] && sync_args+=(--dryrun) [[ -n "$INCLUDE_PATTERN" ]] && sync_args+=(--include "$INCLUDE_PATTERN") [[ -n "$EXCLUDE_PATTERN" ]] && sync_args+=(--exclude "$EXCLUDE_PATTERN") [[ -n "$STORAGE_CLASS" ]] && sync_args+=(--storage-class "$STORAGE_CLASS") [[ -n "$ACL" ]] && sync_args+=(--acl "$ACL") [[ -n "$SSE" ]] && sync_args+=(--sse "$SSE") if [[ "$DRY_RUN" == "true" ]]; then log "${YELLOW}DRY RUN${RESET} — no objects will be synced" fi aws_cmd "${sync_args[@]}" if [[ "$DRY_RUN" != "true" ]]; then echo "" echo -e " ${GREEN}✓${RESET} Sync complete" fi } # ══════════════════════════════════════════════════════════════════════ # DELETE # ══════════════════════════════════════════════════════════════════════ do_delete() { if [[ -z "$TARGET_BUCKET" ]]; then err "Bucket required. Use --delete s3://bucket-name[/prefix]" exit 1 fi local bucket prefix bucket="${TARGET_BUCKET#s3://}" prefix="" if [[ "$bucket" == */* ]]; then prefix="${bucket#*/}" bucket="${bucket%%/*}" fi # Safety: require prefix or --all or --older-than if [[ -z "$prefix" && -z "$DELETE_PREFIX" && -z "$DELETE_OLDER_THAN" ]]; then err "Refusing to delete all objects without explicit confirmation" err "Use one of:" err " --delete s3://bucket/prefix — delete by prefix" err " --delete s3://bucket --prefix pfx — delete by prefix" err " --delete s3://bucket --older-than 30d — delete by age" err " --empty s3://bucket — empty entire bucket" exit 1 fi [[ -n "$DELETE_PREFIX" ]] && prefix="$DELETE_PREFIX" if [[ -n "$DELETE_OLDER_THAN" ]]; then do_delete_by_age "$bucket" "$prefix" else do_delete_by_prefix "$bucket" "$prefix" fi } do_delete_by_prefix() { local bucket="$1" local prefix="$2" log "Deleting objects from s3://${bucket}/${prefix}..." if [[ "$DRY_RUN" == "true" ]]; then log "${YELLOW}DRY RUN${RESET} — listing objects that would be deleted" aws_cmd s3 rm "s3://${bucket}/${prefix}" --recursive --dryrun return fi aws_cmd s3 rm "s3://${bucket}/${prefix}" --recursive echo "" echo -e " ${GREEN}✓${RESET} Delete complete" } do_delete_by_age() { local bucket="$1" local prefix="$2" # Parse age (e.g., 30d, 12h, 90d) local age_value age_unit cutoff_epoch age_value="${DELETE_OLDER_THAN%%[dhDH]*}" age_unit="${DELETE_OLDER_THAN##*[0-9]}" age_unit="${age_unit,,}" # lowercase case "$age_unit" in d) cutoff_epoch=$(date -d "-${age_value} days" +%s 2>/dev/null) || \ cutoff_epoch=$(date -v-"${age_value}"d +%s 2>/dev/null) ;; h) cutoff_epoch=$(date -d "-${age_value} hours" +%s 2>/dev/null) || \ cutoff_epoch=$(date -v-"${age_value}"H +%s 2>/dev/null) ;; *) err "Invalid age format '${DELETE_OLDER_THAN}'. Use Nd or Nh (e.g., 30d, 12h)"; exit 1 ;; esac local cutoff_date cutoff_date=$(date -d "@${cutoff_epoch}" +%Y-%m-%dT%H:%M:%S 2>/dev/null) || \ cutoff_date=$(date -r "${cutoff_epoch}" +%Y-%m-%dT%H:%M:%S 2>/dev/null) log "Deleting objects older than ${DELETE_OLDER_THAN} (before ${cutoff_date})..." local list_args=(s3api list-objects-v2 --bucket "$bucket" --output json) [[ -n "$prefix" ]] && list_args+=(--prefix "$prefix") local objects_json objects_json=$(aws_cmd "${list_args[@]}" 2>/dev/null) echo "$objects_json" | jq -c '.Contents // [] | .[]' | while IFS= read -r obj; do local key modified size modified_epoch key=$(echo "$obj" | jq -r '.Key') modified=$(echo "$obj" | jq -r '.LastModified') size=$(echo "$obj" | jq -r '.Size') modified_epoch=$(date -d "$modified" +%s 2>/dev/null) || \ modified_epoch=$(date -jf "%Y-%m-%dT%H:%M:%S" "${modified%%.*}" +%s 2>/dev/null) || modified_epoch=0 if [[ $modified_epoch -lt $cutoff_epoch ]]; then local age_days=$(( ($(date +%s) - modified_epoch) / 86400 )) if [[ "$DRY_RUN" == "true" ]]; then echo -e " ${YELLOW}⊘${RESET} ${key} — ${age_days}d old, $(human_size "$size") (would delete)" else if aws_cmd s3api delete-object --bucket "$bucket" --key "$key" >/dev/null 2>&1; then echo -e " ${GREEN}✓${RESET} ${key} — deleted (${age_days}d old)" else echo -e " ${RED}✗${RESET} ${key} — delete failed" fi fi fi done if [[ "$DRY_RUN" == "true" ]]; then log "${YELLOW}DRY RUN${RESET} — no objects were deleted. Use --force to delete." fi } # ══════════════════════════════════════════════════════════════════════ # EMPTY BUCKET # ══════════════════════════════════════════════════════════════════════ do_empty() { if [[ -z "$TARGET_BUCKET" ]]; then err "Bucket required. Use --empty s3://bucket-name" exit 1 fi local bucket="${TARGET_BUCKET#s3://}" bucket="${bucket%%/*}" log "Emptying s3://${bucket}..." # Check for versioning local versioning versioning=$(aws_cmd s3api get-bucket-versioning \ --bucket "$bucket" \ --query 'Status' \ --output text 2>/dev/null) || versioning="" if [[ "$DRY_RUN" == "true" ]]; then local count count=$(aws_cmd s3api list-objects-v2 \ --bucket "$bucket" \ --query 'KeyCount' \ --output text 2>/dev/null) || count="?" log "${YELLOW}DRY RUN${RESET} — would delete ${count} objects from s3://${bucket}" if [[ "$versioning" == "Enabled" ]]; then log "Bucket has versioning enabled — would also delete all version markers" fi return fi if [[ "$versioning" == "Enabled" ]]; then log "Bucket has versioning enabled — deleting all versions and markers..." # Delete all object versions local versions versions=$(aws_cmd s3api list-object-versions \ --bucket "$bucket" \ --output json 2>/dev/null) # Delete versions echo "$versions" | jq -c '.Versions // [] | .[]' 2>/dev/null | while IFS= read -r ver; do local key version_id key=$(echo "$ver" | jq -r '.Key') version_id=$(echo "$ver" | jq -r '.VersionId') aws_cmd s3api delete-object \ --bucket "$bucket" \ --key "$key" \ --version-id "$version_id" >/dev/null 2>&1 || true done # Delete markers echo "$versions" | jq -c '.DeleteMarkers // [] | .[]' 2>/dev/null | while IFS= read -r marker; do local key version_id key=$(echo "$marker" | jq -r '.Key') version_id=$(echo "$marker" | jq -r '.VersionId') aws_cmd s3api delete-object \ --bucket "$bucket" \ --key "$key" \ --version-id "$version_id" >/dev/null 2>&1 || true done else aws_cmd s3 rm "s3://${bucket}" --recursive fi echo -e " ${GREEN}✓${RESET} Bucket s3://${bucket} emptied" } # ══════════════════════════════════════════════════════════════════════ # BUCKET SIZE # ══════════════════════════════════════════════════════════════════════ do_size() { if [[ -z "$TARGET_BUCKET" ]]; then err "Bucket required. Use --size s3://bucket-name" exit 1 fi local bucket="${TARGET_BUCKET#s3://}" bucket="${bucket%%/*}" log "Calculating size of s3://${bucket}..." # Use CloudWatch for accurate billing-level metrics (last 2 days) local cw_size cw_size=$(aws_cmd cloudwatch get-metric-statistics \ --namespace AWS/S3 \ --metric-name BucketSizeBytes \ --dimensions "Name=BucketName,Value=${bucket}" "Name=StorageType,Value=StandardStorage" \ --start-time "$(date -d '-2 days' -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -v-2d -u +%Y-%m-%dT%H:%M:%SZ)" \ --end-time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ --period 86400 \ --statistics Average \ --query 'sort_by(Datapoints, &Timestamp)[-1].Average' \ --output text 2>/dev/null) || cw_size="" local cw_count cw_count=$(aws_cmd cloudwatch get-metric-statistics \ --namespace AWS/S3 \ --metric-name NumberOfObjects \ --dimensions "Name=BucketName,Value=${bucket}" "Name=StorageType,Value=AllStorageTypes" \ --start-time "$(date -d '-2 days' -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -v-2d -u +%Y-%m-%dT%H:%M:%SZ)" \ --end-time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ --period 86400 \ --statistics Average \ --query 'sort_by(Datapoints, &Timestamp)[-1].Average' \ --output text 2>/dev/null) || cw_count="" echo "" echo -e "${BOLD}Bucket: s3://${bucket}${RESET}" if [[ -n "$cw_size" && "$cw_size" != "None" ]]; then local size_int="${cw_size%%.*}" local count_int="${cw_count%%.*}" local monthly_cost monthly_cost=$(echo "${size_int} / 1073741824 * 0.023" | bc -l 2>/dev/null | head -c 8) || monthly_cost="?" echo -e " Size: $(human_size "$size_int") (CloudWatch metric)" echo -e " Objects: ${count_int}" echo -e " Est. cost: \$${monthly_cost}/month (Standard)" else # Fallback: list and sum (slower, but works without CloudWatch) log "CloudWatch metrics not available — counting objects manually..." local summary summary=$(aws_cmd s3 ls "s3://${bucket}" --recursive --summarize 2>/dev/null | tail -2) local obj_count total_size obj_count=$(echo "$summary" | grep 'Total Objects:' | awk '{print $3}') total_size=$(echo "$summary" | grep 'Total Size:' | awk '{print $3}') echo -e " Size: $(human_size "${total_size:-0}")" echo -e " Objects: ${obj_count:-0}" fi } # ══════════════════════════════════════════════════════════════════════ # AUDIT # ══════════════════════════════════════════════════════════════════════ do_audit() { log "Auditing S3 buckets..." local buckets_json buckets_json=$(aws_cmd s3api list-buckets --output json) local bucket_count bucket_count=$(echo "$buckets_json" | jq '.Buckets | length') if [[ "$bucket_count" -eq 0 ]]; then log "No buckets found" return fi echo "" printf " %-35s %-12s %-12s %-10s %-10s %s\n" \ "BUCKET" "VERSIONING" "ENCRYPTION" "PUBLIC" "LIFECYCLE" "LOGGING" echo " $(printf '%.0s─' {1..100})" echo "$buckets_json" | jq -r '.Buckets[].Name' | while IFS= read -r name; do local versioning encryption public_access lifecycle logging # Versioning versioning=$(aws_cmd s3api get-bucket-versioning \ --bucket "$name" \ --query 'Status' \ --output text 2>/dev/null) || versioning="error" [[ "$versioning" == "None" || -z "$versioning" ]] && versioning="off" # Encryption if aws_cmd s3api get-bucket-encryption --bucket "$name" >/dev/null 2>&1; then encryption="on" else encryption="${RED}off${RESET}" fi # Public access block local public_json public_json=$(aws_cmd s3api get-public-access-block \ --bucket "$name" 2>/dev/null) if [[ -n "$public_json" ]]; then local all_blocked all_blocked=$(echo "$public_json" | jq ' .PublicAccessBlockConfiguration | (.BlockPublicAcls and .IgnorePublicAcls and .BlockPublicPolicy and .RestrictPublicBuckets) ') if [[ "$all_blocked" == "true" ]]; then public_access="blocked" else public_access="${RED}partial${RESET}" fi else public_access="${RED}none${RESET}" fi # Lifecycle rules local lc_json lc_json=$(aws_cmd s3api get-bucket-lifecycle-configuration \ --bucket "$name" 2>/dev/null) if [[ -n "$lc_json" ]]; then local rule_count rule_count=$(echo "$lc_json" | jq '.Rules | length') lifecycle="${rule_count} rules" else lifecycle="none" fi # Logging local log_json log_json=$(aws_cmd s3api get-bucket-logging \ --bucket "$name" \ --query 'LoggingEnabled' \ --output text 2>/dev/null) || log_json="" if [[ -n "$log_json" && "$log_json" != "None" ]]; then logging="on" else logging="${YELLOW}off${RESET}" fi printf " %-35s %-12s %-12b %-10b %-10s %b\n" \ "$name" "$versioning" "$encryption" "$public_access" "$lifecycle" "$logging" done echo "" log "Audited ${bucket_count} bucket(s)" } # ══════════════════════════════════════════════════════════════════════ # PRESIGN # ══════════════════════════════════════════════════════════════════════ do_presign() { if [[ -z "$SOURCE_PATH" ]]; then err "S3 path required. Use --presign s3://bucket/key" exit 1 fi local expires="${PRESIGN_EXPIRES:-3600}" log "Generating presigned URL (expires in ${expires}s)..." local url url=$(aws_cmd s3 presign "$SOURCE_PATH" --expires-in "$expires") || { err "Failed to generate presigned URL" exit 1 } echo "" echo -e "${BOLD}Presigned URL:${RESET}" echo "$url" echo "" echo -e "Expires in: ${expires}s ($(( expires / 60 )) minutes)" } # ══════════════════════════════════════════════════════════════════════ # MAIN # ══════════════════════════════════════════════════════════════════════ show_help() { cat <