#!/usr/bin/env bash
#####################################################################################
#### aws-smoke-tests.sh — Verify AWS connectivity and core service health        ####
#### Checks credentials, S3, EC2, IAM, VPC, Route 53, CloudWatch, Security Hub   ####
####                                                                             ####
#### Author:  Phil Connor                                                        ####
#### Contact: contact@mylinux.work                                               ####
#### License: MIT                                                                ####
#### Version: 1.0                                                                ####
####                                                                             ####
#### Usage: ./aws-smoke-tests.sh                                                 ####
####        AWS_PROFILE=prod S3_BUCKET=my-bucket ./aws-smoke-tests.sh            ####
####                                                                             ####
#### See --help for all options.                                                 ####
#####################################################################################

set -euo pipefail

# ── Defaults ──────────────────────────────────────────────────────────
# Every setting can be overridden from the environment.
# AWS_REGION falls back to AWS_DEFAULT_REGION before us-east-1, and both are
# exported so the region printed in the banner is the region the aws CLI
# child processes actually talk to (an unexported shell variable is invisible
# to them).
AWS_REGION="${AWS_REGION:-${AWS_DEFAULT_REGION:-us-east-1}}"
AWS_DEFAULT_REGION="$AWS_REGION"
export AWS_REGION AWS_DEFAULT_REGION
S3_BUCKET="${S3_BUCKET:-}"
R53_DOMAIN="${R53_DOMAIN:-}"
R53_ZONE_ID="${R53_ZONE_ID:-}"
VPC_ID="${VPC_ID:-}"
COST_THRESHOLD="${COST_THRESHOLD:-}"
SG_CHECK_PORTS="${SG_CHECK_PORTS:-22,3389,3306,5432}"
REQUIRED_PERMISSIONS="${REQUIRED_PERMISSIONS:-}"
OUTPUT_FORMAT="${OUTPUT_FORMAT:-text}"
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"

# ── State ─────────────────────────────────────────────────────────────
# Counters and the ordered list of result lines filled in by record_*.
PASS=0
FAIL=0
SKIP=0
TOTAL=0
RESULTS=()
START_TIME=""
CALLER_ARN=""

# ── Colors ────────────────────────────────────────────────────────────
# Empty by default; setup_colors fills them with escape sequences stored in
# backslash form, which the logging helpers expand with printf '%b'.
# shellcheck disable=SC2034  # GREEN and BOLD are kept for callers of this palette
RED="" GREEN="" YELLOW="" BLUE="" BOLD="" RESET=""

#######################################
# Enable ANSI colors according to COLOR.
# Globals:   COLOR (read); RED GREEN YELLOW BLUE BOLD RESET (written)
# Behavior:  "never" leaves the variables untouched; "always" enables colors;
#            any other value enables them only when stdout is a terminal.
#######################################
setup_colors() {
  if [[ "$COLOR" == "never" ]]; then
    return
  fi
  if [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    BOLD='\033[1m'
    RESET='\033[0m'
  fi
}

# ── Logging ───────────────────────────────────────────────────────────
# Only the color variables go through '%b'; the message is printed with '%s'
# so backslashes inside data (bucket names, ARNs, ...) are emitted literally.

# Informational message on stdout.
log() { printf '%b[INFO]%b %s\n' "$BLUE" "$RESET" "$*"; }

# Warning on stderr.
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$RESET" "$*" >&2; }

# Error on stderr.
err() { printf '%b[ERROR]%b %s\n' "$RED" "$RESET" "$*" >&2; }

# Debug message, printed only when VERBOSE is "true". Written to stderr so it
# never mixes with the TAP/JSON report on stdout. Always returns 0, which
# matters because callers run under `set -e`.
verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    printf '%b[DEBUG]%b %s\n' "$BLUE" "$RESET" "$*" >&2
  fi
}

# ── Test Result Recording ─────────────────────────────────────────────
#######################################
# Record a passing check as a TAP "ok" line.
# Globals:   PASS, TOTAL (incremented); RESULTS (appended)
# Arguments: $1 - check name
#            $2 - optional detail, appended in parentheses
#######################################
record_pass() {
  local name="$1" detail="${2:-}"
  PASS=$((PASS + 1))
  TOTAL=$((TOTAL + 1))
  local msg="ok ${TOTAL} - ${name}"
  if [[ -n "$detail" ]]; then
    msg="${msg} (${detail})"
  fi
  RESULTS+=("$msg")
  verbose "PASS: ${name} ${detail}"
}

#######################################
# Record a failing check as a TAP "not ok" line.
# Globals:   FAIL, TOTAL (incremented); RESULTS (appended)
# Arguments: $1 - check name
#            $2 - optional detail, appended in parentheses
#######################################
record_fail() {
  local name="$1" detail="${2:-}"
  FAIL=$((FAIL + 1))
  TOTAL=$((TOTAL + 1))
  local msg="not ok ${TOTAL} - ${name}"
  if [[ -n "$detail" ]]; then
    msg="${msg} (${detail})"
  fi
  RESULTS+=("$msg")
  verbose "FAIL: ${name} ${detail}"
}

#######################################
# Record a skipped check. The line format ("ok N - # SKIP name — reason") is
# what print_json parses, so it must stay in sync with that function.
# Globals:   SKIP, TOTAL (incremented); RESULTS (appended)
# Arguments: $1 - check name
#            $2 - optional reason, appended after an em dash
#######################################
record_skip() {
  local name="$1" reason="${2:-}"
  SKIP=$((SKIP + 1))
  TOTAL=$((TOTAL + 1))
  local msg="ok ${TOTAL} - # SKIP ${name}"
  if [[ -n "$reason" ]]; then
    msg="${msg} — ${reason}"
  fi
  RESULTS+=("$msg")
  verbose "SKIP: ${name} ${reason}"
}

# ── Dependency Check ──────────────────────────────────────────────────

#######################################
# Abort with exit status 1 unless both `aws` and `jq` are on PATH.
# `dig` is optional and checked where it is used (test_route53).
#######################################
check_dependencies() {
  local missing=()
  command -v aws >/dev/null 2>&1 || missing+=("aws-cli")
  command -v jq >/dev/null 2>&1 || missing+=("jq")
  if [[ ${#missing[@]} -gt 0 ]]; then
    err "Missing required tools: ${missing[*]}"
    err "Install aws-cli v2 and jq before running this script."
    exit 1
  fi
  verbose "Dependencies satisfied: aws-cli, jq"
}

# ── Help ──────────────────────────────────────────────────────────────

# Print usage information and exit 0.
show_help() {
  cat <<'EOF'
AWS Smoke Tests — Verify AWS connectivity and core service health

Environment Variables:
  AWS_REGION            Region to test (default: us-east-1)
  AWS_PROFILE           AWS CLI profile to use
  S3_BUCKET             S3 bucket to verify access
  R53_DOMAIN            Route 53 domain to resolve
  R53_ZONE_ID           Hosted zone ID to verify
  VPC_ID                VPC to inspect (auto-detected if not set)
  COST_THRESHOLD        Monthly cost alert threshold in USD
  SG_CHECK_PORTS        Ports to check for open SGs (default: 22,3389,3306,5432)
  REQUIRED_PERMISSIONS  Comma-separated IAM actions to simulate
  OUTPUT_FORMAT         Output format: text or json (default: text)
  VERBOSE               Show detailed output (default: false)
  COLOR                 Color output: auto, always, never (default: auto)

Examples:
  ./aws-smoke-tests.sh
  AWS_PROFILE=prod S3_BUCKET=my-bucket ./aws-smoke-tests.sh
  S3_BUCKET=data R53_DOMAIN=example.com COST_THRESHOLD=5000 ./aws-smoke-tests.sh
EOF
  exit 0
}

# ── Tests ─────────────────────────────────────────────────────────────

#######################################
# Verify that credentials work via STS GetCallerIdentity.
# Globals: CALLER_ARN (written on success; read later by test_iam_permissions)
#######################################
test_credentials() {
  verbose "Testing AWS credentials..."
  local identity
  identity=$(aws sts get-caller-identity --output json 2>/dev/null) || {
    record_fail "AWS credentials configured" "No valid credentials found"
    return
  }
  record_pass "AWS credentials configured"

  local account arn
  account=$(jq -r '.Account // "unknown"' <<<"$identity")
  arn=$(jq -r '.Arn // "unknown"' <<<"$identity")
  CALLER_ARN="$arn"
  record_pass "STS GetCallerIdentity succeeds" "account: ${account}"
  record_pass "Caller identity" "ARN: ${arn}"
}

#######################################
# Verify S3 ListBuckets and, when S3_BUCKET is set, HeadBucket on it.
#######################################
test_s3() {
  verbose "Testing S3 access..."
  local bucket_count
  bucket_count=$(aws s3api list-buckets \
    --query 'length(Buckets)' \
    --output text 2>/dev/null) || {
    record_fail "S3 ListBuckets" "API call failed"
    return
  }
  record_pass "S3 ListBuckets succeeds" "${bucket_count} buckets"

  if [[ -z "$S3_BUCKET" ]]; then
    record_skip "S3 specific bucket check" "S3_BUCKET not set"
    return
  fi
  if aws s3api head-bucket --bucket "$S3_BUCKET" 2>/dev/null; then
    record_pass "S3 bucket '${S3_BUCKET}' exists and is accessible"
  else
    record_fail "S3 bucket '${S3_BUCKET}' exists and is accessible" "head-bucket failed"
  fi
}

#######################################
# Verify EC2 DescribeInstances and report the number of running instances.
#######################################
test_ec2() {
  verbose "Testing EC2 access..."
  local instances
  instances=$(aws ec2 describe-instances \
    --query 'Reservations[].Instances[]' \
    --output json 2>/dev/null) || {
    record_fail "EC2 DescribeInstances" "API call failed"
    return
  }
  record_pass "EC2 DescribeInstances succeeds"

  local running
  running=$(jq '[.[] | select(.State.Name == "running")] | length' <<<"$instances")
  record_pass "Running instances" "${running}"
}

#######################################
# Check that a VPC exists, has subnets and has an attached internet gateway.
# Uses VPC_ID when set (and verifies it); otherwise the default VPC, falling
# back to the first VPC returned.
#######################################
test_vpc() {
  verbose "Testing VPC configuration..."
  local vpc_id="$VPC_ID"

  if [[ -n "$vpc_id" ]]; then
    # A user-supplied ID must be confirmed, otherwise "VPC exists" would pass
    # for an ID that does not exist.
    if ! aws ec2 describe-vpcs --vpc-ids "$vpc_id" \
      --query 'Vpcs[0].VpcId' --output text >/dev/null 2>&1; then
      record_fail "VPC exists" "${vpc_id} not found or not accessible"
      return
    fi
  else
    vpc_id=$(aws ec2 describe-vpcs \
      --filters "Name=isDefault,Values=true" \
      --query 'Vpcs[0].VpcId' \
      --output text 2>/dev/null) || true
    if [[ -z "$vpc_id" ]] || [[ "$vpc_id" == "None" ]]; then
      vpc_id=$(aws ec2 describe-vpcs \
        --query 'Vpcs[0].VpcId' \
        --output text 2>/dev/null) || true
    fi
  fi

  if [[ -z "$vpc_id" ]] || [[ "$vpc_id" == "None" ]]; then
    record_fail "VPC exists" "No VPC found"
    return
  fi
  record_pass "VPC exists" "${vpc_id}"

  # Check subnets
  local subnet_count
  subnet_count=$(aws ec2 describe-subnets \
    --filters "Name=vpc-id,Values=${vpc_id}" \
    --query 'length(Subnets)' \
    --output text 2>/dev/null) || subnet_count=0
  # Anything that is not a plain integer is treated as "no subnets".
  [[ "$subnet_count" =~ ^[0-9]+$ ]] || subnet_count=0
  if ((subnet_count > 0)); then
    record_pass "VPC has subnets" "${subnet_count}"
  else
    record_fail "VPC has subnets" "0 subnets found"
  fi

  # Check internet gateway
  local igw
  igw=$(aws ec2 describe-internet-gateways \
    --filters "Name=attachment.vpc-id,Values=${vpc_id}" \
    --query 'InternetGateways[0].InternetGatewayId' \
    --output text 2>/dev/null) || igw="None"
  if [[ -n "$igw" ]] && [[ "$igw" != "None" ]]; then
    record_pass "Internet gateway attached to VPC" "${igw}"
  else
    record_fail "Internet gateway attached to VPC" "None found"
  fi
}

#######################################
# Route 53 checks: hosted zone lookup (R53_ZONE_ID) and/or ListHostedZones
# plus a system-DNS A-record lookup with dig (R53_DOMAIN).
#######################################
test_route53() {
  if [[ -z "$R53_DOMAIN" ]] && [[ -z "$R53_ZONE_ID" ]]; then
    record_skip "Route 53 checks" "R53_DOMAIN and R53_ZONE_ID not set"
    return
  fi
  verbose "Testing Route 53..."

  if [[ -n "$R53_ZONE_ID" ]]; then
    local zone_name
    zone_name=$(aws route53 get-hosted-zone \
      --id "$R53_ZONE_ID" \
      --query 'HostedZone.Name' \
      --output text 2>/dev/null) || {
      record_fail "Route 53 zone ${R53_ZONE_ID} exists" "get-hosted-zone failed"
      return
    }
    record_pass "Route 53 zone exists" "${zone_name}"
  fi

  if [[ -n "$R53_DOMAIN" ]]; then
    local zone_count
    zone_count=$(aws route53 list-hosted-zones \
      --query 'length(HostedZones)' \
      --output text 2>/dev/null) || {
      record_fail "Route 53 ListHostedZones" "API call failed"
      return
    }
    record_pass "Route 53 ListHostedZones succeeds" "${zone_count} zones"

    # dig is not a hard dependency; without it the lookup cannot be judged.
    if ! command -v dig >/dev/null 2>&1; then
      record_skip "Route 53 domain ${R53_DOMAIN} resolves" "dig not installed"
      return
    fi

    # Try to resolve the domain using system DNS. Only lines that look like
    # an IPv4 address count: `dig +short` also prints CNAME targets and
    # ";; ..." diagnostics on stdout, which must not be taken for an answer.
    local resolved
    resolved=$(dig +short "$R53_DOMAIN" A 2>/dev/null \
      | grep -E '^[0-9]{1,3}(\.[0-9]{1,3}){3}$' \
      | head -n 1) || true
    if [[ -n "$resolved" ]]; then
      record_pass "Route 53 domain ${R53_DOMAIN} resolves" "A: ${resolved}"
    else
      record_fail "Route 53 domain ${R53_DOMAIN} resolves" "No A record returned"
    fi
  fi
}

#######################################
# For each port in SG_CHECK_PORTS, fail if any security group has an ingress
# rule open to 0.0.0.0/0 or ::/0 that covers the port (or all protocols).
#######################################
test_security_groups() {
  verbose "Testing security groups..."
  local sgs
  sgs=$(aws ec2 describe-security-groups \
    --query 'SecurityGroups[].{GroupId:GroupId,GroupName:GroupName,IpPermissions:IpPermissions}' \
    --output json 2>/dev/null) || {
    record_fail "Security group audit" "API call failed"
    return
  }

  local -a ports=()
  local port open_sgs
  IFS=',' read -ra ports <<<"$SG_CHECK_PORTS"
  for port in "${ports[@]}"; do
    port=${port//[[:space:]]/}
    # Tolerate empty entries such as a trailing comma.
    [[ -n "$port" ]] || continue
    if [[ ! "$port" =~ ^[0-9]+$ ]]; then
      # A port that cannot be checked must not be reported as clean.
      record_fail "Security group audit on port '${port}'" "not a valid port number"
      continue
    fi
    port=$((10#$port)) # drop leading zeros so the value is valid JSON

    # any(gen; cond) is used instead of `gen == x or gen == y`: in jq an
    # empty generator on the left of `or` yields no result at all, which
    # hid rules that only had an IPv6 ::/0 range.
    if ! open_sgs=$(jq -r --argjson port "$port" '
      [ .[]
        | select(any(.IpPermissions[]?;
            ( any(.IpRanges[]?; .CidrIp == "0.0.0.0/0")
              or any(.Ipv6Ranges[]?; .CidrIpv6 == "::/0") )
            and
            ( .IpProtocol == "-1"
              or ( .FromPort != null and .ToPort != null
                   and .FromPort <= $port and .ToPort >= $port ) )
          ))
        | .GroupId
      ] | unique | join(", ")
    ' <<<"$sgs" 2>/dev/null); then
      record_fail "Security group audit on port ${port}" "could not evaluate rules"
      continue
    fi

    if [[ -z "$open_sgs" ]]; then
      record_pass "No security groups with 0.0.0.0/0 on port ${port}"
    else
      record_fail "Security group allows 0.0.0.0/0 on port ${port}" "${open_sgs}"
    fi
  done
}

#######################################
# Fail if any CloudWatch metric alarm is currently in ALARM state.
# One JSON call supplies both the count and the names: with text output the
# CLI applies --query per page, and piping aws into `head -c` could abort the
# script under `set -e` + pipefail.
#######################################
test_cloudwatch_alarms() {
  verbose "Testing CloudWatch alarms..."
  local alarms_json
  alarms_json=$(aws cloudwatch describe-alarms \
    --state-value ALARM \
    --query 'MetricAlarms[].AlarmName' \
    --output json 2>/dev/null) || {
    record_fail "CloudWatch alarm check" "API call failed"
    return
  }

  local alarm_count alarm_names
  alarm_count=$(jq '(. // []) | length' <<<"$alarms_json" 2>/dev/null) || {
    record_fail "CloudWatch alarm check" "unexpected API response"
    return
  }

  if ((alarm_count == 0)); then
    record_pass "CloudWatch alarms" "0 in ALARM state"
  else
    alarm_names=$(jq -r '(. // []) | join("\t")' <<<"$alarms_json" 2>/dev/null) || alarm_names=""
    # Keep the result line short: at most 200 characters of names.
    record_fail "CloudWatch alarms" "${alarm_count} in ALARM state: ${alarm_names:0:200}"
  fi
}

#######################################
# Count NEW + ACTIVE Security Hub findings with CRITICAL or HIGH severity.
# Skips when the API call fails (Security Hub disabled or no access).
# A single JSON call returns the severity labels; counting is done locally.
#######################################
test_security_hub() {
  verbose "Testing Security Hub..."
  local labels
  labels=$(aws securityhub get-findings \
    --filters '{
      "WorkflowStatus": [{"Value":"NEW","Comparison":"EQUALS"}],
      "RecordState": [{"Value":"ACTIVE","Comparison":"EQUALS"}],
      "SeverityLabel": [{"Value":"CRITICAL","Comparison":"EQUALS"},{"Value":"HIGH","Comparison":"EQUALS"}]
    }' \
    --query 'Findings[].Severity.Label' \
    --output json 2>/dev/null) || {
    record_skip "Security Hub findings" "Security Hub not enabled or no access"
    return
  }

  local critical high
  critical=$(jq '[(. // [])[] | select(. == "CRITICAL")] | length' <<<"$labels" 2>/dev/null) || critical=0
  high=$(jq '[(. // [])[] | select(. == "HIGH")] | length' <<<"$labels" 2>/dev/null) || high=0

  if ((critical == 0 && high == 0)); then
    record_pass "Security Hub findings" "0 critical, 0 high"
  else
    record_fail "Security Hub findings" "${critical} critical, ${high} high"
  fi
}

#######################################
# Simulate each action in REQUIRED_PERMISSIONS against the caller's identity.
# Globals: REQUIRED_PERMISSIONS, CALLER_ARN (read)
#######################################
test_iam_permissions() {
  if [[ -z "$REQUIRED_PERMISSIONS" ]] || [[ -z "$CALLER_ARN" ]]; then
    record_skip "IAM permission simulation" "REQUIRED_PERMISSIONS not set or no ARN"
    return
  fi
  verbose "Testing IAM permissions..."

  # simulate-principal-policy expects an IAM user/group/role ARN. When the
  # caller is an assumed role, STS reports arn:...:sts::ACCT:assumed-role/
  # ROLE/SESSION, so map it to the role ARN.
  # NOTE(review): the STS ARN carries no role path, so roles created with a
  # path will still fail to simulate — confirm whether that matters here.
  local principal_arn="$CALLER_ARN"
  if [[ "$principal_arn" =~ ^arn:([^:]+):sts::([0-9]+):assumed-role/([^/]+)/ ]]; then
    principal_arn="arn:${BASH_REMATCH[1]}:iam::${BASH_REMATCH[2]}:role/${BASH_REMATCH[3]}"
    verbose "Simulating against role ARN ${principal_arn}"
  fi

  local -a actions=() denied=()
  local action result checked=0
  IFS=',' read -ra actions <<<"$REQUIRED_PERMISSIONS"
  for action in "${actions[@]}"; do
    action=${action//[[:space:]]/}
    [[ -n "$action" ]] || continue
    checked=$((checked + 1))
    result=$(aws iam simulate-principal-policy \
      --policy-source-arn "$principal_arn" \
      --action-names "$action" \
      --query 'EvaluationResults[0].EvalDecision' \
      --output text 2>/dev/null) || result="error"
    if [[ "$result" != "allowed" ]]; then
      denied+=("$action")
    fi
  done

  if [[ ${#denied[@]} -eq 0 ]]; then
    record_pass "IAM simulation" "all ${checked} required actions allowed"
  else
    record_fail "IAM simulation" "denied: ${denied[*]}"
  fi
}

#######################################
# Compare month-to-date BlendedCost with COST_THRESHOLD (USD, may be decimal).
# Passes when the spend is strictly below the threshold.
#######################################
test_cost() {
  if [[ -z "$COST_THRESHOLD" ]]; then
    record_skip "Cost check" "COST_THRESHOLD not set"
    return
  fi
  verbose "Testing monthly cost..."

  local threshold_re='^[0-9]+([.][0-9]+)?$'
  if [[ ! "$COST_THRESHOLD" =~ $threshold_re ]]; then
    record_fail "Cost check" "COST_THRESHOLD '${COST_THRESHOLD}' is not a number"
    return
  fi

  local month_start today
  month_start=$(date -u +%Y-%m-01)
  today=$(date -u +%Y-%m-%d)
  # On the first of the month Start equals End, which Cost Explorer rejects;
  # report that honestly rather than as an API failure.
  if [[ "$month_start" == "$today" ]]; then
    record_skip "Cost check" "no month-to-date data on the first day of the month"
    return
  fi

  local cost_json
  cost_json=$(aws ce get-cost-and-usage \
    --time-period "Start=${month_start},End=${today}" \
    --granularity MONTHLY \
    --metrics BlendedCost \
    --output json 2>/dev/null) || {
    record_skip "Cost check" "Cost Explorer API failed (may need to be enabled)"
    return
  }

  local amount
  amount=$(jq -r '.ResultsByTime[0].Total.BlendedCost.Amount // "0"' <<<"$cost_json")
  local amount_re='^-?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?$'
  if [[ ! "$amount" =~ $amount_re ]]; then
    record_fail "Current month spend" "unparseable amount '${amount}'"
    return
  fi

  # awk does the comparison so decimal amounts and thresholds both work;
  # values are passed with -v, never interpolated into the program.
  if awk -v a="$amount" -v t="$COST_THRESHOLD" 'BEGIN { exit !((a + 0) < (t + 0)) }'; then
    record_pass "Current month spend" "\$${amount} below threshold \$${COST_THRESHOLD}"
  else
    record_fail "Current month spend" "\$${amount} exceeds threshold \$${COST_THRESHOLD}"
  fi
}

# ── Output ────────────────────────────────────────────────────────────

#######################################
# Print the collected results as a TAP stream followed by a summary comment.
# Globals: RESULTS, TOTAL, PASS, FAIL, SKIP, START_TIME (read)
#######################################
print_tap() {
  echo "TAP version 14"
  echo "1..${TOTAL}"
  if ((${#RESULTS[@]} > 0)); then
    printf '%s\n' "${RESULTS[@]}"
  fi
  echo ""
  local duration=$(($(date +%s) - START_TIME))
  echo "# Tests: ${TOTAL}, Passed: ${PASS}, Failed: ${FAIL}, Skipped: ${SKIP}"
  echo "# Duration: ${duration}s"
}

#######################################
# Print the collected results as one JSON document.
# Each RESULTS line is reduced to {status, name}; the detail/reason suffix is
# dropped. jq builds the document so names are escaped correctly.
# Globals: RESULTS, TOTAL, PASS, FAIL, SKIP, START_TIME (read)
#######################################
print_json() {
  local duration=$(($(date +%s) - START_TIME))
  local json_results="[]"

  if ((${#RESULTS[@]} > 0)); then
    local result status name
    json_results=$(
      for result in "${RESULTS[@]}"; do
        status="pass"
        if [[ "$result" == "not ok"* ]]; then status="fail"; fi
        if [[ "$result" == *"# SKIP"* ]]; then status="skip"; fi
        name=${result#*" - "}  # drop "ok N - " / "not ok N - "
        name=${name#"# SKIP "} # drop the skip marker
        name=${name%%" — "*}   # drop the skip reason
        name=${name%%" ("*}    # drop the parenthesised detail
        printf '%s\t%s\n' "$status" "$name"
      done | jq -R -n -c '[inputs | split("\t") | {status: .[0], name: (.[1:] | join("\t"))}]'
    )
  fi

  jq -n \
    --argjson results "$json_results" \
    --argjson total "$TOTAL" \
    --argjson passed "$PASS" \
    --argjson failed "$FAIL" \
    --argjson skipped "$SKIP" \
    --argjson duration "$duration" \
    '{
      total: $total,
      passed: $passed,
      failed: $failed,
      skipped: $skipped,
      duration_seconds: $duration,
      results: $results
    }'
}

# ── Main ──────────────────────────────────────────────────────────────

#######################################
# Entry point: run every check, print the report, exit 1 if anything failed.
# Arguments: --help | -h  print usage and exit
#######################################
main() {
  case "${1:-}" in
    --help | -h) show_help ;;
  esac

  setup_colors
  check_dependencies
  START_TIME=$(date +%s)

  # In JSON mode stdout must carry nothing but the JSON document, so the
  # banner is sent to stderr instead.
  local banner_fd=1
  if [[ "$OUTPUT_FORMAT" == "json" ]]; then
    banner_fd=2
  fi
  {
    log "AWS Smoke Tests — Region: ${AWS_REGION}"
    log "────────────────────────────────────────"
  } >&"$banner_fd"

  test_credentials
  test_s3
  test_ec2
  test_vpc
  test_route53
  test_security_groups
  test_cloudwatch_alarms
  test_security_hub
  test_iam_permissions
  test_cost

  if [[ "$OUTPUT_FORMAT" == "json" ]]; then
    print_json
  else
    echo ""
    print_tap
  fi

  if ((FAIL > 0)); then
    exit 1
  fi
  exit 0
}

main "$@"