a1a17e81a1
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
740 lines
25 KiB
Bash
Executable File
740 lines
25 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#########################################################################################
|
|
#### chaos-runner.sh — Inject controlled failures and verify system recovery ####
|
|
#### CPU stress, memory pressure, disk fill, service kill, network faults ####
|
|
#### Requires: bash 4+, root privileges ####
|
|
#### ####
|
|
#### Author: Phil Connor ####
|
|
#### Contact: contact@mylinux.work ####
|
|
#### License: MIT ####
|
|
#### Version 1.01 ####
|
|
#### ####
|
|
#### Usage: ####
|
|
#### sudo ./chaos-runner.sh --fault cpu-stress --duration 30 ####
|
|
#### ####
|
|
#### See --help for all options. ####
|
|
#########################################################################################
|
|
|
|
# Strict mode: abort on command failure, on unset variables, and on a failure
# anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# ---------------------------------------------------------------------------
# Color variables. They start out empty so output is plain text until
# setup_colors() decides that escape sequences are wanted.
# ---------------------------------------------------------------------------
RED='' GREEN='' YELLOW='' BLUE=''
CYAN='' BOLD='' DIM='' RESET=''
|
|
|
|
#######################################
# Populate the color variables with ANSI escape sequences.
# Globals:   COLOR (read); RED GREEN YELLOW BLUE CYAN BOLD DIM RESET (written)
# Behavior:  COLOR=never  -> leave every variable untouched
#            COLOR=always -> always set the sequences
#            otherwise    -> set them only when stdout is a terminal
#######################################
setup_colors() {
    [[ "${COLOR}" != "never" ]] || return 0

    # Neither forced on nor attached to a terminal: stay plain.
    if [[ "${COLOR}" != "always" && ! -t 1 ]]; then
        return 0
    fi

    RED="\033[0;31m"
    GREEN="\033[0;32m"
    YELLOW="\033[0;33m"
    BLUE="\033[0;34m"
    CYAN="\033[0;36m"
    BOLD="\033[1m"
    DIM="\033[2m"
    RESET="\033[0m"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Standard helpers
|
|
# ---------------------------------------------------------------------------
|
|
# Informational message on stdout, prefixed with a green "[+]".
log() {
    printf "%b[+]%b %s\n" "$GREEN" "$RESET" "$*"
}

# Warning on stderr, prefixed with a yellow "[!]".
warn() {
    printf "%b[!]%b %s\n" "$YELLOW" "$RESET" "$*" >&2
}

# Error on stderr, prefixed with a red "[-]".
err() {
    printf "%b[-]%b %s\n" "$RED" "$RESET" "$*" >&2
}

# Diagnostic message on stdout, printed only when VERBOSE is "true".
# Always returns 0 so it is safe to call under errexit.
verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        printf "%b[~]%b %s\n" "$DIM" "$RESET" "$*"
    fi
    return 0
}

# Report an error and terminate the script with status 1.
die() {
    err "$*"
    exit 1
}
|
|
|
|
# Print a blank line followed by a "══ <title>" heading; all arguments are
# joined into the title.
section_header() {
    local title="$*"
    printf "\n%b%b══ %b%s%b\n" "$CYAN" "$BOLD" "$BLUE" "$title" "$RESET"
}
|
|
|
|
# Print one "label value" row; the label is left-aligned in a 24-column cell.
#   $1 - label
#   $2 - value
field() {
    local label="$1" value="$2"
    printf " %-24s %s\n" "$label" "$value"
}
|
|
|
|
# Print one "label value" row with the value wrapped in a color sequence.
#   $1 - label (left-aligned, 24 columns)
#   $2 - color escape sequence placed before the value
#   $3 - value
# RESET is emitted after the value.
field_color() {
    local name="$1"
    local paint="$2"
    local text="$3"
    printf " %-24s %b%s%b\n" "$name" "$paint" "$text" "$RESET"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Defaults
|
|
# ---------------------------------------------------------------------------
|
|
# Mode and fault selection (filled in by parse_args).
RUN_MODE=""
FAULT_TYPE=""

# Tunables; each CHAOS_* environment variable overrides the built-in default.
DURATION="${CHAOS_DURATION:-30}"
TARGET_SERVICE=""
FILL_PATH="${CHAOS_FILL_PATH:-/tmp}"
FILL_SIZE="${CHAOS_FILL_SIZE:-90}"
LATENCY_MS="${CHAOS_LATENCY:-200}"
DROP_PERCENT="${CHAOS_DROP:-50}"
NETWORK_IFACE="${CHAOS_IFACE:-eth0}"
PLAN_FILE=""
CONFIRM_YES=false
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"

# Bookkeeping consumed by cleanup_all: background PIDs to kill, files and
# directories to remove, and a flag saying whether anything was injected.
CLEANUP_PIDS=()
CLEANUP_FILES=()
CHAOS_ACTIVE=false

# ---------------------------------------------------------------------------
# State
# ---------------------------------------------------------------------------
readonly SCRIPT_NAME="${0##*/}"
START_TIME=""

# ---------------------------------------------------------------------------
# Trap
# ---------------------------------------------------------------------------
# Cleanup runs from the EXIT trap only. INT and TERM are turned into a real
# exit (128 + signal number): a handler that merely returns lets bash resume
# the script, so a SIGTERM would undo the fault but then keep counting down
# and, in plan mode, go on to inject the remaining faults.
trap cleanup_all EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Safety — cleanup
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Undo every injected fault. Installed as the script's exit handler and also
# called directly at the end of most fault functions.
# Globals:   CHAOS_ACTIVE (read/written), CLEANUP_PIDS, CLEANUP_FILES
#            (read, then emptied), NETWORK_IFACE (read)
# Behavior:  does nothing unless CHAOS_ACTIVE is "true"; the flag is cleared
#            first so a second invocation (e.g. from the EXIT trap) is a no-op.
#            Every step is best-effort: failures are ignored so that one
#            broken step cannot prevent the remaining ones.
#######################################
cleanup_all() {
    if [[ "${CHAOS_ACTIVE}" != "true" ]]; then
        return
    fi
    CHAOS_ACTIVE=false
    warn "Running cleanup..."

    local resolv_backup="/etc/resolv.conf.chaos-backup"

    # Kill tracked background PIDs. The ${arr[@]+...} form keeps an empty
    # array from tripping "unbound variable" under nounset on bash < 4.4.
    local pid
    for pid in ${CLEANUP_PIDS[@]+"${CLEANUP_PIDS[@]}"}; do
        kill "$pid" 2>/dev/null || true
        wait "$pid" 2>/dev/null || true
    done
    CLEANUP_PIDS=()

    # Remove tracked temp files and directories (unmounting tmpfs mounts).
    local f
    for f in ${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}; do
        # Never sweep the resolv.conf backup: it must survive until the
        # restore step below, otherwise DNS would stay broken for good.
        if [[ "$f" == "$resolv_backup" ]]; then
            continue
        fi
        if [[ -d "$f" ]] && mountpoint -q "$f" 2>/dev/null; then
            umount "$f" 2>/dev/null || true
            rmdir "$f" 2>/dev/null || true
        elif [[ -f "$f" ]]; then
            rm -f "$f" 2>/dev/null || true
        elif [[ -d "$f" ]]; then
            rmdir "$f" 2>/dev/null || true
        fi
    done
    CLEANUP_FILES=()

    # Remove tc qdiscs
    tc qdisc del dev "$NETWORK_IFACE" root 2>/dev/null || true

    # Restore resolv.conf from backup. The content is written back in place
    # (not mv'ed) so that a symlinked /etc/resolv.conf stays a symlink; the
    # backup is only deleted once the write succeeded.
    if [[ -f "$resolv_backup" ]]; then
        if { cat "$resolv_backup" > /etc/resolv.conf; } 2>/dev/null; then
            rm -f "$resolv_backup" 2>/dev/null || true
            log "Restored /etc/resolv.conf from backup"
        else
            warn "Could not restore /etc/resolv.conf — backup kept at $resolv_backup"
        fi
    fi

    log "Cleanup complete"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Utilities
|
|
# ---------------------------------------------------------------------------
|
|
# Abort via die() unless the effective user id reported by `id -u` is 0.
require_root() {
    [[ "$(id -u)" -eq 0 ]] && return 0
    die "This operation requires root privileges. Run with sudo."
}
|
|
|
|
#######################################
# Ask the operator for a yes/no confirmation on stdin.
# Globals:   CONFIRM_YES (read) - "true" skips the prompt entirely
# Arguments: $1 - question to display
# Returns:   0 when confirmed; otherwise terminates through die().
#######################################
confirm_action() {
    local message="$1"
    if [[ "$CONFIRM_YES" == "true" ]]; then
        return 0
    fi

    printf "%b[?]%b %s [y/N] " "$YELLOW" "$RESET" "$message"

    local answer=""
    # read fails at end-of-input (closed stdin, cron, CI). Without the guard
    # errexit would end the script right here with no explanation; treating
    # it as "no answer" lets the case below report the abort properly.
    read -r answer || true

    case "$answer" in
        [yY]|[yY][eE][sS]) return 0 ;;
        *) die "Aborted by user" ;;
    esac
}
|
|
|
|
# Block for DURATION seconds, redrawing a one-line countdown every second,
# then blank the countdown line.
wait_duration() {
    local secs_left
    for ((secs_left = DURATION; secs_left > 0; secs_left--)); do
        printf "\r %bTime remaining: %ds%b " "$DIM" "$secs_left" "$RESET"
        sleep 1
    done
    # Overwrite the countdown with spaces and park the cursor at column 0.
    printf "\r%40s\r" ""
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fault: cpu-stress
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Fault: cpu-stress. Starts one busy-loop background job per core reported by
# nproc, waits DURATION seconds, then hands over to cleanup_all.
# Globals: DURATION (read), CHAOS_ACTIVE, CLEANUP_PIDS (written)
#######################################
fault_cpu_stress() {
    local cores worker_pid started
    cores=$(nproc)
    section_header "CPU Stress — saturating $cores cores for ${DURATION}s"
    CHAOS_ACTIVE=true

    started=0
    while (( started < cores )); do
        # Tight no-op loop; it only ends when cleanup kills the job.
        while :; do :; done &
        worker_pid=$!
        CLEANUP_PIDS+=("$worker_pid")
        verbose "Spawned CPU worker PID $worker_pid"
        started=$(( started + 1 ))
    done

    log "Started $cores CPU stress workers"
    wait_duration
    cleanup_all
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fault: memory-pressure
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Fault: memory-pressure. Mounts a 256M tmpfs on a fresh temp directory,
# writes ~240M of random data into it, waits DURATION seconds, then hands
# over to cleanup_all.
# Globals: DURATION (read), CHAOS_ACTIVE, CLEANUP_FILES (written)
#######################################
fault_memory_pressure() {
    section_header "Memory Pressure — filling tmpfs for ${DURATION}s"
    CHAOS_ACTIVE=true

    local mount_dir
    mount_dir=$(mktemp -d /tmp/chaos-mem-XXXXXX)
    # Track the directory before mounting: if mount fails, errexit ends the
    # script and the exit-time cleanup can still remove the empty directory.
    CLEANUP_FILES+=("$mount_dir")
    mount -t tmpfs -o size=256M tmpfs "$mount_dir"

    log "Mounted tmpfs at $mount_dir (256M)"
    # A short write is tolerated (best effort), but it is reported instead of
    # being logged as a success.
    if head -c 240M /dev/urandom > "${mount_dir}/fill.dat" 2>/dev/null; then
        log "Filled tmpfs with ~240M of data"
    else
        warn "Could not fully populate tmpfs — memory pressure may be lower than intended"
    fi

    wait_duration
    cleanup_all
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fault: disk-fill
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Fault: disk-fill. Writes a zero-filled file under FILL_PATH sized so that
# the filesystem reaches roughly FILL_SIZE percent used, waits DURATION
# seconds, then hands over to cleanup_all.
# Globals: FILL_PATH, FILL_SIZE, DURATION (read),
#          CHAOS_ACTIVE, CLEANUP_FILES (written)
# Returns: early (nothing injected) when usage is already at or above target.
#######################################
fault_disk_fill() {
    section_header "Disk Fill — filling ${FILL_PATH} to ${FILL_SIZE}% for ${DURATION}s"

    local current_usage target_bytes target_mb fill_file total_kb

    total_kb=$(df --output=size -k "$FILL_PATH" | tail -1 | tr -d ' ')
    current_usage=$(df --output=pcent "$FILL_PATH" | tail -1 | tr -d ' %')

    if [[ "$current_usage" -ge "$FILL_SIZE" ]]; then
        warn "Disk already at ${current_usage}% — above target ${FILL_SIZE}%"
        return
    fi

    # Bytes still missing to reach the target percentage of the total size.
    target_bytes=$(( (FILL_SIZE - current_usage) * total_kb * 1024 / 100 ))
    target_mb=$(( target_bytes / 1048576 ))

    # Only from here on is there something to undo.
    CHAOS_ACTIVE=true

    # mktemp gives an unpredictable name (this runs as root, often in a
    # world-writable directory). The file is tracked BEFORE dd starts so an
    # interrupt in the middle of a long write still gets it removed.
    fill_file=$(mktemp "${FILL_PATH}/chaos-fill-XXXXXX")
    CLEANUP_FILES+=("$fill_file")

    log "Writing ${target_mb}M to $fill_file"
    # Best effort: running out of space early is acceptable for this fault,
    # but it is reported rather than silently ignored.
    if ! dd if=/dev/zero of="$fill_file" bs=1M count="$target_mb" status=none 2>/dev/null; then
        warn "dd stopped before writing the full ${target_mb}M"
    fi

    log "Disk fill complete — $(df --output=pcent "$FILL_PATH" | tail -1 | tr -d ' ') used"
    wait_duration
    cleanup_all
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fault: service-kill
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Fault: service-kill. Stops the systemd unit TARGET_SERVICE, waits DURATION
# seconds, then starts it again.
# Globals: TARGET_SERVICE, DURATION (read), CHAOS_ACTIVE (written)
# Exits:   via die() when no target is set, the unit is not active, or the
#          operator declines the confirmation.
#######################################
fault_service_kill() {
    if [[ -z "$TARGET_SERVICE" ]]; then
        die "service-kill requires --target SERVICE_NAME"
    fi
    section_header "Service Kill — stopping ${TARGET_SERVICE} for ${DURATION}s"

    if ! systemctl is-active --quiet "$TARGET_SERVICE"; then
        die "Service '$TARGET_SERVICE' is not currently active"
    fi

    confirm_action "Stop service '$TARGET_SERVICE' for ${DURATION}s?"

    # Marked active only once the fault is really about to happen, so that an
    # abort during the checks above does not trigger unrelated cleanup work.
    CHAOS_ACTIVE=true

    # cleanup_all knows nothing about stopped services. While the unit is
    # down, extend the exit handler so that an interrupt or error during the
    # outage still brings the service back before the generic cleanup runs.
    trap 'systemctl start "$TARGET_SERVICE" 2>/dev/null || true; cleanup_all' EXIT

    systemctl stop "$TARGET_SERVICE"
    log "Stopped $TARGET_SERVICE"

    wait_duration

    log "Restarting $TARGET_SERVICE..."
    systemctl start "$TARGET_SERVICE"
    # Outage over: put back the script's regular exit handler.
    trap cleanup_all EXIT
    log "Service $TARGET_SERVICE restarted"
    CHAOS_ACTIVE=false
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fault: network-latency
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Fault: network-latency. Installs a netem root qdisc adding LATENCY_MS
# milliseconds of delay on NETWORK_IFACE, waits DURATION seconds, then hands
# over to cleanup_all.
# Globals: LATENCY_MS, NETWORK_IFACE, DURATION (read), CHAOS_ACTIVE (written)
#######################################
fault_network_latency() {
    local dev="$NETWORK_IFACE"
    local delay="${LATENCY_MS}ms"

    section_header "Network Latency — ${delay} on ${dev} for ${DURATION}s"
    CHAOS_ACTIVE=true

    command -v tc &>/dev/null || die "tc (iproute2) is required for network faults"

    # Remove any root qdisc already present; failure here is ignored.
    tc qdisc del dev "$dev" root 2>/dev/null || true
    tc qdisc add dev "$dev" root netem delay "$delay"
    log "Added ${delay} latency to $dev"

    wait_duration
    cleanup_all
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fault: network-drop
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Fault: network-drop. Installs a netem root qdisc dropping DROP_PERCENT
# percent of packets on NETWORK_IFACE, waits DURATION seconds, then hands
# over to cleanup_all.
# Globals: DROP_PERCENT, NETWORK_IFACE, DURATION (read), CHAOS_ACTIVE (written)
#######################################
fault_network_drop() {
    local dev="$NETWORK_IFACE"
    local loss="${DROP_PERCENT}%"

    section_header "Network Drop — ${loss} loss on ${dev} for ${DURATION}s"
    CHAOS_ACTIVE=true

    command -v tc &>/dev/null || die "tc (iproute2) is required for network faults"

    # Remove any root qdisc already present; failure here is ignored.
    tc qdisc del dev "$dev" root 2>/dev/null || true
    tc qdisc add dev "$dev" root netem loss "$loss"
    log "Added ${loss} packet loss to $dev"

    wait_duration
    cleanup_all
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fault: dns-failure
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Fault: dns-failure. Saves /etc/resolv.conf to /etc/resolv.conf.chaos-backup,
# replaces it with a single nameserver entry of 127.0.0.254, waits DURATION
# seconds, then hands over to cleanup_all.
# Globals: DURATION (read), CHAOS_ACTIVE (written)
# Exits:   via die() when a backup from an earlier run is still present.
#######################################
fault_dns_failure() {
    section_header "DNS Failure — breaking DNS for ${DURATION}s"
    CHAOS_ACTIVE=true

    if [[ -f /etc/resolv.conf.chaos-backup ]]; then
        die "A chaos backup of resolv.conf already exists — run --cleanup first"
    fi

    # The backup must NOT be listed in CLEANUP_FILES: that list is for
    # disposable artifacts which cleanup simply deletes, whereas this file
    # has to survive until cleanup_all puts it back as /etc/resolv.conf.
    cp /etc/resolv.conf /etc/resolv.conf.chaos-backup

    printf "# Chaos: DNS intentionally broken\nnameserver 127.0.0.254\n" > /etc/resolv.conf
    log "Replaced /etc/resolv.conf with broken nameserver"

    wait_duration
    cleanup_all
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fault: io-latency
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Fault: io-latency. Runs a background loop that repeatedly rewrites a 1 MiB
# scratch file under FILL_PATH with fdatasync plus a global sync, waits
# DURATION seconds, then hands over to cleanup_all.
# Globals: FILL_PATH, DURATION (read),
#          CHAOS_ACTIVE, CLEANUP_PIDS, CLEANUP_FILES (written)
#######################################
fault_io_latency() {
    section_header "I/O Latency — degrading I/O for ${DURATION}s"
    CHAOS_ACTIVE=true

    # mktemp creates the scratch file under an unpredictable name; it is
    # tracked right away so any exit path removes it. No separate priming
    # process is started: one that exits immediately would leave a dead PID
    # in CLEANUP_PIDS, and cleanup could later kill an unrelated process that
    # happened to reuse that PID.
    local io_file
    io_file=$(mktemp "${FILL_PATH}/chaos-io-XXXXXX")
    CLEANUP_FILES+=("$io_file")

    # Run continuous slow I/O in background
    (
        while :; do
            ionice -c 3 dd if=/dev/zero of="$io_file" bs=4K count=256 conv=fdatasync status=none 2>/dev/null || true
            sync
            sleep 0.5
        done
    ) &
    CLEANUP_PIDS+=("$!")

    log "Started degraded I/O worker (idle-class ionice)"
    wait_duration
    cleanup_all
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Dispatch
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Mode --fault: inject the single fault named by FAULT_TYPE.
# Requires root, asks for confirmation, runs the matching fault_* function
# and logs the elapsed wall-clock time.
# Globals: FAULT_TYPE, DURATION (read), START_TIME (written)
# Exits:   via die() on a missing or unknown fault type.
#######################################
do_fault() {
    require_root
    [[ -n "$FAULT_TYPE" ]] || die "No fault type specified. Use --fault TYPE"

    confirm_action "Inject fault '${FAULT_TYPE}' for ${DURATION}s?"

    START_TIME=$(date +%s)
    log "Starting fault injection: $FAULT_TYPE (duration: ${DURATION}s)"

    # Resolve the fault name to its implementing function, then call it.
    local handler
    case "$FAULT_TYPE" in
        cpu-stress)      handler=fault_cpu_stress ;;
        memory-pressure) handler=fault_memory_pressure ;;
        disk-fill)       handler=fault_disk_fill ;;
        service-kill)    handler=fault_service_kill ;;
        network-latency) handler=fault_network_latency ;;
        network-drop)    handler=fault_network_drop ;;
        dns-failure)     handler=fault_dns_failure ;;
        io-latency)      handler=fault_io_latency ;;
        *)               die "Unknown fault type: $FAULT_TYPE" ;;
    esac
    "$handler"

    local finished elapsed
    finished=$(date +%s)
    elapsed=$(( finished - START_TIME ))
    log "Fault injection complete (${elapsed}s elapsed)"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# List fault types
|
|
# ---------------------------------------------------------------------------
|
|
# Mode --list: print a two-column table of every supported fault type.
do_list() {
    section_header "Available Fault Types"
    printf "\n"
    printf " %-20s %s\n" "FAULT TYPE" "DESCRIPTION"
    printf " ─────────────────────────────────────────────────────────────\n"
    # printf re-applies its format to each successive name/description pair.
    printf " %-20s %s\n" \
        "cpu-stress"      "Saturate all CPU cores" \
        "memory-pressure" "Fill memory via tmpfs allocation" \
        "disk-fill"       "Fill disk to threshold percentage" \
        "service-kill"    "Stop a systemd service temporarily" \
        "network-latency" "Add network latency via tc netem" \
        "network-drop"    "Drop packets via tc netem" \
        "dns-failure"     "Break DNS resolution temporarily" \
        "io-latency"      "Degrade I/O performance via ionice"
    printf "\n"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Verify system health
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Mode --verify: run a set of quick health checks and print one colored row
# per check (CPU load, memory, root filesystem usage, ICMP reachability of
# 8.8.8.8, DNS resolution of google.com, leftover chaos artifacts).
# Returns: 0 when every check passes, 1 when at least one issue was found.
#######################################
do_verify() {
    section_header "System Health Check"
    local issues=0

    # CPU load: flagged when the 1-minute load average exceeds 90% of the
    # core count. Values are handed to awk with -v rather than spliced into
    # the program text.
    local load_1m cores
    load_1m=$(awk '{print $1}' /proc/loadavg)
    cores=$(nproc)
    if awk -v load="$load_1m" -v cores="$cores" 'BEGIN { exit !(load > cores * 0.9) }'; then
        field_color "CPU load (1m):" "$RED" "${load_1m} — HIGH (cores: ${cores})"
        ((issues++)) || true
    else
        field_color "CPU load (1m):" "$GREEN" "${load_1m} (cores: ${cores})"
    fi

    # Memory: percentage of MemTotal that is not MemAvailable.
    local mem_avail_kb mem_total_kb mem_pct
    mem_total_kb=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
    mem_avail_kb=$(awk '/MemAvailable/ {print $2}' /proc/meminfo)
    mem_pct=$(( (mem_total_kb - mem_avail_kb) * 100 / mem_total_kb ))
    if [[ "$mem_pct" -gt 90 ]]; then
        field_color "Memory usage:" "$RED" "${mem_pct}% — HIGH"
        ((issues++)) || true
    else
        field_color "Memory usage:" "$GREEN" "${mem_pct}%"
    fi

    # Disk: usage of the root filesystem.
    local disk_pct
    disk_pct=$(df --output=pcent / | tail -1 | tr -d ' %')
    if [[ "$disk_pct" -gt 90 ]]; then
        field_color "Disk usage (/):" "$RED" "${disk_pct}% — HIGH"
        ((issues++)) || true
    else
        field_color "Disk usage (/):" "$GREEN" "${disk_pct}%"
    fi

    # Network connectivity
    if ping -c 1 -W 3 8.8.8.8 &>/dev/null; then
        field_color "Network (ping):" "$GREEN" "OK"
    else
        field_color "Network (ping):" "$RED" "UNREACHABLE"
        ((issues++)) || true
    fi

    # DNS resolution. `host` is preferred; when it is not installed the check
    # falls back to getent so a missing tool is not reported as broken DNS.
    local dns_ok=false
    if command -v host &>/dev/null; then
        if host google.com &>/dev/null; then
            dns_ok=true
        fi
    elif getent hosts google.com &>/dev/null; then
        dns_ok=true
    fi
    if [[ "$dns_ok" == "true" ]]; then
        field_color "DNS resolution:" "$GREEN" "OK"
    else
        field_color "DNS resolution:" "$RED" "FAILING"
        ((issues++)) || true
    fi

    # Chaos artifacts: a leftover resolv.conf backup means a DNS fault was
    # never cleaned up.
    if [[ -f /etc/resolv.conf.chaos-backup ]]; then
        field_color "Chaos artifacts:" "$YELLOW" "resolv.conf backup found"
        ((issues++)) || true
    else
        field_color "Chaos artifacts:" "$GREEN" "None"
    fi

    printf "\n"
    if [[ "$issues" -gt 0 ]]; then
        warn "Found $issues issue(s)"
        return 1
    else
        log "All checks passed"
        return 0
    fi
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Plan execution
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Mode --plan: run the faults listed under ".faults" in the JSON file
# PLAN_FILE one after another, pausing 5 seconds between steps.
# Each entry supplies "type", optionally "duration" (default 30), "target"
# and "iface". Entries with an unknown type or a non-integer duration are
# skipped with a warning.
# Globals: PLAN_FILE (read); FAULT_TYPE, DURATION, TARGET_SERVICE,
#          NETWORK_IFACE (overwritten per step)
# Exits:   via die() when the plan file is missing/unreadable, jq is not
#          installed, or the operator declines the confirmation.
#######################################
do_plan() {
    require_root
    [[ -n "$PLAN_FILE" ]] || die "No plan file specified. Use --plan FILE"
    [[ -f "$PLAN_FILE" ]] || die "Plan file not found: $PLAN_FILE"
    command -v jq &>/dev/null || die "jq is required for plan execution"

    section_header "Executing Chaos Plan: $PLAN_FILE"

    local plan_length
    plan_length=$(jq '.faults | length' "$PLAN_FILE") \
        || die "Could not read .faults from plan file: $PLAN_FILE"
    log "Plan contains $plan_length fault(s)"

    # Same safety gate as single-fault mode; --yes skips the prompt.
    confirm_action "Execute ${plan_length} fault(s) from '${PLAN_FILE}'?"

    local i fault_entry f_type f_duration f_target f_iface
    for ((i = 0; i < plan_length; i++)); do
        fault_entry=$(jq -c --argjson idx "$i" '.faults[$idx]' "$PLAN_FILE")
        f_type=$(printf '%s' "$fault_entry" | jq -r '.type')
        f_duration=$(printf '%s' "$fault_entry" | jq -r '.duration // 30')

        log "Step $((i + 1))/$plan_length: $f_type (${f_duration}s)"

        # The countdown does integer arithmetic on the duration; reject
        # anything else here instead of crashing halfway through the plan.
        if [[ ! "$f_duration" =~ ^[0-9]+$ ]]; then
            warn "Invalid duration in plan: $f_duration — skipping"
            continue
        fi

        FAULT_TYPE="$f_type"
        DURATION=$(( 10#$f_duration ))

        # Extract optional fields
        f_target=$(printf '%s' "$fault_entry" | jq -r '.target // empty')
        f_iface=$(printf '%s' "$fault_entry" | jq -r '.iface // empty')

        if [[ -n "$f_target" ]]; then
            TARGET_SERVICE="$f_target"
        fi
        if [[ -n "$f_iface" ]]; then
            NETWORK_IFACE="$f_iface"
        fi

        case "$FAULT_TYPE" in
            cpu-stress)      fault_cpu_stress ;;
            memory-pressure) fault_memory_pressure ;;
            disk-fill)       fault_disk_fill ;;
            service-kill)    fault_service_kill ;;
            network-latency) fault_network_latency ;;
            network-drop)    fault_network_drop ;;
            dns-failure)     fault_dns_failure ;;
            io-latency)      fault_io_latency ;;
            *)               warn "Unknown fault type in plan: $FAULT_TYPE — skipping" ;;
        esac

        if [[ "$i" -lt $((plan_length - 1)) ]]; then
            log "Pausing 5s before next fault..."
            sleep 5
        fi
    done

    log "Plan execution complete"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Force cleanup
|
|
# ---------------------------------------------------------------------------
|
|
# Mode --cleanup: force a cleanup pass even though this invocation injected
# nothing itself. Requires root.
do_cleanup() {
    require_root
    section_header "Force Cleanup"

    # cleanup_all only acts while the chaos flag is raised, so raise it first.
    CHAOS_ACTIVE=true
    cleanup_all

    log "Force cleanup complete"
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Help
|
|
# ---------------------------------------------------------------------------
|
|
# Print the usage text on stdout. SCRIPT_NAME is expanded inside the
# here-document; entries are laid out in aligned columns so each option is
# visually separated from its description.
show_help() {
    cat <<EOF
${SCRIPT_NAME} — Controlled failure injection and recovery verification

USAGE
    sudo ./${SCRIPT_NAME} --fault TYPE [OPTIONS]
    sudo ./${SCRIPT_NAME} --list
    sudo ./${SCRIPT_NAME} --verify
    sudo ./${SCRIPT_NAME} --plan FILE [--yes]
    sudo ./${SCRIPT_NAME} --cleanup

MODES
    --fault TYPE        Inject a specific fault for --duration seconds
    --list              List all available fault types
    --verify            Run system health checks
    --plan FILE         Execute a JSON plan file of sequential faults
    --cleanup           Force cleanup of any leftover chaos artifacts

FAULT TYPES
    cpu-stress          Saturate all CPU cores
    memory-pressure     Fill memory via tmpfs allocation
    disk-fill           Fill disk to threshold percentage
    service-kill        Stop a systemd service (requires --target)
    network-latency     Add network latency via tc netem
    network-drop        Drop packets via tc netem
    dns-failure         Break DNS resolution temporarily
    io-latency          Degrade I/O performance

OPTIONS
    --duration SEC      Fault duration in seconds (default: 30)
    --target SERVICE    Target service for service-kill
    --fill-path PATH    Path for disk-fill (default: /tmp)
    --fill-size PCT     Disk fill target percent (default: 90)
    --latency MS        Latency in ms for network-latency (default: 200)
    --drop PCT          Drop percent for network-drop (default: 50)
    --iface IFACE       Network interface (default: eth0)
    --yes               Skip confirmation prompts
    --verbose           Enable verbose output
    --no-color          Disable color output
    --help              Show this help message

ENVIRONMENT VARIABLES
    CHAOS_DURATION      Default duration (seconds)
    CHAOS_FILL_PATH     Default fill path
    CHAOS_FILL_SIZE     Default fill size (percent)
    CHAOS_LATENCY       Default network latency (ms)
    CHAOS_DROP          Default packet drop (percent)
    CHAOS_IFACE         Default network interface
    VERBOSE             Set to 'true' for verbose output
    COLOR               Set to 'never' to disable colors

PLAN FILE FORMAT (JSON)
    {
      "faults": [
        { "type": "cpu-stress", "duration": 15 },
        { "type": "network-latency", "duration": 20 },
        { "type": "service-kill", "duration": 10, "target": "nginx" }
      ]
    }

EXAMPLES
    sudo ./${SCRIPT_NAME} --fault cpu-stress --duration 30
    sudo ./${SCRIPT_NAME} --fault service-kill --target nginx --duration 60
    sudo ./${SCRIPT_NAME} --fault network-latency --latency 500 --iface ens33
    sudo ./${SCRIPT_NAME} --verify
    sudo ./${SCRIPT_NAME} --plan chaos-plan.json --yes
EOF
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Argument parsing
|
|
# ---------------------------------------------------------------------------
|
|
# Abort via die() unless the value is a non-negative decimal integer.
#   $1 - option name (for the message)   $2 - value to check
_require_uint() {
    [[ "$2" =~ ^[0-9]+$ ]] || die "$1 requires a non-negative integer, got '$2'"
}

# Abort via die() unless the value is an integer between 0 and 100.
#   $1 - option name (for the message)   $2 - value to check
_require_percent() {
    _require_uint "$1" "$2"
    (( 10#$2 <= 100 )) || die "$1 requires a percentage between 0 and 100, got '$2'"
}

#######################################
# Parse the command line into the global configuration variables.
# Globals (written): RUN_MODE, FAULT_TYPE, PLAN_FILE, DURATION,
#   TARGET_SERVICE, FILL_PATH, FILL_SIZE, LATENCY_MS, DROP_PERCENT,
#   NETWORK_IFACE, CONFIRM_YES, VERBOSE, COLOR
# Arguments: the script's command-line arguments
# Exits:     via die() on an unknown option, a missing option value, or a
#            numeric option that is not a valid number. Numeric values are
#            normalized to base 10 so that a leading zero ("08") cannot be
#            misread as octal by later arithmetic.
#######################################
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --fault)
                RUN_MODE="fault"
                FAULT_TYPE="${2:-}"
                [[ -n "$FAULT_TYPE" ]] || die "--fault requires a TYPE argument"
                shift 2
                ;;
            --list)
                RUN_MODE="list"
                shift
                ;;
            --verify)
                RUN_MODE="verify"
                shift
                ;;
            --plan)
                RUN_MODE="plan"
                PLAN_FILE="${2:-}"
                [[ -n "$PLAN_FILE" ]] || die "--plan requires a FILE argument"
                shift 2
                ;;
            --cleanup)
                RUN_MODE="cleanup"
                shift
                ;;
            --duration)
                DURATION="${2:-}"
                [[ -n "$DURATION" ]] || die "--duration requires a value"
                _require_uint "--duration" "$DURATION"
                DURATION=$(( 10#$DURATION ))
                shift 2
                ;;
            --target)
                TARGET_SERVICE="${2:-}"
                [[ -n "$TARGET_SERVICE" ]] || die "--target requires a SERVICE name"
                shift 2
                ;;
            --fill-path)
                FILL_PATH="${2:-}"
                [[ -n "$FILL_PATH" ]] || die "--fill-path requires a PATH"
                shift 2
                ;;
            --fill-size)
                FILL_SIZE="${2:-}"
                [[ -n "$FILL_SIZE" ]] || die "--fill-size requires a percentage"
                _require_percent "--fill-size" "$FILL_SIZE"
                FILL_SIZE=$(( 10#$FILL_SIZE ))
                shift 2
                ;;
            --latency)
                LATENCY_MS="${2:-}"
                [[ -n "$LATENCY_MS" ]] || die "--latency requires a value in ms"
                _require_uint "--latency" "$LATENCY_MS"
                LATENCY_MS=$(( 10#$LATENCY_MS ))
                shift 2
                ;;
            --drop)
                DROP_PERCENT="${2:-}"
                [[ -n "$DROP_PERCENT" ]] || die "--drop requires a percentage"
                _require_percent "--drop" "$DROP_PERCENT"
                DROP_PERCENT=$(( 10#$DROP_PERCENT ))
                shift 2
                ;;
            --iface)
                NETWORK_IFACE="${2:-}"
                [[ -n "$NETWORK_IFACE" ]] || die "--iface requires an interface name"
                shift 2
                ;;
            --yes)
                CONFIRM_YES=true
                shift
                ;;
            --verbose)
                VERBOSE="true"
                shift
                ;;
            --no-color)
                COLOR="never"
                shift
                ;;
            --help|-h)
                RUN_MODE="help"
                shift
                ;;
            *)
                die "Unknown option: $1 (see --help)"
                ;;
        esac
    done
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
#######################################
# Entry point: parse the command line, set up colors and run the handler for
# the selected mode. With no mode the help text is printed and the script
# exits with an error.
# Arguments: the script's command-line arguments
#######################################
main() {
    parse_args "$@"
    setup_colors

    if [[ -z "$RUN_MODE" ]]; then
        show_help
        die "No mode specified — use --fault, --list, --verify, --plan, or --cleanup"
    fi

    case "$RUN_MODE" in
        fault|list|verify|plan|cleanup)
            # Each of these modes is implemented by a do_<mode> function.
            "do_${RUN_MODE}"
            ;;
        help)
            show_help
            ;;
        *)
            die "Unknown mode: $RUN_MODE"
            ;;
    esac
}
|
|
|
|
# Script entry point: hand every command-line argument, unchanged, to main().
main "$@"
|