Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
This commit is contained in:
Executable
+739
@@ -0,0 +1,739 @@
|
||||
#!/usr/bin/env bash
|
||||
#########################################################################################
|
||||
#### chaos-runner.sh — Inject controlled failures and verify system recovery ####
|
||||
#### CPU stress, memory pressure, disk fill, service kill, network faults ####
|
||||
#### Requires: bash 4+, root privileges ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### License: MIT ####
|
||||
#### Version 1.01 ####
|
||||
#### ####
|
||||
#### Usage: ####
|
||||
#### sudo ./chaos-runner.sh --fault cpu-stress --duration 30 ####
|
||||
#### ####
|
||||
#### See --help for all options. ####
|
||||
#########################################################################################
|
||||
|
||||
# Strict mode: abort on unhandled errors, on use of unset variables, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Color variables — pre-initialized empty, set by setup_colors()
|
||||
# ---------------------------------------------------------------------------
|
||||
# Every palette entry starts out empty, so the "%b" placeholders in the output
# helpers print nothing until setup_colors() fills the palette in.
RED="" GREEN="" YELLOW="" BLUE=""
CYAN="" BOLD="" DIM="" RESET=""
|
||||
|
||||
#######################################
# Fill the colour palette according to COLOR.
# Globals:
#   COLOR (read)  - "never": leave the palette empty
#                   "always": always use escape sequences
#                   anything else: use them only when stdout is a terminal
#   RED GREEN YELLOW BLUE CYAN BOLD DIM RESET (written)
# Returns: 0
#######################################
setup_colors() {
  case "${COLOR}" in
    never)
      return 0
      ;;
    always)
      ;;
    *)
      # Auto mode: colours only when fd 1 is a TTY.
      [[ -t 1 ]] || return 0
      ;;
  esac

  RED="\033[0;31m"
  GREEN="\033[0;32m"
  YELLOW="\033[0;33m"
  BLUE="\033[0;34m"
  CYAN="\033[0;36m"
  BOLD="\033[1m"
  DIM="\033[2m"
  RESET="\033[0m"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Standard helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
# log MESSAGE... — informational line on stdout, prefixed with a green [+].
log() {
  printf "%b[+]%b %s\n" "$GREEN" "$RESET" "$*"
}

# warn MESSAGE... — warning line on stderr, prefixed with a yellow [!].
warn() {
  printf "%b[!]%b %s\n" "$YELLOW" "$RESET" "$*" >&2
}

# err MESSAGE... — error line on stderr, prefixed with a red [-].
err() {
  printf "%b[-]%b %s\n" "$RED" "$RESET" "$*" >&2
}

# verbose MESSAGE... — dimmed line on stdout, printed only when VERBOSE is
# "true". Always returns 0 so it is safe under `set -e`.
verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    printf "%b[~]%b %s\n" "$DIM" "$RESET" "$*"
  fi
  return 0
}

# die MESSAGE... — report the error via err() and terminate with status 1.
die() {
  err "$*"
  exit 1
}
|
||||
|
||||
# section_header TITLE... — print a blank line followed by a highlighted
# "══ TITLE" heading on stdout.
section_header() {
  local title="$*"
  printf "\n%b%b══ %b%s%b\n" "$CYAN" "$BOLD" "$BLUE" "$title" "$RESET"
}
|
||||
|
||||
# field LABEL VALUE — print one "label value" row; the label is left-aligned
# and padded to 24 characters.
field() {
  local label="$1" value="$2"
  printf " %-24s %s\n" "$label" "$value"
}
|
||||
|
||||
# field_color LABEL COLOR VALUE — like field(), but the value is wrapped in
# the given colour escape and followed by RESET.
field_color() {
  printf " %-24s %b%s%b\n" "$1" "$2" "$3" "$RESET"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Defaults
|
||||
# ---------------------------------------------------------------------------
|
||||
# --- Selected on the command line -------------------------------------------
RUN_MODE=""
FAULT_TYPE=""
TARGET_SERVICE=""
PLAN_FILE=""
CONFIRM_YES=false

# --- Tunables: the CHAOS_* environment variables supply the defaults ---------
DURATION="${CHAOS_DURATION:-30}"
FILL_PATH="${CHAOS_FILL_PATH:-/tmp}"
FILL_SIZE="${CHAOS_FILL_SIZE:-90}"
LATENCY_MS="${CHAOS_LATENCY:-200}"
DROP_PERCENT="${CHAOS_DROP:-50}"
NETWORK_IFACE="${CHAOS_IFACE:-eth0}"

# --- Output behaviour: keep a non-empty value from the environment -----------
: "${VERBOSE:=false}"
: "${COLOR:=auto}"

# --- Cleanup bookkeeping, consumed by cleanup_all() --------------------------
declare -a CLEANUP_PIDS=()
declare -a CLEANUP_FILES=()
CHAOS_ACTIVE=false

# ---------------------------------------------------------------------------
# State
# ---------------------------------------------------------------------------
SCRIPT_NAME="${0##*/}"
readonly SCRIPT_NAME
START_TIME=""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Trap
|
||||
# ---------------------------------------------------------------------------
|
||||
# cleanup_all runs on every exit path via the EXIT trap. INT and TERM must
# actually terminate the script: a handler that only cleans up and returns
# lets execution continue after the interrupted command. Exiting here still
# triggers the EXIT trap, so cleanup runs exactly once, and the exit status
# follows the usual 128+signal convention.
trap cleanup_all EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Safety — cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Undo everything an injected fault may have left behind.
# Does nothing unless CHAOS_ACTIVE is "true"; clears the flag first so a
# second invocation (for example from the EXIT trap) is a no-op.
# Globals:
#   CHAOS_ACTIVE   (read, written)
#   CLEANUP_PIDS   (read, emptied)  - background workers to terminate
#   CLEANUP_FILES  (read, emptied)  - files, directories or tmpfs mounts
#   NETWORK_IFACE  (read)           - interface whose netem qdisc is removed
# Outputs: progress via warn/log/err
# Returns: 0
#######################################
cleanup_all() {
  if [[ "${CHAOS_ACTIVE}" != "true" ]]; then
    return 0
  fi
  CHAOS_ACTIVE=false
  warn "Running cleanup..."

  # Restore resolv.conf BEFORE the generic file removal below: if the backup
  # is also listed in CLEANUP_FILES, deleting it first would make the restore
  # impossible and leave DNS broken.
  if [[ -f /etc/resolv.conf.chaos-backup ]]; then
    if mv /etc/resolv.conf.chaos-backup /etc/resolv.conf 2>/dev/null; then
      log "Restored /etc/resolv.conf from backup"
    else
      err "Could not restore /etc/resolv.conf — backup kept at /etc/resolv.conf.chaos-backup"
    fi
  fi

  # Kill tracked background PIDs. The ${arr[@]+...} form keeps an empty array
  # from tripping `set -u` on bash older than 4.4.
  local pid
  for pid in ${CLEANUP_PIDS[@]+"${CLEANUP_PIDS[@]}"}; do
    kill "$pid" 2>/dev/null || true
    wait "$pid" 2>/dev/null || true
  done
  CLEANUP_PIDS=()

  # Remove tracked temp files, directories and tmpfs mounts (best effort).
  local f
  for f in ${CLEANUP_FILES[@]+"${CLEANUP_FILES[@]}"}; do
    if [[ -d "$f" ]] && mountpoint -q "$f" 2>/dev/null; then
      umount "$f" 2>/dev/null || true
      rmdir "$f" 2>/dev/null || true
    elif [[ -f "$f" ]]; then
      rm -f "$f" 2>/dev/null || true
    elif [[ -d "$f" ]]; then
      rmdir "$f" 2>/dev/null || true
    fi
  done
  CLEANUP_FILES=()

  # Remove the root qdisc only when a netem qdisc is present on the interface,
  # so faults unrelated to networking never wipe an existing traffic-control
  # setup. The output is captured rather than piped to avoid SIGPIPE under
  # pipefail.
  local qdiscs
  qdiscs=$(tc qdisc show dev "$NETWORK_IFACE" 2>/dev/null) || qdiscs=""
  if [[ "$qdiscs" == *netem* ]]; then
    tc qdisc del dev "$NETWORK_IFACE" root 2>/dev/null || true
  fi

  log "Cleanup complete"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Utilities
|
||||
# ---------------------------------------------------------------------------
|
||||
# require_root — terminate via die() unless the effective user id reported by
# `id -u` is 0.
require_root() {
  local uid
  uid="$(id -u)"
  [[ "$uid" -eq 0 ]] || die "This operation requires root privileges. Run with sudo."
}
|
||||
|
||||
#######################################
# Ask the operator to confirm a destructive action.
# Globals:   CONFIRM_YES (read) - "true" skips the prompt entirely
# Arguments: $1 - question to display
# Outputs:   the prompt on stdout
# Returns:   0 when confirmed; otherwise terminates the script through die()
#######################################
confirm_action() {
  local message="$1"
  if [[ "$CONFIRM_YES" == "true" ]]; then
    return 0
  fi

  printf "%b[?]%b %s [y/N] " "$YELLOW" "$RESET" "$message"

  # `read` fails at end of input (closed or empty stdin, e.g. cron). Under
  # `set -e` that would end the script without any message, so report it. A
  # final line without a trailing newline still counts as an answer.
  local answer=""
  if ! read -r answer && [[ -z "$answer" ]]; then
    printf "\n"
    die "No answer received on stdin — re-run with --yes to skip confirmation prompts"
  fi

  case "$answer" in
    [yY] | [yY][eE][sS]) return 0 ;;
    *) die "Aborted by user" ;;
  esac
}
|
||||
|
||||
# wait_duration — count down DURATION seconds, redrawing a one-line
# "Time remaining" indicator on stdout each second, then blank that line.
wait_duration() {
  local secs_left="$DURATION"

  while [[ "$secs_left" -gt 0 ]]; do
    printf "\r %bTime remaining: %ds%b " "$DIM" "$secs_left" "$RESET"
    sleep 1
    secs_left=$((secs_left - 1))
  done

  # Overwrite the indicator with spaces and return the cursor to column 0.
  printf "\r%40s\r" ""
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fault: cpu-stress
|
||||
# ---------------------------------------------------------------------------
|
||||
# fault_cpu_stress — start one busy-loop worker per core reported by nproc,
# hold the load for DURATION seconds, then clean up. Worker PIDs are recorded
# in CLEANUP_PIDS so cleanup_all can terminate them.
fault_cpu_stress() {
  local ncpu worker_pid spawned=0
  ncpu=$(nproc)

  section_header "CPU Stress — saturating $ncpu cores for ${DURATION}s"
  CHAOS_ACTIVE=true

  while ((spawned < ncpu)); do
    # A tight no-op loop in the background keeps one core busy.
    while :; do :; done &
    worker_pid=$!
    CLEANUP_PIDS+=("$worker_pid")
    verbose "Spawned CPU worker PID $worker_pid"
    spawned=$((spawned + 1))
  done

  log "Started $ncpu CPU stress workers"
  wait_duration
  cleanup_all
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fault: memory-pressure
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Create memory pressure by mounting a 256M tmpfs and filling it with ~240M
# of random data for DURATION seconds.
# Globals:
#   DURATION       (read)
#   CHAOS_ACTIVE   (written)
#   CLEANUP_FILES  (appended) - the mount point, so cleanup_all unmounts and
#                               removes it
# Returns: 0 on success; terminates through die() if the mount point cannot
#          be created or mounted
#######################################
fault_memory_pressure() {
  section_header "Memory Pressure — filling tmpfs for ${DURATION}s"
  CHAOS_ACTIVE=true

  local mount_dir
  mount_dir=$(mktemp -d /tmp/chaos-mem-XXXXXX) \
    || die "Could not create a temporary mount point under /tmp"

  # Register the directory before mounting: if the mount fails, cleanup_all
  # still removes the empty directory instead of leaking it.
  CLEANUP_FILES+=("$mount_dir")

  if ! mount -t tmpfs -o size=256M tmpfs "$mount_dir"; then
    die "Could not mount tmpfs at $mount_dir"
  fi
  log "Mounted tmpfs at $mount_dir (256M)"

  # Best effort: a short write still produces some pressure, but say so
  # rather than claiming the full amount was written.
  if head -c 240M /dev/urandom > "${mount_dir}/fill.dat" 2>/dev/null; then
    log "Filled tmpfs with ~240M of data"
  else
    warn "tmpfs fill stopped early — memory pressure may be lower than intended"
  fi

  wait_duration
  cleanup_all
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fault: disk-fill
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Fill the filesystem holding FILL_PATH up to FILL_SIZE percent for DURATION
# seconds by writing a zero-filled file, then clean up.
# Globals:
#   FILL_PATH      (read)     - directory in which the fill file is created
#   FILL_SIZE      (read)     - target usage, whole percent 1-100
#   DURATION       (read)
#   CHAOS_ACTIVE   (written)  - set only once a fill is actually going to run
#   CLEANUP_FILES  (appended) - the fill file
# Returns: 0 (also when the disk is already at or above the target);
#          terminates through die() on invalid settings
#######################################
fault_disk_fill() {
  section_header "Disk Fill — filling ${FILL_PATH} to ${FILL_SIZE}% for ${DURATION}s"

  [[ "$FILL_SIZE" =~ ^[0-9]+$ ]] \
    || die "Invalid fill size '${FILL_SIZE}' — expected a whole percentage"
  # 10# forces base 10 so a value such as "090" is not parsed as octal.
  local target_pct=$((10#$FILL_SIZE))
  if ((target_pct < 1 || target_pct > 100)); then
    die "Fill size must be between 1 and 100, got '${FILL_SIZE}'"
  fi

  local current_usage total_kb target_bytes target_mb fill_file
  total_kb=$(df --output=size -k "$FILL_PATH" | tail -1 | tr -d ' ')
  current_usage=$(df --output=pcent "$FILL_PATH" | tail -1 | tr -d ' %')
  if [[ ! "$total_kb" =~ ^[0-9]+$ || ! "$current_usage" =~ ^[0-9]+$ ]]; then
    die "Could not read disk usage for ${FILL_PATH}"
  fi

  if ((current_usage >= target_pct)); then
    warn "Disk already at ${current_usage}% — above target ${FILL_SIZE}%"
    return 0
  fi

  target_bytes=$(((target_pct - current_usage) * total_kb * 1024 / 100))
  target_mb=$((target_bytes / 1048576))

  # mktemp yields an unpredictable name: this runs as root, so a guessable
  # path in a shared directory could be pre-created as a symlink. Register the
  # file before writing so an interrupted dd cannot leave the disk full.
  CHAOS_ACTIVE=true
  fill_file=$(mktemp "${FILL_PATH}/chaos-fill.XXXXXX") \
    || die "Could not create fill file in ${FILL_PATH}"
  CLEANUP_FILES+=("$fill_file")

  log "Writing ${target_mb}M to $fill_file"
  # Best effort: dd stopping early (disk full) is an acceptable outcome here.
  dd if=/dev/zero of="$fill_file" bs=1M count="$target_mb" status=none 2>/dev/null || true

  log "Disk fill complete — $(df --output=pcent "$FILL_PATH" | tail -1 | tr -d ' ') used"
  wait_duration
  cleanup_all
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fault: service-kill
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Stop a systemd service for DURATION seconds, then start it again.
# Globals:
#   TARGET_SERVICE (read)    - unit to stop; required
#   DURATION       (read)
#   CHAOS_ACTIVE   (written) - true only while the service is down
# Returns: 0 on success; terminates through die() when no target is given,
#          the service is not active, or the operator declines
#######################################
fault_service_kill() {
  if [[ -z "$TARGET_SERVICE" ]]; then
    die "service-kill requires --target SERVICE_NAME"
  fi
  section_header "Service Kill — stopping ${TARGET_SERVICE} for ${DURATION}s"

  if ! systemctl is-active --quiet "$TARGET_SERVICE"; then
    die "Service '$TARGET_SERVICE' is not currently active"
  fi

  confirm_action "Stop service '$TARGET_SERVICE' for ${DURATION}s?"

  # Mark the fault active only now: a failed check or a declined prompt above
  # must not trigger a cleanup run for a fault that never started.
  CHAOS_ACTIVE=true

  # While the service is down, any exit (interrupt, error) must bring it back
  # before the regular cleanup runs.
  trap 'systemctl start "$TARGET_SERVICE" 2>/dev/null || true; cleanup_all' EXIT

  systemctl stop "$TARGET_SERVICE"
  log "Stopped $TARGET_SERVICE"

  wait_duration

  log "Restarting $TARGET_SERVICE..."
  systemctl start "$TARGET_SERVICE"
  # Service is back: reinstate the script-level EXIT handler.
  trap cleanup_all EXIT
  log "Service $TARGET_SERVICE restarted"
  CHAOS_ACTIVE=false
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fault: network-latency
|
||||
# ---------------------------------------------------------------------------
|
||||
# fault_network_latency — install a netem root qdisc that delays traffic on
# NETWORK_IFACE by LATENCY_MS milliseconds, hold it for DURATION seconds, then
# run cleanup_all. Requires tc.
fault_network_latency() {
  local dev="$NETWORK_IFACE" delay="${LATENCY_MS}ms"

  section_header "Network Latency — ${LATENCY_MS}ms on ${NETWORK_IFACE} for ${DURATION}s"
  CHAOS_ACTIVE=true

  command -v tc &>/dev/null || die "tc (iproute2) is required for network faults"

  # Drop any existing root qdisc (errors ignored), then add the netem one.
  tc qdisc del dev "$dev" root 2>/dev/null || true
  tc qdisc add dev "$dev" root netem delay "$delay"
  log "Added ${delay} latency to $dev"

  wait_duration
  cleanup_all
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fault: network-drop
|
||||
# ---------------------------------------------------------------------------
|
||||
# fault_network_drop — install a netem root qdisc that drops DROP_PERCENT
# percent of packets on NETWORK_IFACE, hold it for DURATION seconds, then run
# cleanup_all. Requires tc.
fault_network_drop() {
  local dev="$NETWORK_IFACE" loss="${DROP_PERCENT}%"

  section_header "Network Drop — ${DROP_PERCENT}% loss on ${NETWORK_IFACE} for ${DURATION}s"
  CHAOS_ACTIVE=true

  command -v tc &>/dev/null || die "tc (iproute2) is required for network faults"

  # Drop any existing root qdisc (errors ignored), then add the netem one.
  tc qdisc del dev "$dev" root 2>/dev/null || true
  tc qdisc add dev "$dev" root netem loss "$loss"
  log "Added ${loss} packet loss to $dev"

  wait_duration
  cleanup_all
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fault: dns-failure
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Break DNS resolution for DURATION seconds by pointing /etc/resolv.conf at an
# unused loopback address, then restore the original through cleanup_all.
# Globals:
#   DURATION     (read)
#   CHAOS_ACTIVE (written)
# Returns: 0 on success; terminates through die() when a backup from an
#          earlier run still exists
#######################################
fault_dns_failure() {
  section_header "DNS Failure — breaking DNS for ${DURATION}s"

  # Check for a stale backup before marking the fault active, so this abort
  # does not itself trigger a restore the operator was only told to run.
  if [[ -f /etc/resolv.conf.chaos-backup ]]; then
    die "A chaos backup of resolv.conf already exists — run --cleanup first"
  fi

  CHAOS_ACTIVE=true

  # -p keeps mode, ownership and timestamps, so the file moved back into place
  # later is as readable as the original regardless of the current umask.
  #
  # The backup is consumed by cleanup_all's restore step (it is moved back
  # over /etc/resolv.conf). It must therefore NOT be added to CLEANUP_FILES,
  # whose entries are simply deleted.
  cp -p /etc/resolv.conf /etc/resolv.conf.chaos-backup

  printf "# Chaos: DNS intentionally broken\nnameserver 127.0.0.254\n" > /etc/resolv.conf
  log "Replaced /etc/resolv.conf with broken nameserver"

  wait_duration
  cleanup_all
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fault: io-latency
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Generate continuous synchronous write traffic under FILL_PATH for DURATION
# seconds, then clean up.
# Globals:
#   FILL_PATH      (read)     - directory for the scratch file
#   DURATION       (read)
#   CHAOS_ACTIVE   (written)
#   CLEANUP_PIDS   (appended) - the background writer loop
#   CLEANUP_FILES  (appended) - the scratch file
# Returns: 0 on success; terminates through die() if the scratch file cannot
#          be created
#######################################
fault_io_latency() {
  section_header "I/O Latency — degrading I/O for ${DURATION}s"
  CHAOS_ACTIVE=true

  # mktemp yields an unpredictable name: this runs as root, so a guessable
  # path in a shared directory could be pre-created as a symlink.
  local io_file
  io_file=$(mktemp "${FILL_PATH}/chaos-io.XXXXXX") \
    || die "Could not create scratch file in ${FILL_PATH}"
  CLEANUP_FILES+=("$io_file")

  # Background writer: 1 MiB written with fdatasync, a global sync, a short
  # pause, repeated until the loop is killed by cleanup_all.
  # NOTE(review): the writer runs in the idle I/O class (ionice -c 3), which
  # presumably limits how much it can slow other workloads — confirm this is
  # the intended strength of the fault.
  (
    while :; do
      ionice -c 3 dd if=/dev/zero of="$io_file" bs=4K count=256 conv=fdatasync status=none 2>/dev/null || true
      sync
      sleep 0.5
    done
  ) &
  CLEANUP_PIDS+=("$!")

  log "Started degraded I/O worker (idle-class ionice)"
  wait_duration
  cleanup_all
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Run a single fault selected with --fault.
# Globals:
#   FAULT_TYPE (read)          - name of the fault to inject
#   DURATION   (read, written) - validated and normalised to base 10
#   START_TIME (written)       - epoch seconds at injection start
# Returns: status of the fault handler; terminates through die() on a missing
#          or unknown fault type, an invalid duration, or a declined prompt
#######################################
do_fault() {
  require_root
  if [[ -z "$FAULT_TYPE" ]]; then
    die "No fault type specified. Use --fault TYPE"
  fi

  # Resolve the handler before prompting, so the operator is never asked to
  # confirm a fault that does not exist.
  local handler
  case "$FAULT_TYPE" in
    cpu-stress) handler=fault_cpu_stress ;;
    memory-pressure) handler=fault_memory_pressure ;;
    disk-fill) handler=fault_disk_fill ;;
    service-kill) handler=fault_service_kill ;;
    network-latency) handler=fault_network_latency ;;
    network-drop) handler=fault_network_drop ;;
    dns-failure) handler=fault_dns_failure ;;
    io-latency) handler=fault_io_latency ;;
    *) die "Unknown fault type: $FAULT_TYPE" ;;
  esac

  # DURATION is later evaluated arithmetically; reject anything that is not a
  # plain whole number now rather than after the fault is already injected.
  # 10# keeps values such as "08" from being parsed as octal.
  [[ "$DURATION" =~ ^[0-9]+$ ]] \
    || die "Invalid duration '${DURATION}' — expected a whole number of seconds"
  DURATION=$((10#$DURATION))

  confirm_action "Inject fault '${FAULT_TYPE}' for ${DURATION}s?"

  START_TIME=$(date +%s)
  log "Starting fault injection: $FAULT_TYPE (duration: ${DURATION}s)"

  "$handler"

  local elapsed
  elapsed=$(($(date +%s) - START_TIME))
  log "Fault injection complete (${elapsed}s elapsed)"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# List fault types
|
||||
# ---------------------------------------------------------------------------
|
||||
# do_list — print a two-column table of the supported fault types and a
# one-line description of each.
do_list() {
  # One "name|description" entry per fault, in display order.
  local -a catalog=(
    "cpu-stress|Saturate all CPU cores"
    "memory-pressure|Fill memory via tmpfs allocation"
    "disk-fill|Fill disk to threshold percentage"
    "service-kill|Stop a systemd service temporarily"
    "network-latency|Add network latency via tc netem"
    "network-drop|Drop packets via tc netem"
    "dns-failure|Break DNS resolution temporarily"
    "io-latency|Degrade I/O performance via ionice"
  )
  local row

  section_header "Available Fault Types"
  printf "\n"
  printf " %-20s %s\n" "FAULT TYPE" "DESCRIPTION"
  printf " ─────────────────────────────────────────────────────────────\n"
  for row in "${catalog[@]}"; do
    printf " %-20s %s\n" "${row%%|*}" "${row#*|}"
  done
  printf "\n"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Verify system health
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Run a set of health checks and print one coloured row per check:
# 1-minute load vs. core count, memory usage, usage of /, ICMP reachability
# of 8.8.8.8, DNS resolution of google.com, and leftover chaos artifacts.
# Outputs: one row per check on stdout; a summary via log/warn
# Returns: 0 when every check passes, 1 when at least one issue was found
#######################################
do_verify() {
  section_header "System Health Check"
  local issues=0

  # CPU load: flagged when the 1-minute load exceeds 90% of the core count.
  # Values are handed to awk with -v instead of being spliced into the
  # program text.
  local load_1m cores
  load_1m=$(awk '{print $1}' /proc/loadavg)
  cores=$(nproc)
  if awk -v load="$load_1m" -v cores="$cores" 'BEGIN { exit !(load > cores * 0.9) }'; then
    field_color "CPU load (1m):" "$RED" "${load_1m} — HIGH (cores: ${cores})"
    issues=$((issues + 1))
  else
    field_color "CPU load (1m):" "$GREEN" "${load_1m} (cores: ${cores})"
  fi

  # Memory: used = MemTotal - MemAvailable, flagged above 90%.
  local mem_avail_kb mem_total_kb mem_pct
  mem_total_kb=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
  mem_avail_kb=$(awk '/MemAvailable/ {print $2}' /proc/meminfo)
  if [[ "$mem_total_kb" =~ ^[0-9]+$ && "$mem_avail_kb" =~ ^[0-9]+$ ]] && ((mem_total_kb > 0)); then
    mem_pct=$(((mem_total_kb - mem_avail_kb) * 100 / mem_total_kb))
    if [[ "$mem_pct" -gt 90 ]]; then
      field_color "Memory usage:" "$RED" "${mem_pct}% — HIGH"
      issues=$((issues + 1))
    else
      field_color "Memory usage:" "$GREEN" "${mem_pct}%"
    fi
  else
    # /proc/meminfo did not provide both figures; report it instead of
    # failing on the arithmetic.
    field_color "Memory usage:" "$YELLOW" "unknown"
  fi

  # Disk: usage of the root filesystem, flagged above 90%.
  local disk_pct
  disk_pct=$(df --output=pcent / | tail -1 | tr -d ' %')
  if [[ "$disk_pct" -gt 90 ]]; then
    field_color "Disk usage (/):" "$RED" "${disk_pct}% — HIGH"
    issues=$((issues + 1))
  else
    field_color "Disk usage (/):" "$GREEN" "${disk_pct}%"
  fi

  # Network connectivity: a single ping with a 3 second timeout.
  if ping -c 1 -W 3 8.8.8.8 &>/dev/null; then
    field_color "Network (ping):" "$GREEN" "OK"
  else
    field_color "Network (ping):" "$RED" "UNREACHABLE"
    issues=$((issues + 1))
  fi

  # DNS resolution: use `host` when it is installed, otherwise fall back to
  # getent so a missing tool is not reported as a DNS failure.
  local dns_ok=false
  if command -v host &>/dev/null; then
    if host google.com &>/dev/null; then
      dns_ok=true
    fi
  elif getent hosts google.com &>/dev/null; then
    dns_ok=true
  fi
  if [[ "$dns_ok" == "true" ]]; then
    field_color "DNS resolution:" "$GREEN" "OK"
  else
    field_color "DNS resolution:" "$RED" "FAILING"
    issues=$((issues + 1))
  fi

  # Chaos artifacts: a leftover resolv.conf backup means a DNS fault was not
  # cleaned up.
  if [[ -f /etc/resolv.conf.chaos-backup ]]; then
    field_color "Chaos artifacts:" "$YELLOW" "resolv.conf backup found"
    issues=$((issues + 1))
  else
    field_color "Chaos artifacts:" "$GREEN" "None"
  fi

  printf "\n"
  if [[ "$issues" -gt 0 ]]; then
    warn "Found $issues issue(s)"
    return 1
  fi
  log "All checks passed"
  return 0
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plan execution
|
||||
# ---------------------------------------------------------------------------
|
||||
#######################################
# Execute the faults listed under ".faults" in a JSON plan file, one after
# another, pausing 5 seconds between steps.
# Each entry provides "type", optionally "duration" (default 30), "target"
# and "iface". Entries with an unknown type or an invalid duration are
# skipped with a warning.
# Globals:
#   PLAN_FILE (read)
#   FAULT_TYPE, DURATION, TARGET_SERVICE, NETWORK_IFACE (written per step)
# Returns: 0 on completion; terminates through die() when the plan file is
#          missing or jq is not installed
#######################################
do_plan() {
  require_root
  if [[ -z "$PLAN_FILE" ]]; then
    die "No plan file specified. Use --plan FILE"
  fi
  if [[ ! -f "$PLAN_FILE" ]]; then
    die "Plan file not found: $PLAN_FILE"
  fi
  if ! command -v jq &>/dev/null; then
    die "jq is required for plan execution"
  fi

  section_header "Executing Chaos Plan: $PLAN_FILE"

  local plan_length
  plan_length=$(jq '.faults | length' "$PLAN_FILE")
  log "Plan contains $plan_length fault(s)"

  # Values in effect before the plan started. Every step falls back to these,
  # so a "target" or "iface" set by one step does not leak into later steps.
  local base_target="$TARGET_SERVICE" base_iface="$NETWORK_IFACE"

  local i fault_entry f_type f_duration f_target f_iface
  for ((i = 0; i < plan_length; i++)); do
    fault_entry=$(jq -r ".faults[$i]" "$PLAN_FILE")
    f_type=$(printf '%s' "$fault_entry" | jq -r '.type')
    f_duration=$(printf '%s' "$fault_entry" | jq -r '.duration // 30')
    f_target=$(printf '%s' "$fault_entry" | jq -r '.target // empty')
    f_iface=$(printf '%s' "$fault_entry" | jq -r '.iface // empty')

    log "Step $((i + 1))/$plan_length: $f_type (${f_duration}s)"

    # The duration ends up in arithmetic evaluation; anything other than a
    # plain whole number from the plan file is refused.
    if [[ ! "$f_duration" =~ ^[0-9]+$ ]]; then
      warn "Invalid duration in plan: '$f_duration' — skipping"
      continue
    fi

    FAULT_TYPE="$f_type"
    DURATION=$((10#$f_duration))
    TARGET_SERVICE="${f_target:-$base_target}"
    NETWORK_IFACE="${f_iface:-$base_iface}"

    case "$FAULT_TYPE" in
      cpu-stress) fault_cpu_stress ;;
      memory-pressure) fault_memory_pressure ;;
      disk-fill) fault_disk_fill ;;
      service-kill) fault_service_kill ;;
      network-latency) fault_network_latency ;;
      network-drop) fault_network_drop ;;
      dns-failure) fault_dns_failure ;;
      io-latency) fault_io_latency ;;
      *) warn "Unknown fault type in plan: $FAULT_TYPE — skipping" ;;
    esac

    if [[ "$i" -lt $((plan_length - 1)) ]]; then
      log "Pausing 5s before next fault..."
      sleep 5
    fi
  done

  log "Plan execution complete"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Force cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
# do_cleanup — implementation of --cleanup: run the full cleanup routine even
# though no fault was started by this invocation. Requires root.
do_cleanup() {
  require_root

  # cleanup_all returns immediately unless a fault is marked active, so the
  # flag is raised first to force a full pass.
  CHAOS_ACTIVE=true
  section_header "Force Cleanup"
  cleanup_all

  log "Force cleanup complete"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Help
|
||||
# ---------------------------------------------------------------------------
|
||||
# show_help — print the usage text on stdout. Sections are indented and the
# option/description columns aligned so the output reads as a table.
show_help() {
  cat <<EOF
${SCRIPT_NAME} — Controlled failure injection and recovery verification

USAGE
  sudo ./${SCRIPT_NAME} --fault TYPE [OPTIONS]
  sudo ./${SCRIPT_NAME} --list
  sudo ./${SCRIPT_NAME} --verify
  sudo ./${SCRIPT_NAME} --plan FILE [--yes]
  sudo ./${SCRIPT_NAME} --cleanup

MODES
  --fault TYPE        Inject a specific fault for --duration seconds
  --list              List all available fault types
  --verify            Run system health checks
  --plan FILE         Execute a JSON plan file of sequential faults
  --cleanup           Force cleanup of any leftover chaos artifacts

FAULT TYPES
  cpu-stress          Saturate all CPU cores
  memory-pressure     Fill memory via tmpfs allocation
  disk-fill           Fill disk to threshold percentage
  service-kill        Stop a systemd service (requires --target)
  network-latency     Add network latency via tc netem
  network-drop        Drop packets via tc netem
  dns-failure         Break DNS resolution temporarily
  io-latency          Degrade I/O performance

OPTIONS
  --duration SEC      Fault duration in seconds (default: 30)
  --target SERVICE    Target service for service-kill
  --fill-path PATH    Path for disk-fill (default: /tmp)
  --fill-size PCT     Disk fill target percent (default: 90)
  --latency MS        Latency in ms for network-latency (default: 200)
  --drop PCT          Drop percent for network-drop (default: 50)
  --iface IFACE       Network interface (default: eth0)
  --yes               Skip confirmation prompts
  --verbose           Enable verbose output
  --no-color          Disable color output
  --help, -h          Show this help message

ENVIRONMENT VARIABLES
  CHAOS_DURATION      Default duration (seconds)
  CHAOS_FILL_PATH     Default fill path
  CHAOS_FILL_SIZE     Default fill size (percent)
  CHAOS_LATENCY       Default network latency (ms)
  CHAOS_DROP          Default packet drop (percent)
  CHAOS_IFACE         Default network interface
  VERBOSE             Set to 'true' for verbose output
  COLOR               Set to 'never' to disable colors, 'always' to force them

PLAN FILE FORMAT (JSON)
  {
    "faults": [
      { "type": "cpu-stress", "duration": 15 },
      { "type": "network-latency", "duration": 20 },
      { "type": "service-kill", "duration": 10, "target": "nginx" }
    ]
  }

EXAMPLES
  sudo ./${SCRIPT_NAME} --fault cpu-stress --duration 30
  sudo ./${SCRIPT_NAME} --fault service-kill --target nginx --duration 60
  sudo ./${SCRIPT_NAME} --fault network-latency --latency 500 --iface ens33
  sudo ./${SCRIPT_NAME} --verify
  sudo ./${SCRIPT_NAME} --plan chaos-plan.json --yes
EOF
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Argument parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
# _require_uint OPTION VALUE — die unless VALUE is a non-negative whole number.
_require_uint() {
  [[ "$2" =~ ^[0-9]+$ ]] || die "$1 expects a non-negative whole number, got '$2'"
}

# _require_decimal OPTION VALUE — die unless VALUE is a non-negative number
# with an optional fractional part.
_require_decimal() {
  local re='^[0-9]+(\.[0-9]+)?$'
  [[ "$2" =~ $re ]] || die "$1 expects a non-negative number, got '$2'"
}

# _require_percent OPTION VALUE — die unless VALUE is a number from 0 to 100.
_require_percent() {
  local opt="$1" val="$2"
  _require_decimal "$opt" "$val"
  local whole=$((10#${val%%.*}))
  if ((whole > 100)) || { ((whole == 100)) && [[ "$val" == *.*[1-9]* ]]; }; then
    die "$opt must be between 0 and 100, got '$val'"
  fi
}

#######################################
# Parse command-line options into the script's global settings.
# Numeric options are validated here so that a typo is rejected before any
# fault is injected, rather than failing in arithmetic afterwards.
# Globals (written): RUN_MODE FAULT_TYPE PLAN_FILE DURATION TARGET_SERVICE
#                    FILL_PATH FILL_SIZE LATENCY_MS DROP_PERCENT NETWORK_IFACE
#                    CONFIRM_YES VERBOSE COLOR
# Arguments: the script's command line
# Returns:   0; terminates through die() on an unknown option, a missing
#            option value, or an invalid numeric value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --fault)
        RUN_MODE="fault"
        FAULT_TYPE="${2:-}"
        [[ -z "$FAULT_TYPE" ]] && die "--fault requires a TYPE argument"
        shift 2
        ;;
      --list)
        RUN_MODE="list"
        shift
        ;;
      --verify)
        RUN_MODE="verify"
        shift
        ;;
      --plan)
        RUN_MODE="plan"
        PLAN_FILE="${2:-}"
        [[ -z "$PLAN_FILE" ]] && die "--plan requires a FILE argument"
        shift 2
        ;;
      --cleanup)
        RUN_MODE="cleanup"
        shift
        ;;
      --duration)
        [[ -z "${2:-}" ]] && die "--duration requires a value"
        _require_uint "--duration" "$2"
        # 10# forces base 10 so "08" is accepted instead of failing as octal.
        DURATION=$((10#$2))
        shift 2
        ;;
      --target)
        TARGET_SERVICE="${2:-}"
        [[ -z "$TARGET_SERVICE" ]] && die "--target requires a SERVICE name"
        shift 2
        ;;
      --fill-path)
        FILL_PATH="${2:-}"
        [[ -z "$FILL_PATH" ]] && die "--fill-path requires a PATH"
        shift 2
        ;;
      --fill-size)
        [[ -z "${2:-}" ]] && die "--fill-size requires a percentage"
        _require_uint "--fill-size" "$2"
        FILL_SIZE=$((10#$2))
        if ((FILL_SIZE < 1 || FILL_SIZE > 100)); then
          die "--fill-size must be between 1 and 100, got '$2'"
        fi
        shift 2
        ;;
      --latency)
        [[ -z "${2:-}" ]] && die "--latency requires a value in ms"
        _require_decimal "--latency" "$2"
        LATENCY_MS="$2"
        shift 2
        ;;
      --drop)
        [[ -z "${2:-}" ]] && die "--drop requires a percentage"
        _require_percent "--drop" "$2"
        DROP_PERCENT="$2"
        shift 2
        ;;
      --iface)
        NETWORK_IFACE="${2:-}"
        [[ -z "$NETWORK_IFACE" ]] && die "--iface requires an interface name"
        shift 2
        ;;
      --yes)
        CONFIRM_YES=true
        shift
        ;;
      --verbose)
        VERBOSE="true"
        shift
        ;;
      --no-color)
        COLOR="never"
        shift
        ;;
      --help | -h)
        RUN_MODE="help"
        shift
        ;;
      *)
        die "Unknown option: $1 (see --help)"
        ;;
    esac
  done
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
# main ARGS... — parse the command line, initialise colours, then hand over to
# the handler for the selected mode. With no mode given, the help text is
# shown and the script terminates with an error.
main() {
  parse_args "$@"
  setup_colors

  # Mode name -> handler function.
  local -A handlers=(
    [fault]=do_fault
    [list]=do_list
    [verify]=do_verify
    [plan]=do_plan
    [cleanup]=do_cleanup
    [help]=show_help
  )

  if [[ -z "$RUN_MODE" ]]; then
    show_help
    die "No mode specified — use --fault, --list, --verify, --plan, or --cleanup"
  fi

  if [[ -z "${handlers[$RUN_MODE]:-}" ]]; then
    die "Unknown mode: $RUN_MODE"
  fi

  "${handlers[$RUN_MODE]}"
}
|
||||
|
||||
main "$@"
|
||||
Reference in New Issue
Block a user