Files
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

568 lines
23 KiB
Bash
Executable File

#!/usr/bin/env bash
#########################################################################################
#### triage.sh — Rapid 60-second diagnostic for misbehaving Linux servers ####
#### Top CPU/memory consumers, disk pressure, OOM kills, failed services, ####
#### recent errors, network states, load averages, and I/O wait ####
#### No dependencies beyond coreutils and standard Linux tools ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version 1.00 ####
#### ####
#### Usage: ####
#### ./triage.sh ####
#### ./triage.sh --no-color ####
#### ./triage.sh --section load,disk,oom ####
#### ####
#### See --help for all options. ####
#########################################################################################
set -euo pipefail
# Bash's checkwinsize option (enabled by default since Bash 5.0) rewrites
# LINES and COLUMNS with the terminal size after external commands finish,
# even in scripts. This script stores its "top N processes" setting in LINES,
# so switch the option off before the value can be silently replaced.
shopt -u checkwinsize
# ── Defaults ──────────────────────────────────────────────────────────
# Every setting can be pre-seeded from the environment; command-line options
# handled by parse_args take precedence.
SECTIONS="${SECTIONS:-all}"   # comma-separated section names, or "all"
VERBOSE="${VERBOSE:-false}"   # "true" enables [DEBUG] output
COLOR="${COLOR:-auto}"        # auto | always | never
LINES="${LINES:-10}"          # rows shown in the top CPU / memory tables
ERRORS="${ERRORS:-25}"        # number of recent journal errors to show
# ── State ─────────────────────────────────────────────────────────────
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
# ── Colors ────────────────────────────────────────────────────────────
# Start with colors disabled; setup_colors fills these in when appropriate.
RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
# Populate the global color variables according to COLOR:
#   never  -> all empty
#   always -> ANSI escape sequences
#   other  -> ANSI escape sequences only when stdout is a terminal
# The sequences are stored as literal backslash escapes; callers render them
# with `echo -e` or printf.
setup_colors() {
  local enable="no"

  if [[ "$COLOR" != "never" ]]; then
    if [[ "$COLOR" == "always" || -t 1 ]]; then
      enable="yes"
    fi
  fi

  if [[ "$enable" == "yes" ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    DIM='\033[2m'
    RESET='\033[0m'
  else
    RED=""
    GREEN=""
    YELLOW=""
    BLUE=""
    CYAN=""
    BOLD=""
    DIM=""
    RESET=""
  fi
}
# ── Logging ───────────────────────────────────────────────────────────
# Print an informational message prefixed with an [INFO] tag (colored when
# BLUE/RESET are set). Backslash escapes in the message are interpreted.
log() {
  local tag="${BLUE}[INFO]${RESET}"
  echo -e "${tag} $*"
}
# Print a [DEBUG] message, but only when VERBOSE is exactly "true".
# Always succeeds, so it is safe to call under `set -e`.
verbose() {
  [[ "$VERBOSE" == "true" ]] || return 0
  local tag="${DIM}[DEBUG]${RESET}"
  echo -e "${tag} $*"
}
# ── Helpers ───────────────────────────────────────────────────────────
# Print a section title ($1) framed by a blank line above and below.
section_header() {
  local title="$1"
  echo
  echo -e " ${BOLD}${CYAN}── ${title} ──${RESET}"
  echo
}
# Print one "label  value" row: the label ($1) is bold and left-aligned in a
# 22-character column, the value ($2) is printed verbatim (no escape handling).
field() {
  local label="$1"
  local value="$2"
  local row_format=" ${BOLD}%-22s${RESET} %s\n"
  # shellcheck disable=SC2059 — the format deliberately embeds the color codes
  printf "$row_format" "$label" "$value"
}
# Like field, but the value ($2) is rendered with %b so that embedded
# backslash escapes (the color codes) are interpreted.
field_color() {
  local label="$1"
  local value="$2"
  local row_format=" ${BOLD}%-22s${RESET} %b\n"
  # shellcheck disable=SC2059 — the format deliberately embeds the color codes
  printf "$row_format" "$label" "$value"
}
# Convert a byte count ($1) into a short human-readable string on stdout:
#   >= 1 GiB -> "N.N GiB", >= 1 MiB -> "N MiB", >= 1 KiB -> "N.N KiB",
#   otherwise "N B".
# Anything that is not a plain (optionally negative) integer prints "N/A"
# instead of being evaluated. Always returns 0.
human_bytes() {
  local bytes="${1:-}"

  # Reject empty / non-integer input up front: both the [[ -ge ]] tests and
  # awk would otherwise evaluate it as an expression.
  if [[ ! "$bytes" =~ ^-?[0-9]+$ ]]; then
    echo "N/A"
    return 0
  fi

  # The value is handed to awk with -v rather than spliced into the program.
  if [[ "$bytes" -ge 1073741824 ]]; then
    awk -v b="$bytes" 'BEGIN { printf "%.1f GiB", b / 1073741824 }'
  elif [[ "$bytes" -ge 1048576 ]]; then
    awk -v b="$bytes" 'BEGIN { printf "%.0f MiB", b / 1048576 }'
  elif [[ "$bytes" -ge 1024 ]]; then
    awk -v b="$bytes" 'BEGIN { printf "%.1f KiB", b / 1024 }'
  else
    echo "${bytes} B"
  fi
}
# Succeed when section name $1 is selected: either SECTIONS is exactly "all"
# or $1 appears as one element of the comma-separated SECTIONS list.
should_show() {
  case ",${SECTIONS}," in
    ,all,)
      return 0
      ;;
    *",$1,"*)
      return 0
      ;;
  esac
  return 1
}
# ══════════════════════════════════════════════════════════════════════
# LOAD & CPU
# ══════════════════════════════════════════════════════════════════════
# Print the "Load & CPU" section: load averages, logical CPU count, 1-minute
# load per CPU, I/O wait and (when mpstat is installed) a one-second CPU
# usage snapshot.
# Globals:  GREEN, YELLOW, RED, RESET (read)
# Outputs:  report rows on stdout
# Returns:  0
show_load() {
  section_header "Load & CPU"

  local num_re='^[0-9]+([.][0-9]+)?$'
  local int_re='^[1-9][0-9]*$'
  local load1="" load5="" load15="" cpus=""

  if [[ -f /proc/loadavg ]]; then
    read -r load1 load5 load15 _ < /proc/loadavg || true
  else
    # Fallback: run uptime once (C locale) and split its "a, b, c" tail.
    local averages
    averages=$(LC_ALL=C uptime 2>/dev/null \
      | awk -F'load average: *' '{ print $2 }' || true)
    IFS=', ' read -r load1 load5 load15 _ <<< "$averages" || true
  fi

  # grep -c prints "0" AND exits non-zero when nothing matches, so it must not
  # be chained with `|| echo 1`; validate each candidate instead.
  cpus=$(nproc 2>/dev/null || true)
  if [[ ! "$cpus" =~ $int_re ]]; then
    cpus=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null || true)
  fi
  if [[ ! "$cpus" =~ $int_re ]]; then
    cpus=1
  fi

  field "Load average (1m):" "${load1:-N/A}"
  field "Load average (5m):" "${load5:-N/A}"
  field "Load average (15m):" "${load15:-N/A}"
  field "Logical CPUs:" "$cpus"

  local load_per_cpu="N/A"
  if [[ "$load1" =~ $num_re ]]; then
    load_per_cpu=$(LC_ALL=C awk -v load="$load1" -v n="$cpus" \
      'BEGIN { printf "%.2f", load / n }')
  fi
  field "Load per CPU (1m):" "$load_per_cpu"

  # One mpstat sample feeds both I/O wait and CPU usage. LC_ALL=C keeps the
  # decimal separator a dot; `|| true` stops a failing mpstat from aborting
  # the whole script under `set -e -o pipefail`.
  local iowait="" idle_pct="" mp_avg=""
  if command -v mpstat &>/dev/null; then
    verbose "Using mpstat for I/O wait"
    mp_avg=$(LC_ALL=C mpstat 1 1 2>/dev/null | tail -n 1 || true)
    # Last line is the "Average:" row: field 6 is %iowait, last field is %idle.
    iowait=$(awk '{ print $6 }' <<< "$mp_avg")
    idle_pct=$(awk '{ print $NF }' <<< "$mp_avg")
    if [[ ! "$iowait" =~ $num_re ]]; then
      iowait=""
    fi
    if [[ ! "$idle_pct" =~ $num_re ]]; then
      idle_pct=""
    fi
  fi

  if [[ -z "$iowait" && -f /proc/stat ]]; then
    verbose "Using /proc/stat for I/O wait"
    # The aggregate "cpu" line holds counters accumulated since boot, so this
    # fallback is a since-boot average rather than a live sample.
    local user nice sys idle io_wait total
    read -r _ user nice sys idle io_wait _ < /proc/stat || true
    total=$((user + nice + sys + idle + io_wait))
    if [[ "$total" -gt 0 ]]; then
      iowait=$(LC_ALL=C awk -v w="${io_wait:-0}" -v t="$total" \
        'BEGIN { printf "%.1f", w * 100 / t }')
    fi
  fi

  if [[ -n "$iowait" ]]; then
    local iow_color="$GREEN"
    local iow_int=${iowait%.*}
    if [[ "${iow_int:-0}" -ge 20 ]]; then
      iow_color="$RED"
    elif [[ "${iow_int:-0}" -ge 10 ]]; then
      iow_color="$YELLOW"
    fi
    field_color "I/O wait:" "${iow_color}${iowait}%${RESET}"
  else
    field "I/O wait:" "N/A"
  fi

  # CPU usage snapshot, derived from the %idle column of the same sample.
  if [[ -n "$idle_pct" ]]; then
    local cpu_used cpu_int
    cpu_used=$(LC_ALL=C awk -v idle="$idle_pct" \
      'BEGIN { printf "%.1f", 100 - idle }')
    local cpu_color="$GREEN"
    cpu_int=${cpu_used%.*}
    if [[ "${cpu_int:-0}" -ge 90 ]]; then
      cpu_color="$RED"
    elif [[ "${cpu_int:-0}" -ge 75 ]]; then
      cpu_color="$YELLOW"
    fi
    field_color "CPU usage:" "${cpu_color}${cpu_used}%${RESET}"
  fi

  return 0
}
# ══════════════════════════════════════════════════════════════════════
# TOP CPU CONSUMERS
# ══════════════════════════════════════════════════════════════════════
# Print the "Top CPU Consumers" table: the first LINES processes by CPU%.
# Rows at or above 80% CPU are shown in red, at or above 50% in yellow.
# Globals:  LINES, BOLD, RED, YELLOW, RESET (read)
# Returns:  0
show_cpu() {
  section_header "Top CPU Consumers"

  local limit="${LINES:-10}"
  if [[ ! "$limit" =~ ^[0-9]+$ ]]; then
    limit=10
  fi

  printf " ${BOLD}%-8s %-12s %6s %6s %-12s %s${RESET}\n" \
    "PID" "USER" "CPU%" "MEM%" "TIME" "COMMAND"
  printf " %s\n" "$(printf '%.0s─' {1..58})"

  # The row limit is applied by awk, which reads ps's output to the end.
  # `ps | head` lets head exit early; ps can then die from SIGPIPE and, with
  # pipefail + errexit, take the whole script down. Splitting each row with
  # `read` also avoids forking awk six times per process; the last variable
  # receives the rest of the line, so command names keep their spacing.
  local pid user cpu mem cputime cmd color cpu_int
  while read -r pid user cpu mem cputime cmd; do
    if [[ -z "$pid" ]]; then
      continue
    fi
    color=""
    cpu_int=${cpu%.*}
    if [[ "${cpu_int:-0}" -ge 80 ]]; then
      color="$RED"
    elif [[ "${cpu_int:-0}" -ge 50 ]]; then
      color="$YELLOW"
    fi
    if [[ -n "$color" ]]; then
      printf " %-8s %-12s %b%6s%b %6s %-12s %s\n" \
        "$pid" "$user" "$color" "$cpu" "$RESET" "$mem" "$cputime" "$cmd"
    else
      printf " %-8s %-12s %6s %6s %-12s %s\n" \
        "$pid" "$user" "$cpu" "$mem" "$cputime" "$cmd"
    fi
  done < <(
    ps -eo pid,user,pcpu,pmem,time,comm --sort=-%cpu --no-headers 2>/dev/null \
      | awk -v n="$limit" 'NR <= n'
  )

  return 0
}
# ══════════════════════════════════════════════════════════════════════
# TOP MEMORY CONSUMERS
# ══════════════════════════════════════════════════════════════════════
# Print the "Top Memory Consumers" table: the first LINES processes by
# resident set size, with RSS converted from KiB to a human-readable size.
# Globals:  LINES, BOLD, RESET (read)
# Returns:  0
show_memory() {
  section_header "Top Memory Consumers"

  local limit="${LINES:-10}"
  if [[ ! "$limit" =~ ^[0-9]+$ ]]; then
    limit=10
  fi

  printf " ${BOLD}%-8s %-12s %10s %6s %s${RESET}\n" \
    "PID" "USER" "RSS" "MEM%" "COMMAND"
  printf " %s\n" "$(printf '%.0s─' {1..58})"

  # awk applies the row limit while draining ps completely, so ps is never
  # killed by SIGPIPE (which pipefail + errexit would turn into a script
  # abort, as can happen with `ps | head`).
  local pid user rss_kb mem cmd rss_human
  while read -r pid user rss_kb mem cmd; do
    # Skip anything that is not a well-formed row (RSS must be an integer).
    if [[ ! "$rss_kb" =~ ^[0-9]+$ ]]; then
      continue
    fi
    rss_human=$(human_bytes "$((rss_kb * 1024))")
    printf " %-8s %-12s %10s %6s %s\n" \
      "$pid" "$user" "$rss_human" "$mem" "$cmd"
  done < <(
    ps -eo pid,user,rss,pmem,comm --sort=-rss --no-headers 2>/dev/null \
      | awk -v n="$limit" 'NR <= n'
  )

  return 0
}
# ══════════════════════════════════════════════════════════════════════
# DISK PRESSURE
# ══════════════════════════════════════════════════════════════════════
# Print the "Disk Pressure" section: every filesystem at or above 80% space
# usage (yellow, red from 90%), followed by inode usage per filesystem
# (yellow from 75%, red from 90%). tmpfs, devtmpfs and overlay are excluded.
# Globals:  BOLD, GREEN, YELLOW, RED, RESET (read)
# Returns:  0
show_disk() {
  section_header "Disk Pressure"
  printf " ${BOLD}%-24s %8s %8s %8s %6s${RESET}\n" \
    "FILESYSTEM" "SIZE" "USED" "AVAIL" "USE%"
  printf " %s\n" "$(printf '%.0s─' {1..58})"

  local -a df_filter=(-x tmpfs -x devtmpfs -x overlay)
  local size used avail pct mount color
  local seen=0 flagged=0

  # df runs inside a process substitution: it exits non-zero whenever a single
  # mount is inaccessible, and inside a plain pipeline pipefail + errexit would
  # abort the script. The mount point is requested last so that `read` keeps
  # paths containing spaces intact. Counters survive because the loop is not
  # in a pipeline subshell, which removes the need for a second df pass.
  while read -r size used avail pct mount; do
    seen=$((seen + 1))
    pct=${pct%\%}
    # df prints "-" when a percentage is unavailable; skip such rows rather
    # than feeding them to an arithmetic comparison.
    if [[ ! "$pct" =~ ^[0-9]+$ ]] || [[ "$pct" -lt 80 ]]; then
      continue
    fi
    flagged=$((flagged + 1))
    color="$YELLOW"
    if [[ "$pct" -ge 90 ]]; then
      color="$RED"
    fi
    printf " %-24s %8s %8s %8s %b%5s%%%b\n" \
      "$mount" "$size" "$used" "$avail" "$color" "$pct" "$RESET"
  done < <(
    df -h --output=size,used,avail,pcent,target "${df_filter[@]}" 2>/dev/null \
      | tail -n +2 | sort -b -k5
  )

  if [[ "$seen" -eq 0 ]]; then
    field "Status:" "No filesystem data available"
  elif [[ "$flagged" -eq 0 ]]; then
    echo -e " ${GREEN}All filesystems below 80%${RESET}"
  fi

  # Inode pressure for mounted filesystems
  echo ""
  local ipct icolor
  while read -r ipct mount; do
    ipct=${ipct%\%}
    # Filesystems without inode accounting report "-" here.
    if [[ ! "$ipct" =~ ^[0-9]+$ ]]; then
      continue
    fi
    icolor="$GREEN"
    if [[ "$ipct" -ge 90 ]]; then
      icolor="$RED"
    elif [[ "$ipct" -ge 75 ]]; then
      icolor="$YELLOW"
    fi
    field_color "Inode usage (${mount}):" "${icolor}${ipct}%${RESET}"
  done < <(
    df -i --output=ipcent,target "${df_filter[@]}" 2>/dev/null \
      | tail -n +2 | sort -b -k2
  )

  return 0
}
# ══════════════════════════════════════════════════════════════════════
# OOM KILLS
# ══════════════════════════════════════════════════════════════════════
# Print the "OOM Kills" section: the most recent (up to 20) kernel log lines
# that mention an out-of-memory kill, or a green "none found" message.
# Globals:  GREEN, RED, RESET (read)
# Returns:  0
show_oom() {
  section_header "OOM Kills"

  local pattern='out of memory|oom-killer|killed process'
  local oom_lines=""

  # Read from ONE source only. journalctl -k and dmesg carry the same kernel
  # messages with different timestamp prefixes, so merging them and running
  # `sort -u` never removes the duplicates (every event is counted twice) and
  # also destroys the chronological order. dmesg is used only as a fallback.
  if command -v journalctl &>/dev/null; then
    verbose "Checking journalctl -k for OOM kills"
    oom_lines=$(journalctl -k --no-pager -q 2>/dev/null \
      | grep -iE "$pattern" || true)
  fi
  if [[ -z "$oom_lines" ]] && command -v dmesg &>/dev/null; then
    verbose "Checking dmesg for OOM kills"
    oom_lines=$(dmesg 2>/dev/null | grep -iE "$pattern" || true)
  fi

  if [[ -z "$oom_lines" ]]; then
    echo -e " ${GREEN}No recent OOM kills found${RESET}"
    return 0
  fi

  # Keep the newest entries, still in the order the kernel logged them.
  local recent count line
  recent=$(tail -n 20 <<< "$oom_lines")
  count=$(wc -l <<< "$recent")
  field_color "OOM events:" "${RED}${count}${RESET}"
  echo ""
  while IFS= read -r line; do
    printf " ${RED}${RESET} %s\n" "$line"
  done <<< "$recent"

  return 0
}
# ══════════════════════════════════════════════════════════════════════
# FAILED SERVICES
# ══════════════════════════════════════════════════════════════════════
# Print the "Failed Services" section: the number of systemd units in the
# failed state followed by their names.
# Globals:  GREEN, RED, RESET (read)
# Returns:  0
show_services() {
  section_header "Failed Services"

  if ! command -v systemctl &>/dev/null; then
    field "Status:" "systemd not available"
    return 0
  fi

  # Query once. --plain drops the leading status-bullet column that systemctl
  # otherwise prints before failed units, which would make the first field the
  # bullet instead of the unit name. Testing the exit status here also keeps a
  # failing systemctl (e.g. systemd is installed but not running) from
  # aborting the script through pipefail + errexit.
  local listing
  if ! listing=$(systemctl list-units --state=failed --no-legend --plain \
    --no-pager 2>/dev/null); then
    field "Status:" "unable to query systemd"
    return 0
  fi

  local failed_units
  failed_units=$(awk 'NF { print $1 }' <<< "$listing")
  if [[ -z "$failed_units" ]]; then
    field_color "Failed services:" "${GREEN}0${RESET}"
    return 0
  fi

  local failed_count unit
  failed_count=$(wc -l <<< "$failed_units")
  field_color "Failed services:" "${RED}${failed_count}${RESET}"
  echo ""
  while IFS= read -r unit; do
    printf " ${RED}${RESET} %s\n" "$unit"
  done <<< "$failed_units"

  return 0
}
# ══════════════════════════════════════════════════════════════════════
# RECENT ERRORS
# ══════════════════════════════════════════════════════════════════════
# Print the "Recent Errors" section: the last ERRORS journal entries with
# priority 3 (err) or more severe, or a green message when there are none.
show_errors() {
  section_header "Recent Errors (last ${ERRORS})"

  if ! command -v journalctl &>/dev/null; then
    field "Status:" "journalctl not available"
    return
  fi

  # A failing journalctl is treated the same as "no entries".
  local journal_text
  journal_text=$(journalctl --no-pager -q -p 3 -n "$ERRORS" 2>/dev/null || true)

  if [[ -n "$journal_text" ]]; then
    local entry_total entry
    entry_total=$(wc -l <<< "$journal_text")
    verbose "Found $entry_total error entries"
    while IFS= read -r entry; do
      printf " %s\n" "$entry"
    done <<< "$journal_text"
  else
    echo -e " ${GREEN}No recent priority ≤ 3 messages${RESET}"
  fi
}
# ══════════════════════════════════════════════════════════════════════
# NETWORK STATES
# ══════════════════════════════════════════════════════════════════════
# Print the "Network States" section: a table of TCP connection states with
# their counts (from ss, or netstat when ss is missing), highlighting large
# CLOSE_WAIT / TIME_WAIT populations and adding a hint line for each.
show_network() {
  section_header "Network States"

  # "<count> <state>" rows, most frequent state first.
  local tcp_states=""
  if command -v ss &>/dev/null; then
    verbose "Using ss for TCP state summary"
    tcp_states=$(ss -tan 2>/dev/null | tail -n +2 | awk '{print $1}' | sort | uniq -c | sort -rn || true)
  elif command -v netstat &>/dev/null; then
    verbose "Falling back to netstat for TCP state summary"
    tcp_states=$(netstat -tan 2>/dev/null | tail -n +3 | awk '{print $6}' | sort | uniq -c | sort -rn || true)
  fi

  if [[ -z "$tcp_states" ]]; then
    field "Status:" "No TCP connection data available"
    return
  fi

  printf " ${BOLD}%-18s %s${RESET}\n" "STATE" "COUNT"
  printf " %s\n" "$(printf '%.0s─' {1..26})"

  local row count state paint warn_at crit_at
  while IFS= read -r row; do
    if [[ -z "$row" ]]; then
      continue
    fi
    read -r count state _ <<< "$row"

    # Per-state thresholds; a critical threshold of 0 means "never highlight".
    warn_at=0
    crit_at=0
    case "$state" in
      CLOSE-WAIT | CLOSE_WAIT)
        warn_at=10
        crit_at=50
        ;;
      TIME-WAIT | TIME_WAIT)
        warn_at=500
        crit_at=2000
        ;;
    esac

    paint=""
    if [[ "$crit_at" -gt 0 ]]; then
      if [[ "$count" -ge "$crit_at" ]]; then
        paint="$RED"
      elif [[ "$count" -ge "$warn_at" ]]; then
        paint="$YELLOW"
      fi
    fi

    if [[ -z "$paint" ]]; then
      printf " %-18s %s\n" "$state" "$count"
    else
      printf " %-18s %b%s%b\n" "$state" "$paint" "$count" "$RESET"
    fi
  done <<< "$tcp_states"

  # Warnings for notable states
  echo ""
  local close_wait time_wait
  close_wait=$(awk '/CLOSE.WAIT/ {print $1}' <<< "$tcp_states")
  time_wait=$(awk '/TIME.WAIT/ {print $1}' <<< "$tcp_states")
  if [[ -n "$close_wait" && "$close_wait" -ge 10 ]]; then
    echo -e " ${YELLOW}CLOSE_WAIT: ${close_wait} — possible application not closing connections${RESET}"
  fi
  if [[ -n "$time_wait" && "$time_wait" -ge 500 ]]; then
    echo -e " ${YELLOW}TIME_WAIT: ${time_wait} — high volume; consider net.ipv4.tcp_tw_reuse${RESET}"
  fi
}
# ══════════════════════════════════════════════════════════════════════
# USAGE
# ══════════════════════════════════════════════════════════════════════
# Print the help text to stdout. SCRIPT_NAME is expanded inside the here-doc;
# the \$(...) in the last example is escaped so it is shown literally.
# The environment list now also documents VERBOSE, which the script reads
# alongside the other settings.
usage() {
  cat <<EOF
${SCRIPT_NAME} — Rapid 60-second Linux server triage
USAGE:
${SCRIPT_NAME} [OPTIONS]
OPTIONS:
--section SECTIONS Comma-separated sections to show (default: all)
Available: load, cpu, memory, disk, oom, services,
errors, network
--lines N Number of top processes to show (default: 10)
--errors N Number of recent journal errors to show (default: 25)
--verbose Enable debug output
--no-color Disable colored output
--help Show this help
ENVIRONMENT VARIABLES:
SECTIONS Sections to display (default: all)
VERBOSE Debug output: true, false (default: false)
COLOR Color mode: auto, always, never (default: auto)
LINES Top process count (default: 10)
ERRORS Journal error count (default: 25)
EXAMPLES:
# Full triage dump
./triage.sh
# Only load and disk sections
./triage.sh --section load,disk
# Show top 20 processes
./triage.sh --lines 20
# Pipe-friendly (no colors)
./triage.sh --no-color | tee /tmp/triage-\$(hostname)-\$(date +%F).txt
EOF
}
# ══════════════════════════════════════════════════════════════════════
# ARGUMENT PARSING
# ══════════════════════════════════════════════════════════════════════
# Parse command-line options into the global settings.
# Globals:   SECTIONS, LINES, ERRORS, VERBOSE, COLOR (written);
#            SCRIPT_NAME (read)
# Arguments: the script's command-line arguments
# Exits:     0 after printing help; 1 on an unknown option, on a missing
#            option value, or on a non-numeric --lines / --errors value
parse_args() {
  local opt
  while [[ $# -gt 0 ]]; do
    opt="$1"
    case "$opt" in
      --section | --lines | --errors)
        # Check the value before touching $2: under `set -u` a missing value
        # would otherwise die with a bare "unbound variable" error.
        if [[ $# -lt 2 || -z "${2:-}" || "${2:-}" == --* ]]; then
          echo "Option ${opt} requires a value" >&2
          echo "Run ${SCRIPT_NAME} --help for usage" >&2
          exit 1
        fi
        case "$opt" in
          --section)
            SECTIONS="$2"
            ;;
          *)
            # Both counts are later handed to external tools as line counts.
            if [[ ! "$2" =~ ^[0-9]+$ ]]; then
              echo "Option ${opt} expects a non-negative integer, got: $2" >&2
              echo "Run ${SCRIPT_NAME} --help for usage" >&2
              exit 1
            fi
            if [[ "$opt" == "--lines" ]]; then
              LINES="$2"
            else
              ERRORS="$2"
            fi
            ;;
        esac
        shift 2
        ;;
      --verbose)
        VERBOSE="true"
        shift
        ;;
      --no-color)
        COLOR="never"
        shift
        ;;
      --help | -h)
        setup_colors
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: ${opt}" >&2
        echo "Run ${SCRIPT_NAME} --help for usage" >&2
        exit 1
        ;;
    esac
  done
}
# ══════════════════════════════════════════════════════════════════════
# MAIN
# ══════════════════════════════════════════════════════════════════════
# Entry point: parse options, initialise colors, print the report banner
# (host name and timestamp), then run every selected section in a fixed order.
main() {
  parse_args "$@"
  setup_colors

  echo ""
  echo -e "${BOLD}Triage — $(hostname -f 2>/dev/null || hostname)${RESET}"
  echo -e "${DIM}$(date '+%Y-%m-%d %H:%M:%S %Z')${RESET}"

  # Each section name maps onto a show_<name> function.
  local section
  for section in load cpu memory disk oom services errors network; do
    if should_show "$section"; then
      "show_${section}"
    fi
  done

  echo ""
}
main "$@"