#!/usr/bin/env bash
#########################################################################################
#### memory-pressure-analyzer.sh — Analyze Linux memory pressure signals             ####
#### OOM kills, swap usage, top consumers, slab caches, PSI metrics, fragmentation   ####
#### No dependencies beyond coreutils and standard Linux tools                       ####
####                                                                                 ####
#### Author: Phil Connor                                                             ####
#### Contact: contact@mylinux.work                                                   ####
#### License: MIT                                                                    ####
#### Version 1.00                                                                    ####
####                                                                                 ####
#### Usage:                                                                          ####
####     ./memory-pressure-analyzer.sh                                               ####
####                                                                                 ####
#### See --help for all options.                                                     ####
#########################################################################################

set -euo pipefail

# ── Defaults ──────────────────────────────────────────────────────────
# Each of these can be pre-set through the environment.
TOP_N="${TOP_N:-10}"
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}" # "never" and "always" are special-cased; anything else auto-detects

# Section flags — if none set, run all
RUN_OOM="false"
RUN_TOP="false"
RUN_SWAP="false"
RUN_SLAB="false"
RUN_PSI="false"
RUN_FRAG="false"
RUN_SUMMARY="false"
RUN_ALL="true"

# ── State ─────────────────────────────────────────────────────────────
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
SCORE=100
OOM_COUNT=0
SWAP_PCT=0
PSI_SOME_AVG300=0

# ── Colors ────────────────────────────────────────────────────────────
#######################################
# Populate the color variables according to $COLOR.
#   never   -> all variables empty
#   always  -> escape sequences, regardless of where stdout goes
#   other   -> escape sequences only when stdout is a terminal and the
#              NO_COLOR environment variable is unset or empty
# The values are backslash-escape strings (e.g. '\033[0;31m'), so they
# must be emitted through `echo -e`, `printf '%b'` or a printf format.
# Globals:   COLOR, NO_COLOR (read);
#            RED GREEN YELLOW BLUE CYAN BOLD DIM RESET (written)
#######################################
setup_colors() {
  local enable="false"

  case "$COLOR" in
    never) ;;
    always) enable="true" ;;
    *)
      if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
        enable="true"
      fi
      ;;
  esac

  if [[ "$enable" == "true" ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    BLUE='\033[0;34m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    DIM='\033[2m'
    RESET='\033[0m'
  else
    RED="" GREEN="" YELLOW="" BLUE="" CYAN="" BOLD="" DIM="" RESET=""
  fi
}

# ── Logging ───────────────────────────────────────────────────────────
# All three interpret backslash escapes so the color variables render.

# Informational message on stdout.
log() { echo -e "${BLUE}[INFO]${RESET} $*"; }

# Warning on stderr.
warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }

# Debug message on stdout, printed only when VERBOSE is "true".
verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    echo -e "${DIM}[DEBUG]${RESET} $*"
  fi
}

# ── Helpers ───────────────────────────────────────────────────────────
#######################################
# Print a section title surrounded by blank lines.
# Globals:   BOLD, CYAN, RESET (read)
# Arguments: $1 - title text
#######################################
section_header() {
  echo ""
  echo -e " ${BOLD}${CYAN}── $1 ──${RESET}"
  echo ""
}

# Print a "label value" row; the value is printed verbatim.
# The color variables sit in the printf format on purpose: they hold
# backslash escapes that printf must interpret.
field() {
  printf " ${BOLD}%-24s${RESET} %s\n" "$1" "$2"
}

# Same as field(), but backslash escapes inside the value are interpreted
# (%b), so the value may contain the color variables.
field_color() {
  printf " ${BOLD}%-24s${RESET} %b\n" "$1" "$2"
}

# Print a horizontal rule made of $1 '─' characters (no trailing newline).
_rule() {
  local width="$1" bar
  printf -v bar '%*s' "$width" ''
  printf '%s' "${bar// /─}"
}

# Succeed when decimal number $1 is strictly greater than decimal number $2.
_float_gt() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 > b + 0) }'
}

#######################################
# Format a byte count with a binary unit (B, KiB, MiB, GiB).
# Arguments: $1 - non-negative integer byte count; anything else is
#                 treated as 0 instead of raising an arithmetic error
# Outputs:   the formatted value on stdout
#######################################
human_bytes() {
  local bytes="${1:-0}"
  [[ "$bytes" =~ ^[0-9]+$ ]] || bytes=0

  # Values reach awk through -v, never by pasting them into program text.
  if [[ "$bytes" -ge 1073741824 ]]; then
    awk -v b="$bytes" 'BEGIN { printf "%.1f GiB", b / 1073741824 }'
  elif [[ "$bytes" -ge 1048576 ]]; then
    awk -v b="$bytes" 'BEGIN { printf "%.1f MiB", b / 1048576 }'
  elif [[ "$bytes" -ge 1024 ]]; then
    awk -v b="$bytes" 'BEGIN { printf "%.1f KiB", b / 1024 }'
  else
    echo "${bytes} B"
  fi
}

# Format a KiB count (as found in /proc/meminfo and ps output) via human_bytes.
human_kb() {
  local kb="${1:-0}"
  [[ "$kb" =~ ^[0-9]+$ ]] || kb=0
  human_bytes "$((kb * 1024))"
}

#######################################
# Decide whether a report section was requested.
# Globals:   RUN_ALL and the RUN_* section flags (read)
# Arguments: $1 - oom | top | swap | slab | psi | fragmentation | summary
# Returns:   0 when the section should be printed, 1 otherwise
#######################################
should_show() {
  local section="$1"

  if [[ "$RUN_ALL" == "true" ]]; then
    return 0
  fi

  case "$section" in
    oom) [[ "$RUN_OOM" == "true" ]] ;;
    top) [[ "$RUN_TOP" == "true" ]] ;;
    swap) [[ "$RUN_SWAP" == "true" ]] ;;
    slab) [[ "$RUN_SLAB" == "true" ]] ;;
    psi) [[ "$RUN_PSI" == "true" ]] ;;
    fragmentation) [[ "$RUN_FRAG" == "true" ]] ;;
    summary) [[ "$RUN_SUMMARY" == "true" ]] ;;
    *) return 1 ;;
  esac
}

# ══════════════════════════════════════════════════════════════════════
# OOM KILL HISTORY
# ══════════════════════════════════════════════════════════════════════
#######################################
# List kernel log lines that mention a killed process.
# The last 7 days of `journalctl -k` are tried first, then `dmesg`.
# Globals:   OOM_COUNT (written), GREEN, RED, RESET (read)
# Outputs:   up to the 20 most recent events on stdout
#######################################
show_oom() {
  section_header "OOM Kill History"

  local oom_lines=""

  # Try journalctl first
  if command -v journalctl &>/dev/null; then
    oom_lines=$(journalctl -k --since "7 days ago" --no-pager 2>/dev/null \
      | grep -i "killed process" || true)
  fi

  # Fallback to dmesg
  if [[ -z "$oom_lines" ]]; then
    oom_lines=$(dmesg 2>/dev/null | grep -i "killed process" || true)
  fi

  if [[ -z "$oom_lines" ]]; then
    echo -e " No recent OOM kills found ${GREEN}✓${RESET}"
    OOM_COUNT=0
    return
  fi

  OOM_COUNT=$(grep -c '' <<<"$oom_lines")
  echo -e " Found ${RED}${OOM_COUNT}${RESET} OOM event(s) in the last 7 days"
  echo ""

  # Group 1 is the PID, group 3 the (optional) process name in parentheses.
  # Matching in-shell means a line that does not fit the pattern falls back
  # to "unknown"/"?" instead of a failed grep aborting the script under
  # `set -e -o pipefail`.
  local -r kill_re='Killed process ([0-9]+)( \(([^)]+)\))?'
  local line proc_name pid f1 f2 f3
  while IFS= read -r line; do
    proc_name=""
    pid=""
    if [[ "$line" =~ $kill_re ]]; then
      pid="${BASH_REMATCH[1]}"
      proc_name="${BASH_REMATCH[3]:-}"
    fi
    # The first three whitespace-separated fields are shown as the timestamp.
    read -r f1 f2 f3 _ <<<"$line" || true
    printf " ${RED}✗${RESET} %-22s %-20s (PID %s)\n" \
      "$f1 $f2 $f3" "${proc_name:-unknown}" "${pid:-?}"
  done < <(tail -20 <<<"$oom_lines")
}

# ══════════════════════════════════════════════════════════════════════
# TOP MEMORY CONSUMERS
# ══════════════════════════════════════════════════════════════════════
#######################################
# Print the TOP_N processes with the largest resident set size.
# Globals:   TOP_N, BOLD, RESET (read)
#######################################
show_top() {
  section_header "Top Memory Consumers"

  printf " ${BOLD}%-8s %-12s %-12s %-12s %s${RESET}\n" "PID" "USER" "RSS" "VSZ" "COMMAND"
  printf ' %s\n' "$(_rule 60)"

  # awk drops the ps header line and keeps TOP_N rows while still reading
  # all of its input, so ps is never killed by SIGPIPE (which `head` can
  # cause, and which pipefail would turn into a script failure).
  # The last read variable takes the rest of the line, so command names
  # containing spaces are kept whole.
  local pid user rss vsz cmd
  while read -r pid user rss vsz cmd; do
    [[ -n "$pid" ]] || continue
    printf " %-8s %-12s %-12s %-12s %s\n" \
      "$pid" "${user:0:12}" "$(human_kb "$rss")" "$(human_kb "$vsz")" "${cmd:0:30}"
  done < <(ps axo pid,user,rss,vsz,comm --sort=-rss 2>/dev/null \
    | awk -v n="$TOP_N" 'NR > 1 && NR <= n + 1')
}

# ══════════════════════════════════════════════════════════════════════
# SWAP USAGE
# ══════════════════════════════════════════════════════════════════════

# Emit "swap_kb pid comm" for every process whose /proc/<pid>/status
# reports a VmSwap value greater than zero.
_swap_by_process() {
  local proc_dir swap_kb cmd_name
  for proc_dir in /proc/[0-9]*; do
    [[ -r "${proc_dir}/status" ]] || continue
    # A process may exit between the test above and the read; skip it.
    swap_kb=$(awk '/^VmSwap:/ {print $2}' "${proc_dir}/status" 2>/dev/null) || continue
    # Entries without a numeric VmSwap value are skipped as well.
    [[ "$swap_kb" =~ ^[0-9]+$ ]] || continue
    ((swap_kb > 0)) || continue
    cmd_name=$(cat "${proc_dir}/comm" 2>/dev/null) || cmd_name="?"
    printf '%s %s %s\n' "$swap_kb" "${proc_dir##*/}" "$cmd_name"
  done
}

#######################################
# Report swap totals, swap-in/out rates and the top swap users.
# When no swap is configured only that fact is printed.
# Globals:   SWAP_PCT (written), TOP_N and color variables (read)
#######################################
show_swap() {
  section_header "Swap Usage"

  if [[ -f /proc/meminfo ]]; then
    local swap_total_kb=0 swap_free_kb=0 swap_used_kb
    read -r swap_total_kb swap_free_kb < <(
      awk '/^SwapTotal:/ { t = $2 } /^SwapFree:/ { f = $2 } END { print t + 0, f + 0 }' /proc/meminfo
    ) || true
    swap_total_kb="${swap_total_kb:-0}"
    swap_free_kb="${swap_free_kb:-0}"
    swap_used_kb=$((swap_total_kb - swap_free_kb))

    if ((swap_total_kb == 0)); then
      field "Swap:" "Not configured"
      SWAP_PCT=0
      return
    fi

    SWAP_PCT=$(awk -v used="$swap_used_kb" -v total="$swap_total_kb" \
      'BEGIN { printf "%.0f", used * 100 / total }')

    local color="$GREEN"
    if [[ "$SWAP_PCT" -ge 80 ]]; then
      color="$RED"
    elif [[ "$SWAP_PCT" -ge 50 ]]; then
      color="$YELLOW"
    fi

    field "Total swap:" "$(human_kb "$swap_total_kb")"
    field_color "Used swap:" "${color}$(human_kb "$swap_used_kb") (${SWAP_PCT}%)${RESET}"
    field "Free swap:" "$(human_kb "$swap_free_kb")"
  fi

  # Swap in/out rates from vmstat. A single one-second sample supplies
  # both columns (the last output line is the live sample).
  if command -v vmstat &>/dev/null; then
    local si="" so=""
    read -r si so < <(
      vmstat 1 2 2>/dev/null | awk '{ si = $7; so = $8 } END { print si, so }'
    ) || true
    if [[ -n "$si" && -n "$so" ]]; then
      field "Swap in rate:" "${si} KB/s"
      field "Swap out rate:" "${so} KB/s"
    fi
  fi

  # Per-process swap
  echo ""
  echo -e " ${BOLD}Top swap consumers:${RESET}"
  printf " %-8s %-12s %s\n" "PID" "SWAP" "COMMAND"

  # The loop reads from a process substitution rather than sitting at the
  # end of a pipeline, so it runs in this shell and `shown` keeps its
  # value for the check below.
  local shown=0 skb pid cmd
  while read -r skb pid cmd; do
    printf " %-8s %-12s %s\n" "$pid" "$(human_kb "$skb")" "$cmd"
    shown=$((shown + 1))
  done < <(_swap_by_process | sort -rn | awk -v n="$TOP_N" 'NR <= n')

  if ((shown == 0)); then
    echo " No processes using swap"
  fi
}

# ══════════════════════════════════════════════════════════════════════
# SLAB CACHES
# ══════════════════════════════════════════════════════════════════════
#######################################
# Print the TOP_N slab caches ordered by num_objs * obj_size, followed by
# the Slab total from /proc/meminfo.
# Warns and returns when /proc/slabinfo is not readable.
# Globals:   TOP_N, BOLD, RESET (read)
#######################################
show_slab() {
  section_header "Slab Caches"

  if [[ ! -r /proc/slabinfo ]]; then
    warn "/proc/slabinfo not readable (requires root)"
    return
  fi

  printf " ${BOLD}%-30s %10s %10s %12s${RESET}\n" "CACHE" "NUM_OBJS" "OBJ_SIZE" "TOTAL_SIZE"
  printf ' %s\n' "$(_rule 65)"

  # Skip the two header lines. The product is printed with %.0f so it is
  # always a plain integer; a bare awk `print` may use exponent notation
  # for large values on some awk implementations, which the integer
  # comparisons in human_bytes could not handle.
  local total name num_objs obj_size
  while read -r total name num_objs obj_size; do
    printf " %-30s %10s %10s %12s\n" \
      "${name:0:30}" "$num_objs" "${obj_size}B" "$(human_bytes "$total")"
  done < <(
    awk 'NR > 2 { printf "%.0f %s %s %s\n", $3 * $4, $1, $3, $4 }' /proc/slabinfo 2>/dev/null \
      | sort -rn \
      | awk -v n="$TOP_N" 'NR <= n'
  )

  # Total slab usage
  local slab_total_kb
  slab_total_kb=$(awk '/^Slab:/ {print $2}' /proc/meminfo 2>/dev/null || echo "0")
  echo ""
  field "Total slab memory:" "$(human_kb "${slab_total_kb:-0}")"
}

# ══════════════════════════════════════════════════════════════════════
# PSI METRICS
# ══════════════════════════════════════════════════════════════════════
#######################################
# Print the avg10/avg60/avg300/total values from /proc/pressure/memory.
# Globals:   PSI_SOME_AVG300 (written) - avg300 of the "some" line, kept
#            as the decimal string read from the file so show_summary can
#            compare it against fractional thresholds
#######################################
show_psi() {
  section_header "PSI Metrics (Pressure Stall Information)"

  if [[ ! -f /proc/pressure/memory ]]; then
    echo -e " ${DIM}PSI not available (requires kernel 4.20+)${RESET}"
    return
  fi

  printf " ${BOLD}%-18s %8s %8s %8s %14s${RESET}\n" "PRESSURE" "avg10" "avg60" "avg300" "total (µs)"
  printf ' %s\n' "$(_rule 58)"

  local line kv ptype avg10 avg60 avg300 total
  local -a tokens
  while IFS= read -r line; do
    read -ra tokens <<<"$line" || true
    ((${#tokens[@]} > 0)) || continue

    ptype="${tokens[0]}"
    avg10="" avg60="" avg300="" total=""
    # Remaining tokens are key=value pairs; a missing key simply leaves
    # its column empty instead of aborting the script.
    for kv in "${tokens[@]:1}"; do
      case "$kv" in
        avg10=*) avg10="${kv#*=}" ;;
        avg60=*) avg60="${kv#*=}" ;;
        avg300=*) avg300="${kv#*=}" ;;
        total=*) total="${kv#*=}" ;;
      esac
    done

    printf " %-18s %8s %8s %8s %14s\n" "$ptype" "$avg10" "$avg60" "$avg300" "$total"

    # Track for health score
    if [[ "$ptype" == "some" ]]; then
      PSI_SOME_AVG300="${avg300:-0}"
    fi
  done </proc/pressure/memory
}

# ══════════════════════════════════════════════════════════════════════
# FRAGMENTATION
# ══════════════════════════════════════════════════════════════════════
#######################################
# Print the per-zone free-page counts from /proc/buddyinfo, one column
# per order. Warns and returns when the file is missing.
#######################################
show_fragmentation() {
  section_header "Memory Fragmentation (buddyinfo)"

  if [[ ! -f /proc/buddyinfo ]]; then
    warn "/proc/buddyinfo not available"
    return
  fi

  echo -e " ${BOLD}Free pages by order (0=4K, 1=8K, 2=16K, ... 10=4M):${RESET}"
  echo ""

  # Field 2 is the node number (followed by a comma in the file), field 4
  # the zone name, and fields 5+ the per-order counts.
  local line
  local -a cols
  while IFS= read -r line; do
    read -ra cols <<<"$line" || true
    ((${#cols[@]} >= 5)) || continue
    printf " %-6s %-10s %s\n" \
      "${cols[1]%,}" "${cols[3]}" "$(printf '%7s' "${cols[@]:4}")"
  done </proc/buddyinfo

  echo ""
  echo -e " ${DIM}Low counts at higher orders indicate fragmentation${RESET}"
}

# ══════════════════════════════════════════════════════════════════════
# HEALTH SUMMARY
# ══════════════════════════════════════════════════════════════════════
#######################################
# Compute a 0-100 health score and print it with the findings.
# Penalties:
#   OOM kills   -30 each, capped at -60
#   PSI avg300  -15 above 5.0, -5 above 1.0   (the "some" line)
#   Swap used   -20 above 80%, -15 above 50%, -5 above 20%
#   Memory used -15 at 95% or more, -10 at 90% or more
# OOM_COUNT, SWAP_PCT and PSI_SOME_AVG300 are whatever show_oom, show_swap
# and show_psi left behind; when those sections did not run, the initial
# values are used.
# Globals:   SCORE (written); OOM_COUNT, SWAP_PCT, PSI_SOME_AVG300 (read)
#######################################
show_summary() {
  section_header "Memory Health Summary"

  SCORE=100
  local -a findings=()

  # OOM kills penalty (heavy: -30 per kill, max -60)
  if [[ "$OOM_COUNT" -gt 0 ]]; then
    local oom_penalty=$((OOM_COUNT * 30))
    if ((oom_penalty > 60)); then
      oom_penalty=60
    fi
    SCORE=$((SCORE - oom_penalty))
    findings+=("${RED}⚠${RESET} ${OOM_COUNT} OOM kill(s) in last 7 days")
  else
    findings+=("${GREEN}✓${RESET} No recent OOM kills")
  fi

  # PSI penalty. The comparison is done on the decimal value so that,
  # for example, 5.9 counts as "> 5.0"; truncating to an integer first
  # would silently move the thresholds to 6.0 and 2.0.
  if [[ -f /proc/pressure/memory ]]; then
    local psi="${PSI_SOME_AVG300:-0}"
    if _float_gt "$psi" 5; then
      SCORE=$((SCORE - 15))
      findings+=("${YELLOW}⚠${RESET} PSI avg300 some > 5.0 — sustained memory contention")
    elif _float_gt "$psi" 1; then
      SCORE=$((SCORE - 5))
      findings+=("${YELLOW}⚠${RESET} PSI avg300 some > 1.0 — mild memory contention")
    else
      findings+=("${GREEN}✓${RESET} PSI levels normal")
    fi
  fi

  # Swap penalty
  if [[ "$SWAP_PCT" -gt 80 ]]; then
    SCORE=$((SCORE - 20))
    findings+=("${RED}⚠${RESET} Swap ${SWAP_PCT}% used — heavy swap pressure")
  elif [[ "$SWAP_PCT" -gt 50 ]]; then
    SCORE=$((SCORE - 15))
    findings+=("${YELLOW}⚠${RESET} Swap ${SWAP_PCT}% used — moderate swap pressure")
  elif [[ "$SWAP_PCT" -gt 20 ]]; then
    SCORE=$((SCORE - 5))
    findings+=("${YELLOW}⚠${RESET} Swap ${SWAP_PCT}% used — light swap usage")
  else
    findings+=("${GREEN}✓${RESET} No active swap pressure")
  fi

  # Memory usage penalty. Skipped when MemTotal is missing/zero or
  # MemAvailable is missing (reported as -1), which would otherwise make
  # the percentage calculation fail.
  if [[ -f /proc/meminfo ]]; then
    local total_kb=0 avail_kb=-1 mem_pct
    read -r total_kb avail_kb < <(
      awk '/^MemTotal:/ { t = $2 } /^MemAvailable:/ { a = $2 }
           END { print t + 0, (a == "" ? -1 : a) }' /proc/meminfo
    ) || true
    total_kb="${total_kb:-0}"
    avail_kb="${avail_kb:--1}"

    if ((total_kb > 0 && avail_kb >= 0)); then
      mem_pct=$(awk -v total="$total_kb" -v avail="$avail_kb" \
        'BEGIN { printf "%.0f", (total - avail) * 100 / total }')
      if [[ "$mem_pct" -ge 95 ]]; then
        SCORE=$((SCORE - 15))
        findings+=("${RED}⚠${RESET} Memory ${mem_pct}% used — critically low available memory")
      elif [[ "$mem_pct" -ge 90 ]]; then
        SCORE=$((SCORE - 10))
        findings+=("${YELLOW}⚠${RESET} Memory ${mem_pct}% used — high memory utilization")
      fi
    fi
  fi

  if ((SCORE < 0)); then
    SCORE=0
  fi

  # Determine rating
  local rating color
  if [[ "$SCORE" -ge 90 ]]; then
    rating="Excellent"
    color="$GREEN"
  elif [[ "$SCORE" -ge 75 ]]; then
    rating="Good"
    color="$GREEN"
  elif [[ "$SCORE" -ge 50 ]]; then
    rating="Fair"
    color="$YELLOW"
  elif [[ "$SCORE" -ge 25 ]]; then
    rating="Poor"
    color="$RED"
  else
    rating="Critical"
    color="$RED"
  fi

  field_color "Overall score:" "${color}${SCORE} / 100 (${rating})${RESET}"
  echo ""

  local finding
  for finding in "${findings[@]}"; do
    echo -e " $finding"
  done
}

# ══════════════════════════════════════════════════════════════════════
# HELP
# ══════════════════════════════════════════════════════════════════════
# NOTE(review): the original help text and the option-parsing arms of
# main() were missing from the reviewed source. The option names below are
# reconstructed from the section names accepted by should_show() and from
# the RUN_*, TOP_N, VERBOSE and COLOR variables — confirm them against the
# author's original before release.
show_help() {
  cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

Analyze Linux memory pressure signals. When no section option is given,
every section is shown.

Sections:
  --oom             OOM kill history
  --top             Top memory consumers
  --swap            Swap usage
  --slab            Slab caches
  --psi             PSI metrics
  --fragmentation   Memory fragmentation (buddyinfo)
  --summary         Memory health summary

Options:
  --top-n N         Rows per table (default: 10, or \$TOP_N)
  --verbose         Enable debug messages (same as VERBOSE=true)
  --no-color        Disable colored output (same as COLOR=never)
  -h, --help        Show this help and exit
EOF
}

#######################################
# Parse the command line, then print each requested section in order.
# Selecting any section switches RUN_ALL off.
# Arguments: the script's command-line arguments
# Returns:   exits 1 on an unknown option or an invalid --top-n / TOP_N
#######################################
main() {
  while (($# > 0)); do
    case "$1" in
      --oom)
        RUN_OOM="true"
        RUN_ALL="false"
        ;;
      --top)
        RUN_TOP="true"
        RUN_ALL="false"
        ;;
      --swap)
        RUN_SWAP="true"
        RUN_ALL="false"
        ;;
      --slab)
        RUN_SLAB="true"
        RUN_ALL="false"
        ;;
      --psi)
        RUN_PSI="true"
        RUN_ALL="false"
        ;;
      --fragmentation)
        RUN_FRAG="true"
        RUN_ALL="false"
        ;;
      --summary)
        RUN_SUMMARY="true"
        RUN_ALL="false"
        ;;
      --top-n)
        if (($# < 2)); then
          echo "Option $1 requires a value" >&2
          echo "Run $SCRIPT_NAME --help for usage" >&2
          exit 1
        fi
        TOP_N="$2"
        shift
        ;;
      --verbose)
        VERBOSE="true"
        ;;
      --no-color)
        COLOR="never"
        ;;
      -h | --help)
        show_help
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        echo "Run $SCRIPT_NAME --help for usage" >&2
        exit 1
        ;;
    esac
    shift
  done

  # TOP_N is used as a row limit by several sections; reject anything that
  # is not a positive integer up front rather than failing mid-report.
  if [[ ! "$TOP_N" =~ ^[1-9][0-9]*$ ]]; then
    echo "Invalid row count: $TOP_N (expected a positive integer)" >&2
    exit 1
  fi

  setup_colors

  echo ""
  echo -e "${BOLD}Memory Pressure Analysis — $(hostname -f 2>/dev/null || hostname)${RESET}"
  echo -e "${DIM}$(date '+%Y-%m-%d %H:%M:%S %Z')${RESET}"

  # The summary runs last because it scores values left by earlier sections.
  local section
  for section in oom top swap slab psi fragmentation summary; do
    if should_show "$section"; then
      "show_${section}"
    fi
  done

  echo ""
}

main "$@"