#!/usr/bin/env bash
#########################################################################################
#### disk-usage-reporter.sh — Find what's consuming disk space                       ####
#### Scans filesystems, ranks largest directories and files, flags old data          ####
####                                                                                 ####
#### Author: Phil Connor                                                             ####
#### Contact: contact@mylinux.work                                                   ####
#### License: MIT                                                                    ####
#### Version 1.00                                                                    ####
####                                                                                 ####
#### Usage:                                                                          ####
####     ./disk-usage-reporter.sh                                                    ####
####     ./disk-usage-reporter.sh --path /var                                        ####
####     ./disk-usage-reporter.sh --top 50 --min-size 100M                           ####
####     ./disk-usage-reporter.sh --json                                             ####
####                                                                                 ####
#### See --help for all options.                                                     ####
#########################################################################################
#
# Requires GNU coreutils/findutils (df -x, du --max-depth, find -printf) and awk.
# Known limitation: paths containing newline characters are split into
# several rows because all listings are newline-delimited.

set -euo pipefail

# ============================================================================
# DEFAULTS
# ============================================================================

SCAN_PATH="/"
TOP_N=20
MIN_SIZE="1M"
MAX_DEPTH=3
AGE_WARN=90
JSON_MODE=false
NO_COLOR=false
VERSION="1.00"

# Colors (stored as escape sequences; expanded by printf %b at print time)
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

SCRIPT_NAME="$(basename "$0")"

# ============================================================================
# USAGE & ARGUMENT PARSING
# ============================================================================

# Print the help text to stdout and exit 0.
show_usage() {
  cat <<EOF
${SCRIPT_NAME} v${VERSION} — Find what's consuming disk space

Usage: ${SCRIPT_NAME} [OPTIONS]

Options:
  --path PATH        Directory to scan (default: /)
  --top N            Number of entries shown per section (default: 20)
  --min-size SIZE    Only consider files larger than SIZE, e.g. 500K, 100M, 2G
                     (default: 1M)
  --max-depth N      Directory depth used for the directory ranking (default: 3)
  --age-warn DAYS    Flag large files not modified for more than DAYS days
                     (default: 90)
  --json             Emit a JSON document instead of the text report
  --no-color         Disable colored output
  -h, --help         Show this help and exit

Examples:
  ${SCRIPT_NAME}                                # Scan / with defaults
  ${SCRIPT_NAME} --path /var                    # Scan /var only
  ${SCRIPT_NAME} --top 50 --min-size 100M       # Top 50, files > 100M
  ${SCRIPT_NAME} --path /home --age-warn 365    # Flag files older than 1 year
  ${SCRIPT_NAME} --json                         # JSON output for scripting
EOF
  exit 0
}

# Print an error message to stderr and exit 1.
die() {
  printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$*" >&2
  exit 1
}

# Abort unless $2 is a non-negative decimal integer. $1 is the option name
# used in the error message.
require_uint() {
  [[ "$2" =~ ^[0-9]+$ ]] || die "$1 expects a non-negative integer, got: $2"
}

# Parse command-line options into the global settings and validate them.
# Exits 1 on an unknown option, a missing option value or an invalid value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        show_usage
        ;;
      --path | --top | --min-size | --max-depth | --age-warn)
        # Check explicitly: under `set -u` a missing value would otherwise
        # abort with a cryptic "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --path) SCAN_PATH="$2" ;;
          --top) TOP_N="$2" ;;
          --min-size) MIN_SIZE="$2" ;;
          --max-depth) MAX_DEPTH="$2" ;;
          --age-warn) AGE_WARN="$2" ;;
        esac
        shift 2
        ;;
      --json)
        JSON_MODE=true
        shift
        ;;
      --no-color)
        NO_COLOR=true
        shift
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done

  # Apply --no-color before validating so validation errors honor it too.
  if [[ "$NO_COLOR" == true ]]; then
    RED=""
    YELLOW=""
    GREEN=""
    CYAN=""
    BOLD=""
    NC=""
  fi

  # These values end up as tool arguments and as bare JSON numbers.
  require_uint "--top" "$TOP_N"
  require_uint "--max-depth" "$MAX_DEPTH"
  require_uint "--age-warn" "$AGE_WARN"
  # Force base 10 so that e.g. "08" is neither an octal error nor invalid JSON.
  TOP_N=$((10#$TOP_N))
  MAX_DEPTH=$((10#$MAX_DEPTH))
  AGE_WARN=$((10#$AGE_WARN))

  size_to_bytes "$MIN_SIZE" >/dev/null \
    || die "--min-size expects a size such as 500K, 100M or 2G, got: ${MIN_SIZE}"
}

# ============================================================================
# HELPERS
# ============================================================================

# Print a section banner with the title given in $1.
header() {
  printf '\n'
  printf '%b====================================================%b\n' "$CYAN" "$NC"
  printf '%b  %b%s%b\n' "$CYAN" "$BOLD" "$1" "$NC"
  printf '%b====================================================%b\n' "$CYAN" "$NC"
  printf '\n'
}

# Print the horizontal rule drawn under each table header.
print_rule() {
  echo "  ────────────────────────────────────────────────────────────────────────────────────"
}

# Format a byte count ($1) as "N B", "N.N KB", "N.N MB" or "N.NN GB".
# Non-numeric or missing input is treated as 0.
format_bytes() {
  local b="${1:-0}"
  [[ "$b" =~ ^[0-9]+$ ]] || b=0
  b=$((10#$b))
  if ((b >= 1073741824)); then
    awk -v b="$b" 'BEGIN { printf "%.2f GB\n", b / 1073741824 }'
  elif ((b >= 1048576)); then
    awk -v b="$b" 'BEGIN { printf "%.1f MB\n", b / 1048576 }'
  elif ((b >= 1024)); then
    awk -v b="$b" 'BEGIN { printf "%.1f KB\n", b / 1024 }'
  else
    printf '%s B\n' "$b"
  fi
}

# Print an integer ($1) with locale thousands separators when the locale
# supports them; anything else is echoed back unchanged.
fmt_num() {
  local n="${1:-0}"
  if [[ "$n" =~ ^-?[0-9]+$ ]]; then
    printf "%'d\n" "$n" 2>/dev/null || printf '%s\n' "$n"
  else
    printf '%s\n' "$n"
  fi
}

# Convert a human-readable size (1M, 500K, 2G, 1.5T, 4096) to bytes.
# Units are case-insensitive, binary (1K = 1024) and may be written as
# K/KB/KiB etc. Fractions are truncated. Returns 1 without output when the
# input is not a valid size.
size_to_bytes() {
  local size="${1:-}"
  local num unit mult
  if [[ ! "$size" =~ ^([0-9]+(\.[0-9]+)?)([A-Za-z]*)$ ]]; then
    return 1
  fi
  num="${BASH_REMATCH[1]}"
  unit="${BASH_REMATCH[3]}"
  case "$unit" in
    "" | [bB] | [cC]) mult=1 ;;
    [kK] | [kK][bB] | [kK][iI][bB]) mult=1024 ;;
    [mM] | [mM][bB] | [mM][iI][bB]) mult=1048576 ;;
    [gG] | [gG][bB] | [gG][iI][bB]) mult=1073741824 ;;
    [tT] | [tT][bB] | [tT][iI][bB]) mult=1099511627776 ;;
    *) return 1 ;;
  esac
  # %.0f instead of %d: some awk implementations clamp %d to 32 bits.
  awk -v n="$num" -v m="$mult" 'BEGIN { printf "%.0f\n", int(n * m) }'
}

# Convert MIN_SIZE to a `find -size` argument (without the leading "+").
# The size is emitted in bytes ("<N>c") because find itself only accepts a
# few lowercase/uppercase-specific suffixes and no decimals.
parse_min_size() {
  local bytes
  bytes="$(size_to_bytes "${MIN_SIZE}")" || {
    echo "Invalid size: ${MIN_SIZE}" >&2
    return 1
  }
  printf '%sc\n' "$bytes"
}

# Escape $1 for use inside a JSON string literal (without the quotes).
json_escape() {
  local s="${1-}"
  # Backslash first, so the escapes added below are not escaped again.
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  if [[ "$s" == *[[:cntrl:]]* ]]; then
    # Remaining control characters below U+0020 must be written as \u00XX.
    local code hex ch
    for ((code = 1; code < 32; code++)); do
      printf -v hex '%02x' "$code"
      printf -v ch "\\x${hex}"
      s=${s//"$ch"/\\u00${hex}}
    done
  fi
  printf '%s' "$s"
}

# Filter stdin: sort numerically, largest first, and keep the first TOP_N
# rows. awk (not head) applies the limit because it consumes all of its
# input, so sort never receives SIGPIPE, which `pipefail` would report as a
# failure.
top_n() {
  LC_ALL=C sort -rn | awk -v limit="${TOP_N}" 'NR <= limit'
}

# Emit one df data row per line: filesystem size used avail use% mount.
# df exits non-zero when any filesystem is unreachable; the rows it did
# print are still useful, so its status is ignored.
list_filesystems() {
  { df -hP -x tmpfs -x devtmpfs -x squashfs 2>/dev/null || true; } | tail -n +2
}

# Emit "<size_kb>\t<directory>" for the TOP_N largest directories under
# SCAN_PATH, down to MAX_DEPTH levels. du's exit status is ignored because
# unreadable directories make it non-zero even though the output is usable.
list_top_directories() {
  { du -xk --max-depth="${MAX_DEPTH}" -- "${SCAN_PATH}" 2>/dev/null || true; } | top_n
}

# Emit "<bytes>\t<mtime>\t<path>" for every regular file under SCAN_PATH
# larger than MIN_SIZE. With $1 = "old", only files last modified more than
# AGE_WARN days ago are listed. Output is unsorted and unlimited.
find_large_files() {
  local mode="${1:-all}"
  local min_size
  min_size="$(parse_min_size)" || return 1

  local -a args=("${SCAN_PATH}" -xdev -type f -size "+${min_size}")
  if [[ "$mode" == old ]]; then
    args+=(-mtime "+${AGE_WARN}")
  fi
  args+=(-printf '%s\t%T+\t%p\n')

  # find exits non-zero on permission errors; keep whatever it found.
  find "${args[@]}" 2>/dev/null || true
}

# ============================================================================
# FILESYSTEM OVERVIEW
# ============================================================================

# Print a df-based table of mounted filesystems, colored by usage
# (>= 90% red, >= 80% yellow, otherwise green).
filesystem_overview() {
  header "Filesystem Overview"
  printf '  %b%-30s  %6s  %6s  %6s  %5s  %-20s%b\n' "$BOLD" \
    "Filesystem" "Size" "Used" "Avail" "Use%" "Mounted on" "$NC"
  print_rule

  local fs size used avail pct mount pct_num color
  # `mount` is the last variable, so it keeps mount points containing spaces.
  while read -r fs size used avail pct mount; do
    [[ -n "$fs" ]] || continue
    pct_num="${pct%\%}"
    if [[ ! "$pct_num" =~ ^[0-9]+$ ]]; then
      color="${NC}" # df prints "-" when usage is not available
    elif ((pct_num >= 90)); then
      color="${RED}"
    elif ((pct_num >= 80)); then
      color="${YELLOW}"
    else
      color="${GREEN}"
    fi
    printf '  %b%-30s  %6s  %6s  %6s  %5s  %-20s%b\n' "$color" \
      "$fs" "$size" "$used" "$avail" "$pct" "$mount" "$NC"
  done < <(list_filesystems)
}

# ============================================================================
# TOP DIRECTORIES BY SIZE
# ============================================================================

# Print the TOP_N largest directories (>= 10 GiB red, >= 1 GiB yellow).
top_directories() {
  header "Top ${TOP_N} Directories by Size"
  printf '  %b%4s  %-60s  %10s%b\n' "$BOLD" "#" "Directory" "Size" "$NC"
  print_rule

  local rank=0 size_kb dir size_bytes hsize color
  while IFS=$'\t' read -r size_kb dir; do
    rank=$((rank + 1))
    size_bytes=$((size_kb * 1024))
    hsize="$(format_bytes "$size_bytes")"

    color="${NC}"
    if ((size_bytes >= 10737418240)); then
      color="${RED}"
    elif ((size_bytes >= 1073741824)); then
      color="${YELLOW}"
    fi

    printf '  %b%4d  %-60s  %10s%b\n' "$color" "$rank" "$dir" "$hsize" "$NC"
  done < <(list_top_directories)
}

# ============================================================================
# TOP FILES BY SIZE
# ============================================================================

# Print the TOP_N largest files above MIN_SIZE (>= 1 GiB red,
# >= 100 MiB yellow).
top_files() {
  header "Top ${TOP_N} Files by Size"
  printf '  %b%4s  %-60s  %10s%b\n' "$BOLD" "#" "File" "Size" "$NC"
  print_rule

  local rank=0 size_bytes filepath hsize color
  # The rank is counted here rather than in awk so that the path is never
  # re-split on whitespace.
  while IFS=$'\t' read -r size_bytes _ filepath; do
    rank=$((rank + 1))
    hsize="$(format_bytes "$size_bytes")"

    color="${NC}"
    if ((size_bytes >= 1073741824)); then
      color="${RED}"
    elif ((size_bytes >= 104857600)); then
      color="${YELLOW}"
    fi

    printf '  %b%4d  %-60s  %10s%b\n' "$color" "$rank" "$filepath" "$hsize" "$NC"
  done < <(find_large_files all | top_n)
}

# ============================================================================
# OLD LARGE FILES
# ============================================================================

# Print the TOP_N largest files above MIN_SIZE that were last modified more
# than AGE_WARN days ago, or a notice when there are none.
old_large_files() {
  header "Old Large Files (> ${MIN_SIZE}, older than ${AGE_WARN} days)"
  printf '  %b%4s  %-50s  %10s  %12s%b\n' "$BOLD" "#" "File" "Size" "Last Modified" "$NC"
  print_rule

  local rank=0 size_bytes mtime filepath hsize mdate
  while IFS=$'\t' read -r size_bytes mtime filepath; do
    rank=$((rank + 1))
    hsize="$(format_bytes "$size_bytes")"
    mdate="${mtime%%+*}" # find's %T+ is "YYYY-MM-DD+HH:MM:SS..."; keep the date
    printf '  %b%4d  %-50s  %10s  %12s%b\n' "$YELLOW" \
      "$rank" "$filepath" "$hsize" "$mdate" "$NC"
  done < <(find_large_files old | top_n)

  if ((rank == 0)); then
    echo "  No files found matching criteria."
  fi
}

# ============================================================================
# SUMMARY
# ============================================================================

# Print "<total_scanned_bytes> <old_file_count> <old_file_bytes>" on one
# line. Totals cover every old large file, not only the TOP_N displayed.
compute_summary() {
  local total_kb old_count old_bytes

  total_kb="$({ du -sxk -- "${SCAN_PATH}" 2>/dev/null || true; } | awk 'NR == 1 { print $1 }')"
  [[ "$total_kb" =~ ^[0-9]+$ ]] || total_kb=0

  # One pass for both count and sum; %.0f keeps large sums out of
  # exponent notation.
  read -r old_count old_bytes < <(
    find_large_files old \
      | awk -F'\t' '{ n++; s += $1 } END { printf "%d %.0f\n", n, s }'
  )

  echo "$((total_kb * 1024)) ${old_count:-0} ${old_bytes:-0}"
}

# Print the closing summary section of the text report.
print_summary() {
  header "Summary"

  local total_scanned old_count old_bytes
  read -r total_scanned old_count old_bytes <<<"$(compute_summary)"

  printf '  %bScan path:%b          %s\n' "$BOLD" "$NC" "$SCAN_PATH"
  printf '  %bTotal scanned:%b      %s\n' "$BOLD" "$NC" "$(format_bytes "$total_scanned")"
  printf '  %bMin file size:%b      %s\n' "$BOLD" "$NC" "$MIN_SIZE"
  printf '  %bAge threshold:%b      %s days\n' "$BOLD" "$NC" "$AGE_WARN"
  echo ""
  printf '  %bOld large files:%b    %s files\n' "$BOLD" "$NC" "$(fmt_num "$old_count")"
  printf '  %bReclaimable space:%b  %b%s%b\n' "$BOLD" "$NC" \
    "$YELLOW" "$(format_bytes "$old_bytes")" "$NC"
  echo ""

  if [[ "$old_bytes" =~ ^[0-9]+$ ]] && ((old_bytes > 0)); then
    printf '  %b→ Review old files above — candidates for cleanup or archival%b\n' \
      "$YELLOW" "$NC"
  else
    printf '  %b✓ No old large files found%b\n' "$GREEN" "$NC"
  fi
  echo ""
}

# ============================================================================
# JSON OUTPUT
# ============================================================================

# Print the whole report as a single JSON document on stdout. All strings
# are JSON-escaped; numeric settings were validated by parse_args.
json_output() {
  local total_scanned old_count old_bytes
  read -r total_scanned old_count old_bytes <<<"$(compute_summary)"

  local sep fs size used avail pct mount size_kb dir size_bytes mtime filepath

  printf '{\n'
  printf '  "scan_path": "%s",\n' "$(json_escape "$SCAN_PATH")"
  printf '  "timestamp": "%s",\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  printf '  "min_size": "%s",\n' "$(json_escape "$MIN_SIZE")"
  printf '  "age_warn_days": %d,\n' "$AGE_WARN"
  printf '  "max_depth": %d,\n' "$MAX_DEPTH"

  # Filesystems
  printf '  "filesystems": [\n'
  sep=""
  while read -r fs size used avail pct mount; do
    [[ -n "$fs" ]] || continue
    printf '%s    {"filesystem":"%s","size":"%s","used":"%s","avail":"%s","use_pct":"%s","mount":"%s"}' \
      "$sep" "$(json_escape "$fs")" "$(json_escape "$size")" "$(json_escape "$used")" \
      "$(json_escape "$avail")" "$(json_escape "$pct")" "$(json_escape "$mount")"
    sep=$',\n'
  done < <(list_filesystems)
  printf '\n  ],\n'

  # Top directories
  printf '  "top_directories": [\n'
  sep=""
  while IFS=$'\t' read -r size_kb dir; do
    printf '%s    {"path":"%s","size_bytes":%d}' \
      "$sep" "$(json_escape "$dir")" "$((size_kb * 1024))"
    sep=$',\n'
  done < <(list_top_directories)
  printf '\n  ],\n'

  # Top files
  printf '  "top_files": [\n'
  sep=""
  while IFS=$'\t' read -r size_bytes _ filepath; do
    printf '%s    {"path":"%s","size_bytes":%d}' \
      "$sep" "$(json_escape "$filepath")" "$size_bytes"
    sep=$',\n'
  done < <(find_large_files all | top_n)
  printf '\n  ],\n'

  # Old files
  printf '  "old_large_files": [\n'
  sep=""
  while IFS=$'\t' read -r size_bytes mtime filepath; do
    printf '%s    {"path":"%s","size_bytes":%d,"last_modified":"%s"}' \
      "$sep" "$(json_escape "$filepath")" "$size_bytes" "${mtime%%+*}"
    sep=$',\n'
  done < <(find_large_files old | top_n)
  printf '\n  ],\n'

  # Summary
  printf '  "summary": {\n'
  printf '    "total_scanned_bytes": %d,\n' "$total_scanned"
  printf '    "old_file_count": %d,\n' "$old_count"
  printf '    "reclaimable_bytes": %d\n' "$old_bytes"
  printf '  }\n'
  printf '}\n'
}

# ============================================================================
# MAIN
# ============================================================================

# Entry point: parse options, validate the scan path, then print either the
# JSON document or the text report.
main() {
  parse_args "$@"

  if [[ ! -d "$SCAN_PATH" ]]; then
    die "Path does not exist or is not a directory: ${SCAN_PATH}"
  fi

  # A leading "-" would be parsed as an option/expression by find.
  if [[ "$SCAN_PATH" == -* ]]; then
    SCAN_PATH="./${SCAN_PATH}"
  fi

  # Scan errors are deliberately suppressed later on, so check up front
  # that find understands -printf; otherwise the report would be silently
  # empty.
  find "$SCAN_PATH" -maxdepth 0 -printf '' 2>/dev/null \
    || die "GNU find with -printf support is required"

  if [[ "$JSON_MODE" == true ]]; then
    json_output
    exit 0
  fi

  echo ""
  printf '%bDisk Usage Report%b\n' "$BOLD" "$NC"
  printf '%s — Scanning: %s\n' "$(date '+%Y-%m-%d %H:%M:%S %Z')" "$SCAN_PATH"

  filesystem_overview
  top_directories
  top_files
  old_large_files
  print_summary
}

main "$@"