#!/bin/bash
################################################################################
# Script Name: smart-drive-exporter.sh
# Version: 1.0
# Description: Prometheus exporter for SMART drive health metrics.
#              Reads SMART attributes from SATA and NVMe drives using smartctl
#              and exports temperature, reallocated sectors, pending sectors,
#              uncorrectable errors, power-on hours, wear leveling, NVMe health,
#              and overall drive health status.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - smartmontools (smartctl)
#   - Root or sudo access for smartctl
#   - netcat (nc) for HTTP mode
#
# Usage:
#   # Output to stdout
#   sudo ./smart-drive-exporter.sh
#
#   # HTTP server mode
#   sudo ./smart-drive-exporter.sh --http -p 9198
#
#   # Textfile collector mode
#   sudo ./smart-drive-exporter.sh --textfile
#
# Metrics Exported:
#   Core Status:
#     - smart_drive_up - Exporter status (1=up, 0=down)
#     - smart_drive_exporter_info{version} - Exporter version
#
#   Drive Health:
#     - smart_drive_health_ok{device,model,serial,type} - SMART health (1=passed)
#     - smart_drive_temperature_celsius{device,model,serial} - Temperature
#     - smart_drive_power_on_hours{device,model,serial} - Power-on hours
#     - smart_drive_power_cycle_count{device,model,serial} - Power cycles
#     - smart_drive_capacity_bytes{device,model,serial} - Drive capacity
#
#   SATA Attributes:
#     - smart_drive_reallocated_sectors{device,model,serial} - Reallocated sectors
#     - smart_drive_pending_sectors{device,model,serial} - Pending sectors
#     - smart_drive_uncorrectable_errors{device,model,serial} - Uncorrectable errors
#     - smart_drive_spin_retry_count{device,model,serial} - Spin retries
#     - smart_drive_command_timeout{device,model,serial} - Command timeouts
#     - smart_drive_start_stop_count{device,model,serial} - Start/stop count
#     - smart_drive_wear_leveling_count{device,model,serial} - SSD wear leveling
#     - smart_drive_interface_speed{device,model,serial,speed} - Interface speed
#
#   NVMe Attributes:
#     - smart_drive_percentage_used{device,model,serial} - NVMe percentage used
#     - smart_drive_available_spare{device,model,serial} - Available spare %
#     - smart_drive_available_spare_threshold{device,model,serial} - Spare threshold
#     - smart_drive_media_errors{device,model,serial} - Media errors
#     - smart_drive_critical_warning{device,model,serial} - Critical warning bitmap
#
#   Exporter:
#     - smart_drive_exporter_duration_seconds - Script execution time
#     - smart_drive_exporter_last_run_timestamp - Last run timestamp
#     - smart_drive_devices_total - Total drives detected
#
# Configuration:
#   Default HTTP port: 9198
#   Textfile directory: /var/lib/node_exporter
#   SMART_DRIVE_DEVICES: auto (or comma-separated, e.g., /dev/sda,/dev/nvme0n1)
#   SMART_DRIVE_SMARTCTL_PATH: /usr/sbin/smartctl
#   SMART_DRIVE_SUDO: auto (auto, yes, no)
#
################################################################################

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9198
DEVICES="${SMART_DRIVE_DEVICES:-auto}"
SMARTCTL_PATH="${SMART_DRIVE_SMARTCTL_PATH:-/usr/sbin/smartctl}"
SUDO_MODE="${SMART_DRIVE_SUDO:-auto}"

# Command (with optional sudo prefix) used to invoke smartctl. Filled in by
# setup_smartctl_cmd. Kept as an array so that a smartctl path containing
# spaces is not word-split when the command is run.
SMARTCTL_CMD=()

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

#######################################
# Escape a string for use as a Prometheus label value.
# Arguments: $1 - raw label value
# Outputs:   the value with backslash, double quote and newline escaped
#######################################
prom_escape() {
  local s="$1"
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  printf '%s\n' "$s"
}

#######################################
# Print command-line help to stdout.
# NOTE(review): the original usage text was lost before this review; the text
# below is rebuilt from the header comment and the configuration variables.
#######################################
show_usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Prometheus exporter for SMART drive health metrics.

Options:
  --http               Serve metrics over HTTP (requires netcat)
  -p, --port PORT      HTTP port (default: ${HTTP_PORT})
  --textfile           Write metrics into ${TEXTFILE_DIR}
  -o, --output FILE    Write metrics to FILE
  -h, --help           Show this help

Environment:
  SMART_DRIVE_DEVICES        auto, or comma-separated device list
  SMART_DRIVE_SMARTCTL_PATH  path to smartctl (default: /usr/sbin/smartctl)
  SMART_DRIVE_SUDO           auto, yes or no
EOF
}

#######################################
# Parse command-line options into HTTP_MODE, HTTP_PORT and OUTPUT_FILE.
# Exits 1 on an unknown option or a missing/invalid option argument.
# NOTE(review): most of this function was lost before this review. Only
# --http, -p and --textfile are attested by the header usage examples, and
# the "exit 1" on unknown options by the surviving tail of the function.
# The --port and -o/--output spellings and the textfile name
# "smart_drive.prom" are assumptions - confirm against the upstream script.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --http)
        HTTP_MODE=true
        ;;
      -p|--port)
        if [[ ! "${2:-}" =~ ^[0-9]+$ ]]; then
          echo "ERROR: $1 requires a numeric port" >&2
          exit 1
        fi
        HTTP_PORT="$2"
        shift
        ;;
      --textfile)
        OUTPUT_FILE="${TEXTFILE_DIR}/smart_drive.prom"
        ;;
      -o|--output)
        if [[ -z "${2:-}" ]]; then
          echo "ERROR: $1 requires a file name" >&2
          exit 1
        fi
        OUTPUT_FILE="$2"
        shift
        ;;
      -h|--help)
        show_usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
    shift
  done
}

# ============================================================================
# SMARTCTL COMMAND SETUP
# ============================================================================

#######################################
# Decide whether smartctl is run through sudo and store the command.
# Globals: SUDO_MODE, SMARTCTL_PATH (read); SMARTCTL_CMD (written)
#   SUDO_MODE=yes  -> always sudo
#   SUDO_MODE=no   -> never sudo
#   anything else  -> sudo only when not already running as root
#######################################
setup_smartctl_cmd() {
  local use_sudo=false
  case "$SUDO_MODE" in
    yes)
      use_sudo=true
      ;;
    no)
      ;;
    *)
      if [[ "$(id -u)" -ne 0 ]]; then
        use_sudo=true
      fi
      ;;
  esac

  if [[ "$use_sudo" == true ]]; then
    SMARTCTL_CMD=(sudo "$SMARTCTL_PATH")
  else
    SMARTCTL_CMD=("$SMARTCTL_PATH")
  fi
}

# ============================================================================
# DRIVE DETECTION
# ============================================================================

#######################################
# List the devices to query, one per line.
# Globals: DEVICES, SMARTCTL_CMD (read)
# With DEVICES=auto the first column of "smartctl --scan" is used; otherwise
# DEVICES is split on commas. Surrounding blanks and empty entries are
# dropped so that "/dev/sda, /dev/sdb" works as expected.
#######################################
detect_drives() {
  if [[ "$DEVICES" == "auto" ]]; then
    "${SMARTCTL_CMD[@]}" --scan 2>/dev/null \
      | awk 'NF && $1 !~ /^#/ { print $1 }'
  else
    printf '%s\n' "$DEVICES" \
      | tr ',' '\n' \
      | awk '{ gsub(/^[ \t]+|[ \t]+$/, "") } length($0) > 0'
  fi
}

# ============================================================================
# DRIVE DATA PARSING
# ============================================================================

#######################################
# Parse "smartctl -iHA" output for a single drive.
# Arguments: $1 - device path
# Outputs:   "KEY VALUE" lines (DEVICE, MODEL, SERIAL, TYPE, HEALTH, CAPACITY
#            always; the remaining keys only when the value was found), or
#            the single line "PARSE_ERROR 1" when smartctl printed nothing.
# Notes:
#   - The smartctl exit status is deliberately ignored; only empty output is
#     treated as failure.
#   - LC_ALL=C is set to keep the report format locale-independent
#     (NOTE(review): assumes smartctl honours the locale for number
#     formatting - confirm).
#######################################
parse_drive_data() {
  local device="$1"
  local raw_output

  raw_output=$(LC_ALL=C "${SMARTCTL_CMD[@]}" -iHA "$device" 2>/dev/null)
  if [[ -z "$raw_output" ]]; then
    echo "PARSE_ERROR 1"
    return
  fi

  # Every rule below works on a copy of the line. The previous version
  # rewrote $0 while extracting the model, which stopped the later
  # "Model Number:" NVMe-detection rule from ever matching.
  printf '%s\n' "$raw_output" | awk -v dev="$device" '
    # Text after the "Label:" prefix with surrounding blanks removed.
    function field_value(line) {
      sub(/^[^:]+:[ \t]*/, "", line)
      sub(/[ \t]+$/, "", line)
      return line
    }

    # First run of digits/commas in text with the commas removed, or "".
    # Returned as a string so large byte counts are never reformatted.
    function digit_run(text,    run) {
      if (!match(text, /[0-9][0-9,]*/)) {
        return ""
      }
      run = substr(text, RSTART, RLENGTH)
      gsub(/,/, "", run)
      return run
    }

    # Convert a string of hex digits to a number; unknown digits count as 0.
    function hex_to_dec(hex,    i, digit, total) {
      hex = tolower(hex)
      total = 0
      for (i = 1; i <= length(hex); i++) {
        digit = index("0123456789abcdef", substr(hex, i, 1)) - 1
        if (digit < 0) {
          digit = 0
        }
        total = total * 16 + digit
      }
      return total
    }

    BEGIN {
      model = ""
      serial = ""
      capacity_bytes = "0"
      drive_type = "unknown"
      sata_speed = ""
      # -1 marks "not reported"; such values are not printed in END.
      health = -1
      temperature = -1
      power_on_hours = -1
      power_cycle_count = -1
      reallocated_sectors = -1
      pending_sectors = -1
      uncorrectable = -1
      spin_retry = -1
      command_timeout = -1
      start_stop = -1
      wear_leveling = -1
      nvme_pct_used = -1
      nvme_spare = -1
      nvme_spare_thresh = -1
      nvme_media_errors = -1
      nvme_critical_warn = -1
      in_smart_attrs = 0
      in_nvme_health = 0
    }

    # ---- Drive info section ----
    /^Device Model:/ { model = field_value($0) }

    # "Model Number:" is the NVMe spelling of the model line.
    /^Model Number:/ {
      model = field_value($0)
      drive_type = "nvme"
    }

    /^Serial Number:/ { serial = field_value($0) }

    # e.g. "User Capacity:    1,000,204,886,016 bytes [1.00 TB]"
    /^User Capacity:/ || /^Total NVM Capacity:/ || /^Namespace 1 Size\/Capacity:/ {
      cap = digit_run(field_value($0))
      if (cap != "") {
        capacity_bytes = cap
      }
    }

    # First "<number> Gb/s" on the line, e.g. "6.0 Gb/s".
    /^SATA Version is:/ {
      if (match($0, /[0-9.]+ Gb\/s/)) {
        sata_speed = substr($0, RSTART, RLENGTH)
      }
    }

    /^Rotation Rate:/ {
      if ($0 ~ /Solid State/) {
        drive_type = "ssd"
      } else if ($0 ~ /[0-9]+ rpm/) {
        drive_type = "hdd"
      }
    }

    # ---- Health status ----
    /SMART overall-health self-assessment test result:/ {
      health = ($0 ~ /PASSED/) ? 1 : 0
    }
    /SMART Health Status:/ {
      health = ($0 ~ /OK/) ? 1 : 0
    }

    # ---- Section tracking ----
    /^ID#/ && /ATTRIBUTE_NAME/ { in_smart_attrs = 1; next }
    /^SMART\/Health Information/ { in_nvme_health = 1; next }
    /^$/ { in_smart_attrs = 0; in_nvme_health = 0 }

    # ---- SATA attribute table: $1 is the attribute ID, $10 the raw value ----
    in_smart_attrs && NF >= 10 {
      attr_id = $1 + 0
      raw_val = $10 + 0
      if (attr_id == 4) start_stop = raw_val
      if (attr_id == 5) reallocated_sectors = raw_val
      if (attr_id == 9) power_on_hours = raw_val
      if (attr_id == 10) spin_retry = raw_val
      if (attr_id == 12) power_cycle_count = raw_val
      if (attr_id == 177 || attr_id == 233) wear_leveling = raw_val
      if (attr_id == 188) command_timeout = raw_val
      if (attr_id == 190 || attr_id == 194) {
        # Raw value may look like "35 (Min/Max 22/42)"; keep the first number.
        if (match($10, /^[0-9]+/)) {
          temperature = substr($10, RSTART, RLENGTH) + 0
        }
      }
      if (attr_id == 197) pending_sectors = raw_val
      if (attr_id == 198) uncorrectable = raw_val
    }

    # ---- NVMe SMART/Health Information section ----
    in_nvme_health {
      value = field_value($0)
      if (/^Temperature:/) {
        temperature = value + 0
      } else if (/^Percentage Used:/) {
        nvme_pct_used = value + 0
      } else if (/^Available Spare:/) {
        nvme_spare = value + 0
      } else if (/^Available Spare Threshold:/) {
        nvme_spare_thresh = value + 0
      } else if (/^Power On Hours:/) {
        power_on_hours = digit_run(value) + 0
      } else if (/^Power Cycles:/) {
        power_cycle_count = digit_run(value) + 0
      } else if (/^Media and Data Integrity Errors:/) {
        nvme_media_errors = digit_run(value) + 0
      } else if (/^Critical Warning:/) {
        # Keep the first word; it is a hex bitmap such as "0x00".
        sub(/[ \t].*/, "", value)
        if (value ~ /^0[xX]/) {
          nvme_critical_warn = hex_to_dec(substr(value, 3))
        } else {
          nvme_critical_warn = value + 0
        }
      }
    }

    END {
      # Fallbacks when the info section did not identify the drive type.
      if (drive_type == "unknown" && nvme_pct_used >= 0) drive_type = "nvme"
      if (drive_type == "unknown") drive_type = "sata"

      print "DEVICE " dev
      print "MODEL " model
      print "SERIAL " serial
      print "TYPE " drive_type
      print "HEALTH " health
      print "CAPACITY " capacity_bytes

      if (temperature >= 0) print "TEMPERATURE " temperature
      if (power_on_hours >= 0) print "POWER_ON_HOURS " power_on_hours
      if (power_cycle_count >= 0) print "POWER_CYCLE_COUNT " power_cycle_count
      if (reallocated_sectors >= 0) print "REALLOCATED_SECTORS " reallocated_sectors
      if (pending_sectors >= 0) print "PENDING_SECTORS " pending_sectors
      if (uncorrectable >= 0) print "UNCORRECTABLE " uncorrectable
      if (spin_retry >= 0) print "SPIN_RETRY " spin_retry
      if (command_timeout >= 0) print "COMMAND_TIMEOUT " command_timeout
      if (start_stop >= 0) print "START_STOP " start_stop
      if (wear_leveling >= 0) print "WEAR_LEVELING " wear_leveling
      if (nvme_pct_used >= 0) print "NVME_PCT_USED " nvme_pct_used
      if (nvme_spare >= 0) print "NVME_SPARE " nvme_spare
      if (nvme_spare_thresh >= 0) print "NVME_SPARE_THRESH " nvme_spare_thresh
      if (nvme_media_errors >= 0) print "NVME_MEDIA_ERRORS " nvme_media_errors
      if (nvme_critical_warn >= 0) print "NVME_CRITICAL_WARN " nvme_critical_warn
      if (sata_speed != "") print "SATA_SPEED " sata_speed
    }
  '
}

# ============================================================================
# METRICS GENERATION
# ============================================================================

#######################################
# Print the value stored under a key in parse_drive_data output.
# Arguments: $1 - key (e.g. MODEL), $2 - full parse_drive_data output
# Outputs:   the value with runs of blanks collapsed; nothing if key is absent
#######################################
_parsed_field() {
  awk -v key="$1" '$1 == key { $1 = ""; sub(/^ /, ""); print; exit }' <<< "$2"
}

#######################################
# Print all metrics in the Prometheus text exposition format.
# Globals: SMARTCTL_PATH (read); uses detect_drives and parse_drive_data
# Outputs: metrics on stdout. When smartctl cannot be found only
#          smart_drive_up 0 and the exporter runtime metrics are printed.
# NOTE(review): the smart_drive_up / smart_drive_exporter_info output and
# the duration/timestamp output were lost before this review and are
# rebuilt from the metric list in the file header; their HELP wording is
# an assumption.
#######################################
generate_metrics() {
  local script_start
  script_start=$(date +%s)

  # ========================================================================
  # Exporter Status
  # ========================================================================
  if ! command -v "$SMARTCTL_PATH" >/dev/null 2>&1; then
    printf '%s\n' \
      "# HELP smart_drive_up Exporter status (1=up, 0=down)" \
      "# TYPE smart_drive_up gauge" \
      "smart_drive_up 0" \
      ""
  else
    printf '%s\n' \
      "# HELP smart_drive_up Exporter status (1=up, 0=down)" \
      "# TYPE smart_drive_up gauge" \
      "smart_drive_up 1" \
      "" \
      "# HELP smart_drive_exporter_info Exporter version" \
      "# TYPE smart_drive_exporter_info gauge" \
      "smart_drive_exporter_info{version=\"1.0\"} 1" \
      ""

    # One entry per metric family, in output order:
    #   parse_drive_data key | metric name | HELP text
    local -a families=(
      "HEALTH|smart_drive_health_ok|SMART health status (1=passed, 0=failed)"
      "TEMPERATURE|smart_drive_temperature_celsius|Current drive temperature in Celsius"
      "POWER_ON_HOURS|smart_drive_power_on_hours|Total power-on hours"
      "POWER_CYCLE_COUNT|smart_drive_power_cycle_count|Total power cycle count"
      "CAPACITY|smart_drive_capacity_bytes|Drive capacity in bytes"
      "REALLOCATED_SECTORS|smart_drive_reallocated_sectors|Reallocated sector count"
      "PENDING_SECTORS|smart_drive_pending_sectors|Current pending sector count"
      "UNCORRECTABLE|smart_drive_uncorrectable_errors|Offline uncorrectable error count"
      "SPIN_RETRY|smart_drive_spin_retry_count|Spin retry count"
      "COMMAND_TIMEOUT|smart_drive_command_timeout|Command timeout count"
      "START_STOP|smart_drive_start_stop_count|Start/stop count"
      "WEAR_LEVELING|smart_drive_wear_leveling_count|SSD wear leveling count"
      "NVME_PCT_USED|smart_drive_percentage_used|NVMe percentage used estimate"
      "NVME_SPARE|smart_drive_available_spare|NVMe available spare percentage"
      "NVME_SPARE_THRESH|smart_drive_available_spare_threshold|NVMe available spare threshold percentage"
      "NVME_MEDIA_ERRORS|smart_drive_media_errors|NVMe media and data integrity errors"
      "NVME_CRITICAL_WARN|smart_drive_critical_warning|NVMe critical warning bitmap"
      "SATA_SPEED|smart_drive_interface_speed|SATA interface speed info metric"
    )
    # samples[i] collects the sample lines of families[i] across all drives,
    # so each family is printed once with a single HELP/TYPE header.
    local -a samples=()

    local drive_list device parsed
    local device_count=0
    local dev_type base_labels labels
    local i key name help value

    drive_list=$(detect_drives)

    while IFS= read -r device; do
      [[ -n "$device" ]] || continue

      parsed=$(parse_drive_data "$device")
      # Drives smartctl could not read are skipped and not counted.
      if grep -q '^PARSE_ERROR' <<< "$parsed"; then
        continue
      fi
      device_count=$((device_count + 1))

      dev_type=$(_parsed_field TYPE "$parsed")
      base_labels="device=\"$(prom_escape "$device")\""
      base_labels+=",model=\"$(prom_escape "$(_parsed_field MODEL "$parsed")")\""
      base_labels+=",serial=\"$(prom_escape "$(_parsed_field SERIAL "$parsed")")\""

      for i in "${!families[@]}"; do
        IFS='|' read -r key name help <<< "${families[i]}"
        value=$(_parsed_field "$key" "$parsed")
        [[ -n "$value" ]] || continue

        labels="$base_labels"
        case "$key" in
          HEALTH)
            # -1 means smartctl reported no health verdict.
            [[ "$value" != "-1" ]] || continue
            labels+=",type=\"$(prom_escape "$dev_type")\""
            ;;
          CAPACITY)
            # 0 means no capacity line was found.
            [[ "$value" != "0" ]] || continue
            ;;
          SATA_SPEED)
            # Info-style metric: the speed is a label, the value is always 1.
            labels+=",speed=\"$(prom_escape "$value")\""
            value=1
            ;;
        esac
        samples[i]+="${name}{${labels}} ${value}"$'\n'
      done
    done <<< "$drive_list"

    # ================================================================
    # Devices Total
    # ================================================================
    printf '%s\n' \
      "# HELP smart_drive_devices_total Total drives detected" \
      "# TYPE smart_drive_devices_total gauge" \
      "smart_drive_devices_total $device_count" \
      ""

    # ================================================================
    # Per-drive metric families (only those with at least one sample)
    # ================================================================
    for i in "${!families[@]}"; do
      [[ -n "${samples[i]}" ]] || continue
      IFS='|' read -r key name help <<< "${families[i]}"
      # samples[i] already ends with a newline; the extra one separates
      # families with a blank line.
      printf '# HELP %s %s\n# TYPE %s gauge\n%s\n' \
        "$name" "$help" "$name" "${samples[i]}"
    done
  fi

  # ========================================================================
  # Exporter Runtime
  # ========================================================================
  local script_end script_duration
  script_end=$(date +%s)
  script_duration=$((script_end - script_start))

  printf '%s\n' \
    "# HELP smart_drive_exporter_duration_seconds Script execution time" \
    "# TYPE smart_drive_exporter_duration_seconds gauge" \
    "smart_drive_exporter_duration_seconds $script_duration" \
    "" \
    "# HELP smart_drive_exporter_last_run_timestamp Last run timestamp" \
    "# TYPE smart_drive_exporter_last_run_timestamp gauge" \
    "smart_drive_exporter_last_run_timestamp $script_end"
}

# ============================================================================
# HTTP SERVER
# ============================================================================

#######################################
# Print the HTML landing page served for every path other than /metrics.
# NOTE(review): the original markup was stripped before this review; the
# visible text is preserved and the tags around it are rebuilt.
#######################################
_http_index_page() {
  cat <<'HTMLEOF'
<html>
<head><title>SMART Drive Exporter v1.0</title></head>
<body>
<h1>SMART Drive Exporter v1.0</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>Sections</h2>
<ul>
<li>Drive health status (PASSED/FAILED)</li>
<li>Temperature per drive</li>
<li>SATA attributes (reallocated sectors, pending sectors, etc.)</li>
<li>NVMe health (percentage used, available spare, media errors)</li>
<li>Power-on hours and power cycle count</li>
<li>SSD wear leveling</li>
</ul>
</body>
</html>
HTMLEOF
}

#######################################
# Answer one HTTP request: read it from stdin, write the response to stdout.
# "GET /metrics..." returns the metrics, anything else the landing page.
#######################################
_http_handle_request() {
  local request header body content_type body_bytes

  # Timeouts keep a stalled client from blocking the server forever.
  IFS= read -r -t 10 request
  [[ -n "$request" ]] || return 0

  # Consume the request headers up to the blank line so that netcat never
  # writes into a FIFO nobody reads any more.
  while IFS= read -r -t 10 header; do
    header=${header%$'\r'}
    [[ -n "$header" ]] || break
  done

  if [[ "$request" =~ ^GET\ /metrics ]]; then
    body=$(generate_metrics)
    content_type="text/plain; version=0.0.4"
  else
    body=$(_http_index_page)
    content_type="text/html"
  fi

  # $(...) strips the trailing newline, but the exposition format requires
  # the last line to end with one, so it is added back and counted.
  # wc -c counts bytes, which is what Content-Length must state.
  body_bytes=$(( $(printf '%s\n' "$body" | wc -c) ))
  printf 'HTTP/1.1 200 OK\r\nContent-Type: %s\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s\n' \
    "$content_type" "$body_bytes" "$body"
}

#######################################
# Serve requests on HTTP_PORT with netcat, one connection at a time.
# Globals: HTTP_PORT (read)
# The previous implementation piped the handler into netcat, so the handler
# read the "request" from the script's own stdin instead of from the client
# and could not tell /metrics from any other path. A FIFO now carries what
# netcat receives back to the handler.
# Exits 1 when netcat or the FIFO is unavailable; exits 0 on INT/TERM.
#######################################
run_http_server() {
  # NOTE(review): the start-up message wording was lost and is reconstructed.
  echo "Starting SMART drive exporter on port $HTTP_PORT" >&2

  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  # GNU/traditional netcat needs "-p PORT"; "-q 1" makes it quit one second
  # after the response has been sent. Other variants take the port directly.
  local -a listen_cmd
  if nc -h 2>&1 | grep -q 'GNU\|traditional'; then
    listen_cmd=(nc -l -p "$HTTP_PORT" -q 1)
  else
    listen_cmd=(nc -l "$HTTP_PORT")
  fi

  local fifo_dir fifo
  fifo_dir=$(mktemp -d) || {
    echo "ERROR: cannot create temporary directory" >&2
    exit 1
  }
  fifo="${fifo_dir}/request"
  mkfifo "$fifo" || {
    rm -rf -- "$fifo_dir"
    echo "ERROR: cannot create FIFO" >&2
    exit 1
  }

  # The path is expanded now because the local variable is out of scope by
  # the time the EXIT trap runs.
  # shellcheck disable=SC2064
  trap "rm -rf -- '$fifo_dir'" EXIT
  trap 'echo "Shutting down SMART drive exporter..." >&2; exit 0' INT TERM

  while true; do
    # client -> nc -> FIFO -> handler -> pipe -> nc -> client
    # The sleep avoids a busy loop when netcat cannot bind the port.
    _http_handle_request < "$fifo" \
      | "${listen_cmd[@]}" > "$fifo" 2>/dev/null \
      || sleep 1
  done
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

#######################################
# Entry point: HTTP server, textfile output or stdout, depending on options.
# In textfile mode the metrics are written to a temporary file in the target
# directory and renamed into place, so a reader never sees a partial file;
# output shorter than 3 lines is rejected and the previous file is kept.
#######################################
main() {
  parse_args "$@"
  setup_smartctl_cmd

  if [[ "$HTTP_MODE" == true ]]; then
    run_http_server
    return
  fi

  if [[ -z "$OUTPUT_FILE" ]]; then
    generate_metrics
    return
  fi

  local output_dir temp_file file_lines
  output_dir="$(dirname "$OUTPUT_FILE")"
  if ! mkdir -p "$output_dir"; then
    echo "ERROR: Cannot create directory $output_dir" >&2
    exit 1
  fi

  if ! temp_file=$(mktemp "${output_dir}/.smart_drive_metrics.XXXXXX"); then
    echo "ERROR: Cannot create temporary file in $output_dir" >&2
    exit 1
  fi

  if ! generate_metrics > "$temp_file" 2>/dev/null; then
    rm -f "$temp_file"
    echo "ERROR: Failed to generate metrics" >&2
    exit 1
  fi

  file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
  if [[ "$file_lines" -lt 3 ]]; then
    rm -f "$temp_file"
    echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
    exit 1
  fi

  chmod 644 "$temp_file"
  mv -f "$temp_file" "$OUTPUT_FILE"
  echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
}

main "$@"