#!/bin/bash
################################################################################
# Script Name: nfs-server-exporter.sh
# Version: 1.0
# Description: Prometheus exporter for NFS server metrics — export status,
#              thread pool utilization, per-operation counters, RPC errors,
#              connected clients, and server health
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - NFS server running (nfs-kernel-server / nfs-utils)
#   - /proc/net/rpc/nfsd available
#   - /proc/fs/nfsd available
#   - exportfs command available
#   - netcat (nc) for HTTP mode
#
# Usage:
#   sudo ./nfs-server-exporter.sh
#   sudo ./nfs-server-exporter.sh --http -p 9588
#   sudo ./nfs-server-exporter.sh --textfile
#
# Metrics Exported:
#   - nfsd_up                                  - Exporter status (1=up, 0=down)
#   - nfsd_exports_total                       - Number of active exports
#   - nfsd_export_info{export,client,options}  - Export configuration
#   - nfsd_threads_total                       - Configured NFS server threads
#   - nfsd_threads_busy                        - Threads currently handling requests
#   - nfsd_pool_packets_total{pool}            - Packets received per pool
#   - nfsd_pool_sockets_enqueued_total{pool}   - Sockets enqueued per pool
#   - nfsd_pool_threads_woken_total{pool}      - Threads woken per pool
#   - nfsd_pool_threads_timedout_total{pool}   - Thread timeouts per pool
#   - nfsd_rpc_calls_total                     - Total RPC calls received
#   - nfsd_rpc_bad_calls_total                 - Total bad RPC calls
#   - nfsd_rpc_bad_auth_total                  - Total bad auth attempts
#   - nfsd_rpc_bad_client_total                - Total bad client calls
#   - nfsd_v3_ops_total{op}                    - NFSv3 per-operation counters
#   - nfsd_v4_ops_total{op}                    - NFSv4 per-operation counters
#   - nfsd_io_read_bytes_total                 - Total bytes read by server
#   - nfsd_io_write_bytes_total                - Total bytes written by server
#   - nfsd_clients_total                       - Connected NFSv4 clients
#   - nfsd_file_handles_stale_total            - Total stale file handle replies
#   - nfsd_exporter_duration_seconds           - Script execution time
#   - nfsd_exporter_last_run_timestamp         - Last successful run time
#
# Configuration:
#   Default HTTP port: 9588
#   Textfile directory: /var/lib/node_exporter
#
################################################################################

set -o pipefail

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9588
LOCK_FILE="/tmp/nfs-server-exporter.lock"
PROC_NFSD="/proc/net/rpc/nfsd"
NFSD_DIR="/proc/fs/nfsd"

# ============================================================================
# LOGGING FUNCTIONS
# ============================================================================

RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m'

# All diagnostics go to stderr so stdout carries nothing but metrics.
# %b expands the colour escapes; the message itself is printed with %s so
# backslashes inside it are never interpreted.
log()   { printf '%b[INFO]%b %s\n'  "$GREEN"  "$NC" "$*" >&2; }
warn()  { printf '%b[WARN]%b %s\n'  "$YELLOW" "$NC" "$*" >&2; }
error() { printf '%b[ERROR]%b %s\n' "$RED"    "$NC" "$*" >&2; }

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Print command-line help on stdout.
# NOTE(review): the original here-doc text was lost when this file was
# extracted; the wording below is rebuilt from the "Usage" and "Configuration"
# sections of the file header — confirm against the upstream script.
show_usage() {
  cat <<EOF
Usage: $(basename "$0") [OPTIONS]

Prometheus exporter for NFS server metrics.

Options:
  --http         Serve metrics over HTTP (requires netcat)
  -p PORT        HTTP port to listen on (default: ${HTTP_PORT})
  --textfile     Write metrics into the node_exporter textfile directory
                 (${TEXTFILE_DIR})
  -h, --help     Show this help and exit

With no options the metrics are printed to stdout.
EOF
}

# Parse command-line options into HTTP_MODE, HTTP_PORT and OUTPUT_FILE.
# Arguments: the script's "$@"
# Exits:     0 after --help, 1 on an unknown option or an invalid port
# NOTE(review): this function was missing entirely from the extracted source.
# Only the options shown in the header's Usage section are recognised, plus
# -h/--help. The textfile name below is an assumption — confirm it against
# the deployed node_exporter configuration.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --http)
        HTTP_MODE=true
        ;;
      -p)
        if [[ ! "${2:-}" =~ ^[0-9]+$ ]]; then
          error "Option -p requires a numeric port"
          exit 1
        fi
        HTTP_PORT=$2
        shift
        ;;
      --textfile)
        OUTPUT_FILE="${TEXTFILE_DIR}/nfs_server.prom"
        ;;
      -h | --help)
        show_usage
        exit 0
        ;;
      *)
        error "Unknown option: $1"
        show_usage >&2
        exit 1
        ;;
    esac
    shift
  done
}

# Take the single-instance lock stored in $LOCK_FILE.
# The file is created with noclobber, i.e. an exclusive create: two instances
# starting together cannot both succeed, and a symlink planted at the
# predictable /tmp path is not followed.
# A lock whose recorded PID is no longer alive is treated as stale and
# replaced. On success an EXIT trap is installed that removes the lock.
# Returns: 0 when the lock is held, 1 when another instance owns it or the
#          lock file cannot be created
acquire_lock() {
  local lock_pid

  if ! (set -o noclobber; echo "$$" > "$LOCK_FILE") 2>/dev/null; then
    lock_pid=$(cat "$LOCK_FILE" 2>/dev/null)
    if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then
      error "Another instance is running (PID $lock_pid)"
      return 1
    fi

    # Stale lock: remove it and retry the exclusive create exactly once.
    rm -f "$LOCK_FILE"
    if ! (set -o noclobber; echo "$$" > "$LOCK_FILE") 2>/dev/null; then
      error "Cannot create lock file $LOCK_FILE"
      return 1
    fi
  fi

  trap 'rm -f "$LOCK_FILE"' EXIT
  return 0
}

# Check if NFS server is running and proc files are available
# Returns: 0 when both $PROC_NFSD and $NFSD_DIR exist, 1 otherwise
check_nfsd() {
  if [ ! -f "$PROC_NFSD" ]; then
    error "$PROC_NFSD not found — is the NFS server running?"
    return 1
  fi
  if [ ! -d "$NFSD_DIR" ]; then
    error "$NFSD_DIR not found"
    return 1
  fi
  return 0
}

# Get number of configured NFS threads
# Returns: Thread count; "0" when the file is missing, empty or not numeric
get_thread_count() {
  local threads=""

  if [ -f "$NFSD_DIR/threads" ]; then
    threads=$(head -n 1 "$NFSD_DIR/threads" 2>/dev/null)
  fi
  if [[ ! "$threads" =~ ^[0-9]+$ ]]; then
    threads=0
  fi
  echo "$threads"
}

# Get pool stats from /proc/fs/nfsd/pool_stats
# Returns: Lines with "pool packets sockets_enqueued threads_woken threads_timedout"
get_pool_stats() {
  if [ -f "$NFSD_DIR/pool_stats" ]; then
    # Skip header line
    tail -n +2 "$NFSD_DIR/pool_stats" 2>/dev/null
  fi
}

# Print one numeric field of one record in $PROC_NFSD.
# Arguments: $1 - record keyword in column 1 (e.g. "rpc", "io", "fh")
#            $2 - 1-based field number to print
# Outputs:   the field value, or "0" when the file is unreadable, the record
#            is absent, or the field is not a plain non-negative integer.
# awk exits 0 even when no record matched, so a "|| echo 0" fallback alone
# would leave the metric without a value; the default is applied explicitly.
_proc_nfsd_field() {
  local record=$1 field=$2 value=""

  if [ -r "$PROC_NFSD" ]; then
    value=$(awk -v record="$record" -v field="$field" '
      $1 == record { print $field; exit }
    ' "$PROC_NFSD" 2>/dev/null)
  fi
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    value=0
  fi
  echo "$value"
}

# Get RPC stats from /proc/net/rpc/nfsd
# The "rc" line:  hits misses nocache
# The "io" line:  read write
# The "rpc" line: calls badcalls badfmt badauth badclnt
get_rpc_calls()      { _proc_nfsd_field rpc 2; }
get_rpc_bad_calls()  { _proc_nfsd_field rpc 3; }
get_rpc_bad_auth()   { _proc_nfsd_field rpc 5; }
get_rpc_bad_client() { _proc_nfsd_field rpc 6; }

# Get server I/O bytes from /proc/net/rpc/nfsd "io" line
get_io_read_bytes()  { _proc_nfsd_field io 2; }
get_io_write_bytes() { _proc_nfsd_field io 3; }

# Get stale file handle count from /proc/net/rpc/nfsd "fh" line
# fh: stale total_lookups anonlookups dirnocache nondir_nocache
get_stale_handles()  { _proc_nfsd_field fh 2; }

# Get NFSv3 per-operation counters from /proc/net/rpc/nfsd
# proc3 line: num_ops null getattr setattr lookup access readlink read write create mkdir symlink ...
get_v3_ops() {
  # Field 2 is the number of counters that follow; field 3 is procedure 0
  # (NULL), so the k-th name (1-based) pairs with field k + 2.
  awk '/^proc3 / {
    names = "null getattr setattr lookup access readlink read write"
    names = names " create mkdir symlink mknod remove rmdir rename link"
    names = names " readdir readdirplus fsstat fsinfo pathconf commit"
    known = split(names, ops, " ")

    n = $2 + 0
    for (i = 1; i <= n && i <= known; i++) {
      printf "%s %s\n", ops[i], $(i + 2)
    }
  }' "$PROC_NFSD" 2>/dev/null
}

# Get NFSv4 per-operation counters from /proc/net/rpc/nfsd
# proc4ops line: num_ops op0 op1 op2 ...
# Outputs: one "name count" line per known operation (ACCESS .. RELEASE_LOCKOWNER)
#
# The kernel writes a counter for every operation number starting at 0, and
# numbers 0, 1 and 2 are unassigned, so ACCESS (operation 3) is the 4th
# counter = field 6. The counter for operation number j is therefore field
# j + 3. The previous code paired the name for operation j with field j + 2,
# which reported every operation's count under the next operation's name.
get_v4_ops() {
  awk '/^proc4ops / {
    names = "access close commit create delegpurge delegreturn getattr getfh"
    names = names " link lock lockt locku lookup lookupp nverify open openattr"
    names = names " open_confirm open_downgrade putfh putpubfh putrootfh read"
    names = names " readdir readlink remove rename renew restorefh savefh"
    names = names " secinfo setattr setclientid setclientid_confirm verify"
    names = names " write release_lockowner"
    known = split(names, ops, " ")

    n = $2 + 0                       # counters present: operations 0 .. n-1
    for (k = 1; k <= known; k++) {
      opnum = k + 2                  # ops[1] is operation number 3 (ACCESS)
      if (opnum >= n) break
      printf "%s %s\n", ops[k], $(opnum + 3)
    }
  }' "$PROC_NFSD" 2>/dev/null
}

# Get active exports
# Tries /proc/fs/nfsd/exports first, then /var/lib/nfs/etab, then exportfs -v
# Returns: Lines with "export client(options)" — one per line
get_exports() {
  local raw=""

  # /proc/fs/nfsd/exports is most reliable (kernel export table)
  if [ -f "$NFSD_DIR/exports" ]; then
    raw=$(grep '^/' "$NFSD_DIR/exports" 2>/dev/null)
  fi

  # Fall back to etab (userspace export table)
  if [ -z "$raw" ] && [ -f /var/lib/nfs/etab ]; then
    raw=$(grep '^/' /var/lib/nfs/etab 2>/dev/null)
  fi

  # Fall back to exportfs command
  if [ -z "$raw" ]; then
    raw=$(/usr/sbin/exportfs -v 2>/dev/null || exportfs -v 2>/dev/null)
    # Merge continuation lines: a line that does not start with "/" is
    # appended to the export line before it.
    raw=$(printf '%s\n' "$raw" | awk '
      /^\// {
        if (line) print line
        line = $0
        next
      }
      { line = line " " $0 }
      END { if (line) print line }
    ')
  fi

  # Squeeze whitespace runs to one space, trim the leading space, drop empty
  # lines. "[[:space:]][[:space:]]*" is the portable spelling of the GNU-only
  # "[[:space:]]\+".
  printf '%s\n' "$raw" \
    | sed -e 's/[[:space:]][[:space:]]*/ /g' -e 's/^ //' \
    | grep -v '^$'
}

# Count active exports
# Returns: number of lines produced by get_exports
get_export_count() {
  local count
  count=$(get_exports | wc -l)
  count=${count//[[:space:]]/}   # some wc implementations pad the number
  echo "${count:-0}"
}

# Count connected NFSv4 clients
# Returns: number of entries in $NFSD_DIR/clients, or 0 without that directory
get_client_count() {
  local count=0 entry

  if [ -d "$NFSD_DIR/clients" ]; then
    # Glob instead of parsing ls output; the -e test skips the literal
    # pattern that is left behind when the directory is empty.
    for entry in "$NFSD_DIR/clients"/*; do
      [ -e "$entry" ] || continue
      count=$((count + 1))
    done
  fi
  echo "$count"
}

# ============================================================================
# METRIC GENERATION
# ============================================================================

# Escape a string for use as a Prometheus label value: backslash, double
# quote and newline must be backslash-escaped.
_nfsd_escape_label() {
  local value=$1
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  value=${value//$'\n'/\\n}
  printf '%s' "$value"
}

# Print the HELP, TYPE and sample line of one unlabelled metric.
# Arguments: $1 - metric name, $2 - type, $3 - help text, $4 - value
_nfsd_emit_metric() {
  printf '# HELP %s %s\n# TYPE %s %s\n%s %s\n' "$1" "$3" "$1" "$2" "$1" "$4"
}

# Turn "name count" lines on stdin into one counter family with an op label.
# Prints nothing when stdin carries no such lines.
# Arguments: $1 - metric name, $2 - help text
_nfsd_emit_op_family() {
  awk -v metric="$1" -v help="$2" '
    NF >= 2 { n++; op[n] = $1; count[n] = $2 }
    END {
      if (n == 0) exit
      printf "# HELP %s %s\n# TYPE %s counter\n", metric, help, metric
      for (i = 1; i <= n; i++) {
        printf "%s{op=\"%s\"} %s\n", metric, op[i], count[i]
      }
    }
  '
}

# Print every metric in Prometheus text exposition format on stdout.
# Returns: 0. When check_nfsd fails only "nfsd_up 0" is emitted.
#
# NOTE(review): everything after the check_nfsd test was lost when this file
# was extracted. The body below is rebuilt from the "Metrics Exported" list
# in the file header and the getter functions of this script; metric names
# and HELP texts come from that list, the TYPE lines are an assumption.
# TODO(review): nfsd_threads_busy is listed in the header, but no surviving
# code shows how it was computed, so it is not emitted here rather than
# guessed. Whether the "down" path returned 0 or 1 is also unconfirmed.
generate_metrics() {
  local script_start script_end
  script_start=$(date +%s)

  if ! check_nfsd; then
    cat <<EOF
# HELP nfsd_up Exporter status (1=up, 0=down)
# TYPE nfsd_up gauge
nfsd_up 0
EOF
    return 0
  fi

  _nfsd_emit_metric nfsd_up gauge "Exporter status (1=up, 0=down)" 1
  _nfsd_emit_metric nfsd_exports_total gauge "Number of active exports" \
    "$(get_export_count)"

  # One info sample per "export client(options)" line.
  local line export_path rest client options
  printf '# HELP nfsd_export_info Export configuration\n'
  printf '# TYPE nfsd_export_info gauge\n'
  while IFS= read -r line; do
    export_path=${line%% *}
    rest=""
    if [[ "$line" == *" "* ]]; then
      rest=${line#* }
    fi
    client=${rest%%(*}
    options=""
    if [[ "$rest" == *"("* ]]; then
      options=${rest#*(}
      options=${options%%)*}
    fi
    printf 'nfsd_export_info{export="%s",client="%s",options="%s"} 1\n' \
      "$(_nfsd_escape_label "$export_path")" \
      "$(_nfsd_escape_label "$client")" \
      "$(_nfsd_escape_label "$options")"
  done < <(get_exports)

  _nfsd_emit_metric nfsd_threads_total gauge "Configured NFS server threads" \
    "$(get_thread_count)"

  # The text format wants all samples of a family grouped together, so the
  # pool rows are collected first and printed family by family.
  get_pool_stats | awk '
    function family(name, help, values,    i) {
      printf "# HELP %s %s\n# TYPE %s counter\n", name, help, name
      for (i = 1; i <= n; i++) {
        printf "%s{pool=\"%s\"} %s\n", name, pool[i], values[i]
      }
    }
    NF >= 5 {
      n++
      pool[n] = $1; packets[n] = $2; enqueued[n] = $3
      woken[n] = $4; timedout[n] = $5
    }
    END {
      if (n == 0) exit
      family("nfsd_pool_packets_total", "Packets received per pool", packets)
      family("nfsd_pool_sockets_enqueued_total", "Sockets enqueued per pool", enqueued)
      family("nfsd_pool_threads_woken_total", "Threads woken per pool", woken)
      family("nfsd_pool_threads_timedout_total", "Thread timeouts per pool", timedout)
    }
  '

  _nfsd_emit_metric nfsd_rpc_calls_total counter \
    "Total RPC calls received" "$(get_rpc_calls)"
  _nfsd_emit_metric nfsd_rpc_bad_calls_total counter \
    "Total bad RPC calls" "$(get_rpc_bad_calls)"
  _nfsd_emit_metric nfsd_rpc_bad_auth_total counter \
    "Total bad auth attempts" "$(get_rpc_bad_auth)"
  _nfsd_emit_metric nfsd_rpc_bad_client_total counter \
    "Total bad client calls" "$(get_rpc_bad_client)"

  get_v3_ops | _nfsd_emit_op_family nfsd_v3_ops_total "NFSv3 per-operation counters"
  get_v4_ops | _nfsd_emit_op_family nfsd_v4_ops_total "NFSv4 per-operation counters"

  _nfsd_emit_metric nfsd_io_read_bytes_total counter \
    "Total bytes read by server" "$(get_io_read_bytes)"
  _nfsd_emit_metric nfsd_io_write_bytes_total counter \
    "Total bytes written by server" "$(get_io_write_bytes)"
  _nfsd_emit_metric nfsd_clients_total gauge \
    "Connected NFSv4 clients" "$(get_client_count)"
  _nfsd_emit_metric nfsd_file_handles_stale_total counter \
    "Total stale file handle replies" "$(get_stale_handles)"

  script_end=$(date +%s)
  _nfsd_emit_metric nfsd_exporter_duration_seconds gauge \
    "Script execution time" "$((script_end - script_start))"
  _nfsd_emit_metric nfsd_exporter_last_run_timestamp gauge \
    "Last successful run time" "$script_end"

  return 0
}

# ============================================================================
# HTTP SERVER
# ============================================================================

# Serve metrics over HTTP with netcat, one connection per loop iteration.
# "GET /metrics" gets the metrics; any other request gets a small HTML page.
# Exits: 1 when nc is not installed or the request FIFO cannot be created.
#
# The responder has to see what the client sent, so nc's output is fed back
# into the responder through a FIFO. Without that loop the responder's "read"
# consumes the script's own stdin: /metrics is never matched and the response
# is produced before any client has connected.
#
# NOTE(review): the first lines of this function and the HTML page were lost
# when this file was extracted; both are rebuilt here. The nc flags
# (-l -p PORT -q 1) are kept exactly as in the surviving code.
run_http_server() {
  if ! command -v nc >/dev/null 2>&1; then
    error "netcat (nc) required for HTTP mode"
    exit 1
  fi

  local request_fifo request
  # Deliberately global: the EXIT trap below runs after this function's
  # locals are gone.
  _NFSD_HTTP_FIFO_DIR=$(mktemp -d) || {
    error "Cannot create temporary directory for HTTP mode"
    exit 1
  }
  request_fifo="${_NFSD_HTTP_FIFO_DIR}/request"
  if ! mkfifo "$request_fifo"; then
    rm -rf "$_NFSD_HTTP_FIFO_DIR"
    error "Cannot create request FIFO"
    exit 1
  fi
  # Replaces the lock-only EXIT trap, so the lock file is removed here too.
  trap 'rm -f "$LOCK_FILE"; rm -rf "$_NFSD_HTTP_FIFO_DIR"' EXIT

  log "Serving metrics on port $HTTP_PORT"

  while true; do
    # The group keeps the FIFO open for reading until the whole response has
    # been written, so nc can pass the remaining request headers through
    # without hitting a closed pipe.
    {
      read -r request
      if [[ "$request" =~ ^GET\ /metrics ]]; then
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r\n'
        generate_metrics
      else
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n'
        echo "<html><head><title>NFS Server Exporter</title></head>
<body>
<h1>NFS Server Prometheus Exporter</h1>
<p><a href='/metrics'>Metrics</a></p>
</body></html>"
      fi
    } < "$request_fifo" \
      | nc -l -p "$HTTP_PORT" -q 1 > "$request_fifo" 2>/dev/null \
      || sleep 1   # e.g. port already in use: back off instead of spinning
  done
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Entry point: parse options, take the lock, then run in one of three modes —
# HTTP server, textfile (atomic write to $OUTPUT_FILE) or plain stdout.
# Arguments: the script's "$@"
main() {
  parse_args "$@"

  if ! acquire_lock; then
    exit 1
  fi

  if [ "$HTTP_MODE" = true ]; then
    run_http_server
  elif [ -n "$OUTPUT_FILE" ]; then
    local output_dir temp_file file_lines

    output_dir="$(dirname "$OUTPUT_FILE")"
    if ! mkdir -p "$output_dir"; then
      error "Cannot create output directory $output_dir"
      exit 1
    fi

    # Write next to the target so the final mv is a same-filesystem rename
    # and readers never see a partial file.
    if ! temp_file=$(mktemp "${output_dir}/.nfsd_metrics.XXXXXX"); then
      error "Cannot create temporary file in $output_dir"
      exit 1
    fi

    if ! generate_metrics > "$temp_file" 2>/dev/null; then
      rm -f "$temp_file"
      error "Failed to generate metrics"
      exit 1
    fi

    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
    file_lines=${file_lines//[[:space:]]/}
    if [ "${file_lines:-0}" -lt 5 ]; then
      # Fewer than 5 lines means the collection was incomplete; the previous
      # textfile is left untouched.
      rm -f "$temp_file"
      error "Metrics file too small ($file_lines lines), keeping previous"
      exit 1
    fi

    chmod 644 "$temp_file"
    mv -f "$temp_file" "$OUTPUT_FILE"
    log "Metrics written to $OUTPUT_FILE ($file_lines lines)"
  else
    generate_metrics
  fi
}

main "$@"