#!/bin/bash
################################################################################
# Script Name: nfs-exporter.sh
# Version: 1.0
# Description: Prometheus exporter for NFS client metrics — mount status, RPC
#              retransmits, stale file handles, read/write latency, transfer
#              bytes, operation counts, timeouts, and mount options
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - NFS client mounts active
#   - /proc/self/mountstats available
#   - /proc/net/rpc/nfs available
#   - netcat (nc) for HTTP mode
#
# Usage:
#   ./nfs-exporter.sh
#   ./nfs-exporter.sh --http -p 9587
#   ./nfs-exporter.sh --textfile
#
# Metrics Exported:
#   - nfs_up - Exporter status (1=up, 0=down)
#   - nfs_mount_up{mount,server,export} - Mount status
#   - nfs_mount_info{mount,server,export,version,proto} - Mount configuration
#   - nfs_mount_age_seconds{mount} - Seconds since mount was established
#   - nfs_rpc_retransmits_total - Total RPC retransmits
#   - nfs_rpc_auth_refreshes_total - Total RPC auth refreshes
#   - nfs_mount_stale_handles_total{mount} - Stale file handle count
#   - nfs_mount_read_bytes_total{mount} - Total bytes read
#   - nfs_mount_write_bytes_total{mount} - Total bytes written
#   - nfs_mount_read_ops_total{mount} - Total read operations
#   - nfs_mount_write_ops_total{mount} - Total write operations
#   - nfs_mount_read_latency_seconds{mount} - Cumulative read latency
#   - nfs_mount_write_latency_seconds{mount} - Cumulative write latency
#   - nfs_mount_getattr_ops_total{mount} - Total GETATTR operations
#   - nfs_mount_access_ops_total{mount} - Total ACCESS operations
#   - nfs_mount_lookup_ops_total{mount} - Total LOOKUP operations
#   - nfs_mount_readdir_ops_total{mount} - Total READDIR operations
#   - nfs_mount_timeouts_total{mount} - Total operation timeouts
#   - nfs_mounts_total - Total number of NFS mounts
#   - nfs_exporter_duration_seconds - Script execution time
#   - nfs_exporter_last_run_timestamp - Last successful run
#     time
#
# Configuration:
#   Default HTTP port: 9587
#   Textfile directory: /var/lib/node_exporter
#
################################################################################

set -o pipefail

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9587
LOCK_FILE="/tmp/nfs-exporter.lock"
MOUNTSTATS="/proc/self/mountstats"
RPC_NFS="/proc/net/rpc/nfs"
# Mount table used for NFS mount discovery. Kept in a variable (like
# MOUNTSTATS and RPC_NFS) so it can be pointed at a fixture file.
PROC_MOUNTS="/proc/mounts"

# ============================================================================
# LOGGING FUNCTIONS
# ============================================================================

RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m'

# All three loggers write to stderr so stdout carries only metrics.
log()   { echo -e "${GREEN}[INFO]${NC} $*" >&2; }
warn()  { echo -e "${YELLOW}[WARN]${NC} $*" >&2; }
error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# NOTE(review): the text between the start of show_usage's here-document and
# the lock-file read is missing from this copy of the file (it jumps from
# "cat <" straight to "/dev/null)"). The surviving fragment is preserved
# verbatim below, commented out because it cannot be parsed as it stands.
# Restore show_usage and the lock-handling code from a pristine copy; main
# also calls parse_args, which does not appear anywhere in this file.
#
# show_usage() { cat </dev/null) if [ -n "$lock_pid" ] && kill -0 "$lock_pid" 2>/dev/null; then error "Another instance is running (PID $lock_pid)" return 1 fi rm -f "$LOCK_FILE" fi echo $$ > "$LOCK_FILE" trap 'rm -f "$LOCK_FILE"' EXIT return 0 }

# Check if NFS client data is available
# Returns: 0 when $MOUNTSTATS exists and at least one NFS client mount is
#          listed, 1 otherwise (after logging the reason)
check_nfs() {
    if [ ! -f "$MOUNTSTATS" ]; then
        error "$MOUNTSTATS not found"
        return 1
    fi

    # Check for at least one NFS mount. Reuse get_nfs_mounts so both functions
    # agree on what counts as one; a plain substring search for "nfs" also
    # matches the server-side "nfsd" filesystem and any path containing "nfs".
    if [ -z "$(get_nfs_mounts)" ]; then
        warn "No NFS mounts found"
        return 1
    fi
    return 0
}

# Get list of NFS mount points from the mount table
# Returns: Lines with "server:export mountpoint nfsversion"
get_nfs_mounts() {
    # Exact fstype comparison: a /^nfs/ prefix match would also list "nfsd".
    awk '$3 == "nfs" || $3 == "nfs4" { print $1, $2, $3 }' "$PROC_MOUNTS" 2>/dev/null
}

# Print the mountstats section belonging to one mount point
# Args: $1 - mount point, spelled as it appears in the mount table
# Returns: Every line from the matching "device ... mounted on <mount>" line up
#          to (not including) the next "device" line; nothing when no match
get_mountstats_block() {
    local mount="$1"

    # The mount point travels through the environment rather than "awk -v":
    # -v assignments undergo escape processing, which would turn an escaped
    # space ("\040") in the path into a real space and defeat the comparison.
    # The comparison itself is literal, so regex metacharacters in a path
    # (".", "+", ...) are matched as ordinary characters.
    NFS_EXPORTER_MOUNT="$mount" awk '
        BEGIN { needle = "mounted on " ENVIRON["NFS_EXPORTER_MOUNT"] }
        /^device .* mounted on / {
            line_len = length($0)
            needle_len = length(needle)
            ends_with = (line_len >= needle_len &&
                         substr($0, line_len - needle_len + 1) == needle)
            found = (index($0, needle " ") > 0 || ends_with)
        }
        found { print }
    ' "$MOUNTSTATS" 2>/dev/null
}

# Extract NFS version from mount options
# Args: $1 - mount point
# Returns: NFS version string (e.g., "4.2", "3"), or "unknown"
get_nfs_version() {
    local mount="$1"
    local opts ver fstype

    # Option list of the first NFS entry for this mount point.
    opts=$(NFS_EXPORTER_MOUNT="$mount" awk '
        ($3 == "nfs" || $3 == "nfs4") && $2 == ENVIRON["NFS_EXPORTER_MOUNT"] {
            print $4
            exit
        }
    ' "$PROC_MOUNTS" 2>/dev/null)

    # The pattern is anchored to the start of an option so that "mountvers=3"
    # is not picked up as a second "vers=3"; head keeps the result to one line.
    ver=$(printf '%s\n' "$opts" |
        grep -oE '(^|,)vers=[0-9.]+' | head -n 1 | cut -d= -f2)

    if [ -z "$ver" ]; then
        # Try from mountstats, looking only at the "opts:" line: the "device"
        # line of the block can carry "statvers=..." which is not the NFS
        # protocol version.
        ver=$(get_mountstats_block "$mount" |
            awk '$1 == "opts:" { print $2; exit }' |
            grep -oE '(^|,)vers=[0-9.]+' | head -n 1 | cut -d= -f2)
    fi

    if [ -z "$ver" ]; then
        # Fall back to fstype
        fstype=$(NFS_EXPORTER_MOUNT="$mount" awk '
            $2 == ENVIRON["NFS_EXPORTER_MOUNT"] { print $3; exit }
        ' "$PROC_MOUNTS" 2>/dev/null)
        case "$fstype" in
            nfs4) ver="4" ;;
            nfs)  ver="3" ;;
            *)    ver="unknown" ;;
        esac
    fi

    echo "$ver"
}

# Extract protocol from mount options
# Args: $1 - mount point
# Returns: Value of the first "proto=" option in the mount's mountstats
#          section; "tcp" when none is found
get_nfs_proto() {
    local mount="$1"
    local proto

    # Anchored so that "mountproto=" can never be mistaken for "proto=".
    proto=$(get_mountstats_block "$mount" |
        grep -oE '(^|[,[:space:]])proto=[a-z0-9]+' | head -n 1 | cut -d= -f2)
    echo "${proto:-tcp}"
}

# Parse per-operation stats from mountstats
# Args: $1 - mount point, $2 - operation name (READ, WRITE, GETATTR, etc.)
# Returns: "ops transmits timeouts bytes_sent bytes_recv queue_time rtt_time exec_time"
#          (the eight values that follow the "<OP>:" label on that operation's
#          line; prints nothing when the operation is not listed)
get_op_stats() {
    local mount="$1"
    local op="$2"

    get_mountstats_block "$mount" | awk -v op="$op:" '
        $1 == op {
            # Fields: op ops transmits timeouts bytes_sent bytes_recv queue rtt exec
            printf "%s %s %s %s %s %s %s %s", $2, $3, $4, $5, $6, $7, $8, $9
            exit
        }
    ' 2>/dev/null
}

# Print a single counter out of get_op_stats
# Args: $1 - mount point, $2 - operation name,
#       $3 - 1-based position within the get_op_stats output
# Returns: The counter, or 0 when it is absent or not a plain integer
_nfs_op_counter() {
    local mount="$1"
    local op="$2"
    local position="$3"
    local value

    value=$(get_op_stats "$mount" "$op" |
        awk -v pos="$position" '{ print $pos; exit }')
    # Anything that is not a non-negative integer would corrupt the metric
    # line it ends up in, so treat it the same as a missing value.
    if [[ ! "$value" =~ ^[0-9]+$ ]]; then
        value=0
    fi
    echo "$value"
}

# Get total bytes read for a mount from mountstats
# Args: $1 - mount point
# Returns: bytes_recv of the READ operation (0 if unavailable)
get_read_bytes() {
    _nfs_op_counter "$1" "READ" 5
}

# Get total bytes written for a mount from mountstats
# Args: $1 - mount point
# Returns: bytes_sent of the WRITE operation (0 if unavailable)
get_write_bytes() {
    _nfs_op_counter "$1" "WRITE" 4
}

# Get operation count for a specific NFS operation
# Args: $1 - mount point, $2 - operation name
# Returns: Operation count (0 if unavailable)
get_op_count() {
    _nfs_op_counter "$1" "$2" 1
}

# Get cumulative latency for a specific operation (execution time in milliseconds)
# Args: $1 - mount point, $2 - operation name
# Returns: Cumulative execution time in seconds with six decimals, or a bare
#          "0" when the counter is zero or unavailable
get_op_latency() {
    local mount="$1"
    local op="$2"
    local exec_ms

    exec_ms=$(_nfs_op_counter "$mount" "$op" 8)
    if [ "$exec_ms" = "0" ]; then
        echo "0"
        return 0
    fi

    # The value is handed to awk as data (-v), never spliced into the program
    # text. LC_ALL=C keeps "." as the decimal separator.
    LC_ALL=C awk -v ms="$exec_ms" 'BEGIN { printf "%.6f", ms / 1000 }' 2>/dev/null ||
        echo "0"
}

# Get timeout count for a specific operation
# Args: $1 - mount point, $2 - operation name
# Returns: Timeout count (0 if unavailable)
get_op_timeouts() {
    _nfs_op_counter "$1" "$2" 3
}

#
# Get total timeouts across all operations for a mount
# Args: $1 - mount point
# Returns: Sum of the timeouts column over every per-operation line
get_total_timeouts() {
    local mount="$1"

    # Operation labels may contain underscores and digits as well as capitals;
    # a bare [A-Z]+ would silently skip labels such as OPEN_NOATTR.
    get_mountstats_block "$mount" | awk '
        /^[[:space:]]+[A-Z][A-Z0-9_]*:/ { timeouts += $4 }
        END { print timeouts + 0 }
    ' 2>/dev/null
}

# Get stale file handle count from mountstats events
# Args: $1 - mount point
# Returns: First "badxid=<n>" value found in the mount's mountstats section;
#          otherwise the number of kernel log lines containing
#          "Stale file handle"
# NOTE(review): the dmesg fallback is not filtered by mount point, so that
# number is host-wide.
get_stale_handles() {
    local mount="$1"
    local count

    # head keeps the result to a single value even if the pattern occurs
    # more than once in the section.
    count=$(get_mountstats_block "$mount" |
        grep -oE 'badxid=[0-9]+' | head -n 1 | cut -d= -f2)

    if [ -z "$count" ]; then
        # grep -c always prints exactly one number, including 0.
        count=$(dmesg 2>/dev/null | grep -c "Stale file handle")
    fi

    echo "${count:-0}"
}

# Print one column of the "rpc" line in $RPC_NFS
# Args: $1 - awk field number on that line
# Returns: The value, or 0 when the file or the line is missing
_nfs_rpc_field() {
    local column="$1"
    local value=""

    if [ -f "$RPC_NFS" ]; then
        value=$(awk -v col="$column" '$1 == "rpc" { print $col; exit }' \
            "$RPC_NFS" 2>/dev/null)
    fi
    echo "${value:-0}"
}

# Get RPC retransmits from /proc/net/rpc/nfs
# Returns: Total retransmits
get_rpc_retransmits() {
    # Line starting with "rpc" has: rpc count retrans authrefresh
    _nfs_rpc_field 3
}

# Get RPC auth refreshes from /proc/net/rpc/nfs
# Returns: Total auth refreshes
get_rpc_auth_refreshes() {
    _nfs_rpc_field 4
}

# Get mount age in seconds
# Args: $1 - mount point
# Returns: Value of the "age:" line in the mount's mountstats section
#          (0 when there is none)
get_mount_age() {
    local mount="$1"
    local age

    age=$(get_mountstats_block "$mount" | awk '$1 == "age:" { print $2; exit }')
    echo "${age:-0}"
}

# ============================================================================
# METRIC GENERATION
# ============================================================================

# NOTE(review): the body of generate_metrics and the opening of the HTTP
# server function are missing from this copy of the file (the text jumps from
# "cat <" to "/dev/null 2>&1; then"). The surviving fragment is preserved
# verbatim below, commented out because it cannot be parsed as it stands.
# Restore both functions from a pristine copy.
#
# generate_metrics() { local script_start script_start=$(date +%s) if ! check_nfs; then cat </dev/null 2>&1; then error "netcat (nc) required for HTTP mode" exit 1 fi while true; do { read -r request if [[ "$request" =~ ^GET\ /metrics ]]; then echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r" generate_metrics else echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r" echo "NFS Exporter

# NOTE(review): the lines below are what is left of the HTML index page and
# the closing lines of the HTTP server loop (presumably run_http_server, the
# name main calls). The markup and the start of that function are not present
# in this copy of the file, so the fragment is preserved verbatim, commented
# out, until the function is restored from a pristine copy.
#
# NFS Prometheus Exporter
#
# Metrics
#
# " fi } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null done }

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Entry point: parse arguments, take the instance lock, then run in one of
# three modes chosen by HTTP_MODE / OUTPUT_FILE:
#   - HTTP_MODE=true      -> run_http_server
#   - OUTPUT_FILE non-empty -> write metrics to that file via a temp file
#   - otherwise           -> print metrics to stdout
# Args: the script's command-line arguments
# Exits with status 1 if the lock cannot be taken or the metrics file cannot
# be produced.
main() {
    parse_args "$@"

    if ! acquire_lock; then
        exit 1
    fi

    if [ "$HTTP_MODE" = true ]; then
        run_http_server
    elif [ -n "$OUTPUT_FILE" ]; then
        local output_dir temp_file file_lines

        output_dir="$(dirname -- "$OUTPUT_FILE")"
        if ! mkdir -p -- "$output_dir"; then
            error "Cannot create output directory $output_dir"
            exit 1
        fi

        # The temp file lives next to the target so the final mv is a rename
        # within one directory rather than a copy.
        if ! temp_file=$(mktemp "${output_dir}/.nfs_metrics.XXXXXX"); then
            error "Cannot create temporary file in $output_dir"
            exit 1
        fi

        if ! generate_metrics > "$temp_file" 2>/dev/null; then
            rm -f -- "$temp_file"
            error "Failed to generate metrics"
            exit 1
        fi

        # Refuse to replace the previous file with a suspiciously short one.
        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
        if [ "$file_lines" -lt 5 ]; then
            rm -f -- "$temp_file"
            error "Metrics file too small ($file_lines lines), keeping previous"
            exit 1
        fi

        chmod 644 "$temp_file"
        if ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
            # Do not leave the hidden temp file behind or report success.
            rm -f -- "$temp_file"
            error "Cannot move metrics into place at $OUTPUT_FILE"
            exit 1
        fi
        log "Metrics written to $OUTPUT_FILE ($file_lines lines)"
    else
        generate_metrics
    fi
}

main "$@"