#!/bin/bash
################################################################################
# Script Name: fail2ban-exporter.sh
# Version: 2.0
# Description: Prometheus exporter for fail2ban providing comprehensive metrics
#              for monitoring jail status, ban/unban activity, and threat analysis
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - fail2ban-client command available
#   - fail2ban service running
#   - journalctl (systemd) for historical data
#   - netcat (nc) for HTTP mode
#   - /var/log/fail2ban.log for timestamp parsing
#   - GNU date and GNU stat (date -d, stat -c)
#
# Usage:
#   # Output to stdout
#   ./fail2ban-exporter.sh
#
#   # HTTP server mode
#   ./fail2ban-exporter.sh --http -p 9191
#
#   # Textfile collector mode
#   ./fail2ban-exporter.sh --textfile
#
# Metrics Exported:
#   Core Metrics (v1.0):
#   - fail2ban_up{}                                   - Exporter status (1=up, 0=down)
#   - fail2ban_server_info{version,exporter_version}  - Server version info
#   - fail2ban_jail_count{}                           - Total number of jails (gauge)
#   - fail2ban_jail_enabled{jail}                     - Jail enabled status (gauge)
#   - fail2ban_jail_failed_current{jail}              - Currently failed attempts (gauge)
#   - fail2ban_jail_banned_current{jail}              - Currently banned IPs (gauge)
#   - fail2ban_jail_failed_total{jail}                - Total failed attempts (counter)
#   - fail2ban_jail_banned_total{jail}                - Total banned IPs (counter)
#   - fail2ban_jail_ban_rate{jail}                    - Ban ratio: banned/failed (gauge)
#
#   Enhanced Metrics (v2.0):
#   - fail2ban_jail_last_ban_timestamp{jail}          - Unix timestamp of last ban (gauge)
#   - fail2ban_jail_last_unban_timestamp{jail}        - Unix timestamp of last unban (gauge)
#   - fail2ban_jail_bans_per_period{jail,period}      - Bans in 1h/24h (gauge)
#   - fail2ban_jail_unbans_per_period{jail,period}    - Unbans in 1h/24h (gauge)
#   - fail2ban_jail_unique_banned_ips{jail,period}    - Unique IPs banned (gauge)
#   - fail2ban_jail_info{jail,port,protocol,filter}   - Jail configuration (gauge)
#   - fail2ban_jail_top_attacker_count{jail,ip}       - Top 5 attacking IPs (gauge)
#   - fail2ban_jail_ban_rate_per_hour{jail}           - Average bans/hour over 24h (gauge)
#   - fail2ban_jail_repeat_offenders{jail,threshold}  - Repeat offender count (7d)
#   - fail2ban_jail_seconds_since_last_ban{jail}      - Seconds since last ban
#   - fail2ban_jail_seconds_since_last_unban{jail}    - Seconds since last unban
#   - fail2ban_log_size_bytes                         - Size of fail2ban.log file
#   - fail2ban_log_age_seconds                        - Time since last log modification
#   - fail2ban_log_rotation_timestamp                 - Last log rotation time
#   - fail2ban_exporter_duration_seconds              - Script execution time
#   - fail2ban_exporter_last_run_timestamp            - Last successful run time
#
# Configuration:
#   Default HTTP port:  9191
#   Textfile directory: /var/lib/node_exporter
#   Log source:         /var/log/fail2ban.log
#   Config directory:   /etc/fail2ban (override with FAIL2BAN_CONF_DIR)
#
################################################################################

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9191
FAIL2BAN_LOG="/var/log/fail2ban.log"
# Root of the fail2ban configuration tree; overridable from the environment.
FAIL2BAN_CONF_DIR="${FAIL2BAN_CONF_DIR:-/etc/fail2ban}"
EXPORTER_VERSION="2.0"
# Extended regex used everywhere an IPv4 address is pulled out of a log line.
F2B_IPV4_RE='[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+'

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Print command-line help to stdout.
# NOTE(review): the original help text was not recoverable from the damaged
# source; this text is reconstructed from the header's Usage section.
show_usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Prometheus exporter for fail2ban.

Options:
  --http              Serve metrics over HTTP (requires netcat)
  -p, --port PORT     HTTP port (default: ${HTTP_PORT})
  --textfile          Write metrics to ${TEXTFILE_DIR}/fail2ban.prom
  -o, --output FILE   Write metrics to FILE (textfile collector format)
  -h, --help          Show this help and exit

With no options, metrics are written to stdout.
EOF
}

# Parse command-line arguments into HTTP_MODE / HTTP_PORT / OUTPUT_FILE.
# Args: the script's arguments
# Exits 1 on an unknown option or a missing/invalid option value.
# NOTE(review): only the "unknown option" arm survived in the damaged source;
# the option set is reconstructed from the header and the config variables.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        show_usage
        exit 0
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p | --port)
        if [[ ! "${2:-}" =~ ^[0-9]+$ ]]; then
          echo "ERROR: $1 requires a numeric port" >&2
          exit 1
        fi
        HTTP_PORT="$2"
        shift 2
        ;;
      --textfile)
        OUTPUT_FILE="${TEXTFILE_DIR}/fail2ban.prom"
        shift
        ;;
      -o | --output)
        if [[ -z "${2:-}" ]]; then
          echo "ERROR: $1 requires a file path" >&2
          exit 1
        fi
        OUTPUT_FILE="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

# Check if fail2ban is installed and running
# Returns: 0 if OK, 1 if error (reason is printed to stderr)
check_fail2ban() {
  if ! command -v fail2ban-client >/dev/null 2>&1; then
    echo "ERROR: fail2ban-client not found" >&2
    return 1
  fi

  # Verify fail2ban server is responding
  if ! fail2ban-client ping >/dev/null 2>&1; then
    echo "ERROR: fail2ban server not responding" >&2
    return 1
  fi

  return 0
}

# Emit the "# HELP" / "# TYPE" preamble for one metric.
# Args: $1 - metric name, $2 - metric type, $3 - help text
_f2b_metric_header() {
  printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$3" "$1" "$2"
}

# Escape a string for use as a Prometheus label value (backslash and quote).
# Args: $1 - raw value
_f2b_label_escape() {
  local value="$1"
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  printf '%s\n' "$value"
}

# Succeeds when $1 is a decimal integer greater than zero.
_f2b_is_positive_int() {
  [[ "$1" =~ ^[0-9]+$ ]] && ((10#$1 > 0))
}

# Print journal lines for "[jail] <action> " events since a point in time.
# Args: $1 - jail name, $2 - action word (Ban/Unban), $3 - journalctl --since value
_f2b_journal_events() {
  local jail="$1" action="$2" since="$3"
  # -F: the jail name is matched literally, never as a regex.
  journalctl -u fail2ban --since "$since" 2>/dev/null \
    | grep -F -- "[${jail}] ${action} "
}

# Print fail2ban.log lines for "[jail] <action> " events since a point in time.
# Args: $1 - jail name, $2 - action word (Ban/Unban), $3 - period for date -d
# The log's "YYYY-MM-DD HH:MM:SS" prefix sorts lexicographically, so the cutoff
# is compared as a string in a single awk pass instead of forking date once per
# line. Both sides are local time. If the period cannot be parsed the cutoff is
# empty and every matching line is returned (the original used epoch 0).
_f2b_log_events() {
  local jail="$1" action="$2" period="$3" cutoff
  [[ -f "$FAIL2BAN_LOG" ]] || return 0
  cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""
  awk -v needle="[${jail}] ${action} " -v cutoff="$cutoff" '
    index($0, needle) > 0 && ($1 " " substr($2, 1, 8)) >= cutoff
  ' "$FAIL2BAN_LOG" 2>/dev/null
}

# Count lines on stdin (always prints a bare integer).
_f2b_count_lines() {
  awk 'END { print NR }'
}

# Count distinct IPv4 addresses found on stdin.
_f2b_count_unique_ips() {
  grep -oE "$F2B_IPV4_RE" | sort -u | _f2b_count_lines
}

# Count IPv4 addresses on stdin that occur at least $1 times.
_f2b_count_repeat_ips() {
  grep -oE "$F2B_IPV4_RE" | sort | uniq -c \
    | awk -v t="$1" '$1 >= t { n++ } END { print n + 0 }'
}

# Count ban or unban events in a period: journal first, log file if that is 0.
# Args: $1 - jail name, $2 - action word, $3 - period
_f2b_count_events() {
  local jail="$1" action="$2" period="$3" count
  count=$(_f2b_journal_events "$jail" "$action" "$period" | _f2b_count_lines)
  if [[ "${count:-0}" -eq 0 ]]; then
    count=$(_f2b_log_events "$jail" "$action" "$period" | _f2b_count_lines)
  fi
  printf '%s\n' "${count:-0}"
}

# Print the epoch of the last log line for a jail containing "<word> ".
# Args: $1 - jail name, $2 - event word (Ban/Unban)
# Prints 0 when no such line exists or its timestamp cannot be parsed.
_f2b_last_event_epoch() {
  local jail="$1" word="$2" stamp epoch=""
  stamp=$(grep -F -- "[${jail}]" "$FAIL2BAN_LOG" 2>/dev/null \
    | grep -- "${word} " | tail -n 1 | awk '{ print $1, $2 }')
  if [[ -n "$stamp" ]]; then
    epoch=$(date -d "$stamp" +%s 2>/dev/null)
  fi
  printf '%s\n' "${epoch:-0}"
}

# Print the value of the first "key = value" line in an INI-style file.
# Args: $1 - file, $2 - section header such as "[sshd]" ("" = whole file),
#       $3 - key
# Prints nothing when the file, section or key is absent.
# Section state is tracked explicitly: an awk range /\[jail\]/,/^\[/ closes on
# its own first line, because the header also matches the end pattern.
_f2b_conf_value() {
  local file="$1" section="$2" key="$3"
  [[ -f "$file" ]] || return 0
  awk -v section="$section" -v key="$key" '
    BEGIN { active = (section == "") }
    section != "" && /^[[:space:]]*\[/ { active = ($1 == section); next }
    active && $0 ~ ("^" key "[[:space:]]*=") {
      value = $0
      sub(/^[^=]*=[[:space:]]*/, "", value)
      sub(/[[:space:]]+$/, "", value)
      print value
      exit
    }
  ' "$file" 2>/dev/null
}

# Get list of all active fail2ban jails
# Returns: Space-separated list of jail names
get_jails() {
  # Take the text after "Jail list:", turn commas into spaces, and let xargs
  # collapse the remaining whitespace.
  fail2ban-client status 2>/dev/null \
    | sed -n 's/.*Jail list:[[:space:]]*//p' \
    | tr ',' ' ' \
    | xargs
}

# Get statistics for a specific jail
# Args: $1 - jail name
# Returns: Pipe-delimited string:
#          currently_failed|currently_banned|total_failed|total_banned
#          (each field defaults to 0 when absent from the status output)
get_jail_stats() {
  local jail="$1"
  # One fail2ban-client call, one awk pass; the number is the last field.
  fail2ban-client status "$jail" 2>/dev/null | awk '
    /Currently failed:/ { cf = $NF }
    /Currently banned:/ { cb = $NF }
    /Total failed:/     { tf = $NF }
    /Total banned:/     { tb = $NF }
    END {
      printf "%s|%s|%s|%s\n",
        (cf == "" ? 0 : cf), (cb == "" ? 0 : cb),
        (tf == "" ? 0 : tf), (tb == "" ? 0 : tb)
    }
  '
}

# Get list of currently banned IPs for a jail
# Args: $1 - jail name
# Returns: List of IPs, one per line
get_banned_ips() {
  local jail="$1"
  fail2ban-client status "$jail" 2>/dev/null \
    | sed -n 's/.*Banned IP list:[[:space:]]*//p' \
    | tr -s ' \t' '\n' \
    | grep -v '^$'
}

# Get timestamp of last ban event for a jail
# Args: $1 - jail name
# Returns: Unix timestamp (seconds since epoch) or 0 if not found
get_last_ban_timestamp() {
  _f2b_last_event_epoch "$1" "Ban"
}

# Get timestamp of last unban event for a jail
# Args: $1 - jail name
# Returns: Unix timestamp (seconds since epoch) or 0 if not found
get_last_unban_timestamp() {
  _f2b_last_event_epoch "$1" "Unban"
}

# Count ban events within a time period
# Args: $1 - jail name, $2 - time period (e.g., "1 hour ago")
# Returns: Number of ban events
get_ban_rate() {
  _f2b_count_events "$1" "Ban" "$2"
}

# Count unban events within a time period
# Args: $1 - jail name, $2 - time period (e.g., "1 hour ago")
# Returns: Number of unban events
get_unban_rate() {
  _f2b_count_events "$1" "Unban" "$2"
}

# Get top attacking IPs by ban count
# Args: $1 - jail name, $2 - limit (default: 5)
# Returns: Lines with "count IP" format, sorted by count descending
get_top_banned_ips() {
  local jail="$1"
  local limit="${2:-5}"
  grep -F -- "[${jail}] Ban " "$FAIL2BAN_LOG" 2>/dev/null \
    | grep -oE "$F2B_IPV4_RE" \
    | sort | uniq -c | sort -rn | head -n "$limit"
}

# Count unique IPs banned in a time period
# Args: $1 - jail name, $2 - time period (e.g., "24 hours ago")
# Returns: Number of unique IPs
get_unique_banned_ips() {
  local jail="$1" period="$2" count
  # Try journalctl first, fall back to the log file if it yields 0.
  count=$(_f2b_journal_events "$jail" "Ban" "$period" | _f2b_count_unique_ips)
  if [[ "${count:-0}" -eq 0 ]]; then
    count=$(_f2b_log_events "$jail" "Ban" "$period" | _f2b_count_unique_ips)
  fi
  printf '%s\n' "${count:-0}"
}

# Ban duration statistics for a jail.
# Args: $1 - jail name (currently unused)
# Returns: avg|min|max in seconds
# NOTE: placeholder only - always returns fixed values (1h avg, 30min min,
# 2h max); real durations are not computed.
get_ban_duration_stats() {
  echo "3600|1800|7200"
}

# Get the configured port for a jail
# Args: $1 - jail name
# Returns: port value from jail.d/<jail>.conf, else from the [jail] section of
#          jail.local, else "unknown"
get_jail_port() {
  local jail="$1" port
  port=$(_f2b_conf_value "${FAIL2BAN_CONF_DIR}/jail.d/${jail}.conf" "" "port")
  if [[ -z "$port" ]]; then
    port=$(_f2b_conf_value "${FAIL2BAN_CONF_DIR}/jail.local" "[${jail}]" "port")
  fi
  printf '%s\n' "${port:-unknown}"
}

# Detect protocol based on jail name
# Args: $1 - jail name
# Returns: Protocol (tcp/udp), defaults to tcp
get_jail_protocol() {
  local jail="$1"
  # Heuristic: only DNS-named jails are treated as UDP; ssh/http/nginx/apache/
  # smtp/mail and anything unknown are TCP.
  case "$jail" in
    *dns*) echo "udp" ;;
    *) echo "tcp" ;;
  esac
}

# Get the configured log path for a jail
# Args: $1 - jail name
# Returns: logpath from jail.d/<jail>.conf, else from the [jail] section of
#          jail.local, else "/var/log/auth.log"
get_jail_logpath() {
  local jail="$1" logpath
  logpath=$(_f2b_conf_value "${FAIL2BAN_CONF_DIR}/jail.d/${jail}.conf" "" "logpath")
  if [[ -z "$logpath" ]]; then
    logpath=$(_f2b_conf_value "${FAIL2BAN_CONF_DIR}/jail.local" "[${jail}]" "logpath")
  fi
  printf '%s\n' "${logpath:-/var/log/auth.log}"
}

# Get the filter name used by a jail
# Args: $1 - jail name
# Returns: filter from jail.d/<jail>.local or jail.d/<jail>.conf, else the jail
#          name itself (also when the file exists but has no filter line)
get_jail_filter() {
  local jail="$1" filter=""
  filter=$(_f2b_conf_value "${FAIL2BAN_CONF_DIR}/jail.d/${jail}.local" "" "filter")
  if [[ -z "$filter" ]]; then
    filter=$(_f2b_conf_value "${FAIL2BAN_CONF_DIR}/jail.d/${jail}.conf" "" "filter")
  fi
  printf '%s\n' "${filter:-$jail}"
}

# Report whether a jail is active
# Args: $1 - jail name
# Returns: "1" if fail2ban-client can report status for the jail, else "0"
get_jail_enabled() {
  local jail="$1"
  if fail2ban-client status "$jail" >/dev/null 2>&1; then
    echo "1"
  else
    echo "0"
  fi
}

# Count IPs banned at least <threshold> times in the last 7 days
# Args: $1 - jail name, $2 - threshold (default: 2)
# Returns: Number of repeat-offender IPs
get_repeat_offender_count() {
  local jail="$1"
  local threshold="${2:-2}"
  local count
  # Try journalctl first, fall back to the log file if it yields 0.
  count=$(_f2b_journal_events "$jail" "Ban" "7 days ago" \
    | _f2b_count_repeat_ips "$threshold")
  if [[ "${count:-0}" -eq 0 ]]; then
    count=$(_f2b_log_events "$jail" "Ban" "7 days ago" \
      | _f2b_count_repeat_ips "$threshold")
  fi
  printf '%s\n' "${count:-0}"
}

# Size of the fail2ban log in bytes (0 if missing or unreadable)
get_log_size() {
  local size=""
  if [[ -f "$FAIL2BAN_LOG" ]]; then
    size=$(stat -c %s "$FAIL2BAN_LOG" 2>/dev/null)
  fi
  printf '%s\n' "${size:-0}"
}

# Seconds since the fail2ban log was last modified (0 if the log is missing)
get_log_age() {
  local modified=""
  if [[ -f "$FAIL2BAN_LOG" ]]; then
    modified=$(stat -c %Y "$FAIL2BAN_LOG" 2>/dev/null)
    echo $(($(date +%s) - ${modified:-0}))
  else
    echo "0"
  fi
}

# Modification time of the most recent rotated log (<log>.1 or <log>-*.gz)
# Returns: Unix timestamp, or 0 when no rotated log exists
get_log_rotation_timestamp() {
  local candidate newest="" stamp=""
  # Glob and compare with -nt rather than parsing ls output.
  for candidate in "${FAIL2BAN_LOG}.1" "${FAIL2BAN_LOG}"-*.gz; do
    [[ -e "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest="$candidate"
    fi
  done
  if [[ -n "$newest" ]]; then
    stamp=$(stat -c %Y "$newest" 2>/dev/null)
  fi
  printf '%s\n' "${stamp:-0}"
}

# ============================================================================
# METRIC GENERATION
# ============================================================================

# Generate all Prometheus metrics
# Returns: Prometheus text format metrics on stdout. When fail2ban is not
#          available only "fail2ban_up 0" is emitted.
# NOTE(review): most of this function's body was lost in the damaged source.
# The two ratio computations are original; the rest is reconstructed from the
# metric list in the file header. Repeat-offender thresholds (2/3/5) and the
# -1 "never seen" value for seconds_since_* are assumptions - confirm.
generate_metrics() {
  local script_start
  script_start=$(date +%s)

  # Check fail2ban status first
  if ! check_fail2ban; then
    _f2b_metric_header fail2ban_up gauge "Exporter status (1=up, 0=down)"
    echo "fail2ban_up 0"
    return 0
  fi

  local version
  version=$(fail2ban-client version 2>/dev/null | head -n 1)
  version=$(_f2b_label_escape "${version:-unknown}")

  local -a jails=()
  read -r -a jails <<<"$(get_jails)"

  # Query each jail once and reuse the numbers for every metric family, since
  # the text format wants all samples of one metric grouped together.
  local jail cf cb tf tb
  local -A cur_failed=() cur_banned=() tot_failed=() tot_banned=()
  local -A last_ban=() last_unban=() bans_day=()
  for jail in "${jails[@]}"; do
    IFS='|' read -r cf cb tf tb <<<"$(get_jail_stats "$jail")"
    cur_failed[$jail]=${cf:-0}
    cur_banned[$jail]=${cb:-0}
    tot_failed[$jail]=${tf:-0}
    tot_banned[$jail]=${tb:-0}
    last_ban[$jail]=$(get_last_ban_timestamp "$jail")
    last_unban[$jail]=$(get_last_unban_timestamp "$jail")
    bans_day[$jail]=$(get_ban_rate "$jail" "24 hours ago")
  done

  # ========================================================================
  # CORE METRICS (v1.0)
  # ========================================================================
  _f2b_metric_header fail2ban_up gauge "Exporter status (1=up, 0=down)"
  echo "fail2ban_up 1"
  echo ""

  _f2b_metric_header fail2ban_server_info gauge "Server version info"
  echo "fail2ban_server_info{version=\"${version}\",exporter_version=\"${EXPORTER_VERSION}\"} 1"
  echo ""

  _f2b_metric_header fail2ban_jail_count gauge "Total number of jails"
  echo "fail2ban_jail_count ${#jails[@]}"
  echo ""

  _f2b_metric_header fail2ban_jail_enabled gauge "Jail enabled status"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_enabled{jail=\"$jail\"} $(get_jail_enabled "$jail")"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_failed_current gauge "Currently failed attempts"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_failed_current{jail=\"$jail\"} ${cur_failed[$jail]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_banned_current gauge "Currently banned IPs"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_banned_current{jail=\"$jail\"} ${cur_banned[$jail]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_failed_total counter "Total failed attempts"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_failed_total{jail=\"$jail\"} ${tot_failed[$jail]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_banned_total counter "Total banned IPs"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_banned_total{jail=\"$jail\"} ${tot_banned[$jail]}"
  done
  echo ""

  local ban_rate
  _f2b_metric_header fail2ban_jail_ban_rate gauge "Ban ratio: banned/failed"
  for jail in "${jails[@]}"; do
    if _f2b_is_positive_int "${tot_failed[$jail]}"; then
      # Use awk for floating point arithmetic
      ban_rate=$(awk -v b="${tot_banned[$jail]}" -v f="${tot_failed[$jail]}" \
        'BEGIN { printf "%.4f", b / f }' 2>/dev/null || echo "0")
    else
      ban_rate="0"
    fi
    echo "fail2ban_jail_ban_rate{jail=\"$jail\"} $ban_rate"
  done
  echo ""

  # ========================================================================
  # ENHANCED METRICS (v2.0) - Jail Health & Activity Tracking
  # ========================================================================
  _f2b_metric_header fail2ban_jail_last_ban_timestamp gauge "Unix timestamp of last ban"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_last_ban_timestamp{jail=\"$jail\"} ${last_ban[$jail]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_last_unban_timestamp gauge "Unix timestamp of last unban"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_last_unban_timestamp{jail=\"$jail\"} ${last_unban[$jail]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_bans_per_period gauge "Bans in 1h/24h"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"1h\"} $(get_ban_rate "$jail" "1 hour ago")"
    echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"24h\"} ${bans_day[$jail]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_unbans_per_period gauge "Unbans in 1h/24h"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"1h\"} $(get_unban_rate "$jail" "1 hour ago")"
    echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"24h\"} $(get_unban_rate "$jail" "24 hours ago")"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_unique_banned_ips gauge "Unique IPs banned"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"1h\"} $(get_unique_banned_ips "$jail" "1 hour ago")"
    echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"24h\"} $(get_unique_banned_ips "$jail" "24 hours ago")"
  done
  echo ""

  local port protocol filter
  _f2b_metric_header fail2ban_jail_info gauge "Jail configuration"
  for jail in "${jails[@]}"; do
    port=$(_f2b_label_escape "$(get_jail_port "$jail")")
    protocol=$(get_jail_protocol "$jail")
    filter=$(_f2b_label_escape "$(get_jail_filter "$jail")")
    echo "fail2ban_jail_info{jail=\"$jail\",port=\"$port\",protocol=\"$protocol\",filter=\"$filter\"} 1"
  done
  echo ""

  local hits ip
  _f2b_metric_header fail2ban_jail_top_attacker_count gauge "Top 5 attacking IPs"
  for jail in "${jails[@]}"; do
    while read -r hits ip; do
      [[ -n "$ip" ]] || continue
      echo "fail2ban_jail_top_attacker_count{jail=\"$jail\",ip=\"$ip\"} $hits"
    done < <(get_top_banned_ips "$jail" 5)
  done
  echo ""

  _f2b_metric_header fail2ban_jail_ban_rate_per_hour gauge "Average bans/hour over 24h"
  for jail in "${jails[@]}"; do
    if _f2b_is_positive_int "${bans_day[$jail]}"; then
      ban_rate=$(awk -v n="${bans_day[$jail]}" \
        'BEGIN { printf "%.2f", n / 24 }' 2>/dev/null || echo "0")
    else
      ban_rate="0.00"
    fi
    echo "fail2ban_jail_ban_rate_per_hour{jail=\"$jail\"} $ban_rate"
  done
  echo ""

  # NEW METRICS - Repeat Offenders
  local threshold
  _f2b_metric_header fail2ban_jail_repeat_offenders gauge "Repeat offender count (7d)"
  for jail in "${jails[@]}"; do
    for threshold in 2 3 5; do
      echo "fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"$threshold\"} $(get_repeat_offender_count "$jail" "$threshold")"
    done
  done
  echo ""

  local now elapsed
  now=$(date +%s)

  _f2b_metric_header fail2ban_jail_seconds_since_last_ban gauge "Seconds since last ban"
  for jail in "${jails[@]}"; do
    if _f2b_is_positive_int "${last_ban[$jail]}"; then
      elapsed=$((now - last_ban[$jail]))
    else
      elapsed=-1 # no ban found in the log
    fi
    echo "fail2ban_jail_seconds_since_last_ban{jail=\"$jail\"} $elapsed"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_seconds_since_last_unban gauge "Seconds since last unban"
  for jail in "${jails[@]}"; do
    if _f2b_is_positive_int "${last_unban[$jail]}"; then
      elapsed=$((now - last_unban[$jail]))
    else
      elapsed=-1 # no unban found in the log
    fi
    echo "fail2ban_jail_seconds_since_last_unban{jail=\"$jail\"} $elapsed"
  done
  echo ""

  # Log file health
  _f2b_metric_header fail2ban_log_size_bytes gauge "Size of fail2ban.log file"
  echo "fail2ban_log_size_bytes $(get_log_size)"
  _f2b_metric_header fail2ban_log_age_seconds gauge "Time since last log modification"
  echo "fail2ban_log_age_seconds $(get_log_age)"
  _f2b_metric_header fail2ban_log_rotation_timestamp gauge "Last log rotation time"
  echo "fail2ban_log_rotation_timestamp $(get_log_rotation_timestamp)"
  echo ""

  # Exporter self-metrics
  now=$(date +%s)
  _f2b_metric_header fail2ban_exporter_duration_seconds gauge "Script execution time"
  echo "fail2ban_exporter_duration_seconds $((now - script_start))"
  _f2b_metric_header fail2ban_exporter_last_run_timestamp gauge "Last successful run time"
  echo "fail2ban_exporter_last_run_timestamp $now"
}

# ============================================================================
# HTTP SERVER
# ============================================================================

# Answer one HTTP request: request on stdin, response on stdout.
# "GET /metrics..." gets the metrics; anything else gets the landing page.
# NOTE(review): the landing-page markup was stripped from the damaged source;
# the tags below are reconstructed around the surviving text.
_f2b_handle_http_request() {
  local request=""
  read -r request

  # Check if request is for /metrics endpoint
  if [[ "$request" =~ ^GET\ /metrics ]]; then
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
    generate_metrics
  else
    # Serve HTML landing page for other requests
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
    cat <<'EOF'
<html>
<head><title>Fail2ban Exporter v2.0</title></head>
<body>
<h1>Fail2ban Prometheus Exporter (Enhanced v2.0)</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>New Metrics</h2>
<ul>
  <li>Last ban/unban timestamps per jail</li>
  <li>Ban/unban rates (1h, 24h)</li>
  <li>Unique banned IPs per period</li>
  <li>Top attackers per jail</li>
  <li>Jail configuration info (port, protocol, filter)</li>
  <li>Ban rate per hour</li>
</ul>
</body>
</html>
EOF
  fi
}

# Serve metrics over HTTP with netcat, one connection at a time.
# Blocks until the process is killed; exits 1 if nc or the FIFO is unavailable.
# A FIFO feeds the handler's output back into nc so the handler can read the
# request line from the socket. Piping the handler straight into nc would make
# it read the script's own stdin instead.
run_http_server() {
  echo "Starting fail2ban exporter on port ${HTTP_PORT} (endpoint: /metrics)" >&2

  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  local fifo_dir fifo
  if ! fifo_dir=$(mktemp -d); then
    echo "ERROR: Failed to create temporary directory" >&2
    exit 1
  fi
  fifo="${fifo_dir}/response"
  if ! mkfifo "$fifo"; then
    rm -rf -- "$fifo_dir"
    echo "ERROR: Failed to create FIFO $fifo" >&2
    exit 1
  fi
  # Path is expanded now: the local variable is gone by the time the trap runs.
  # shellcheck disable=SC2064
  trap "rm -rf -- '${fifo_dir}'" EXIT

  # Infinite loop accepting HTTP requests
  while true; do
    # -q 1: wait 1 second after EOF before closing
    nc -l -p "$HTTP_PORT" -q 1 <"$fifo" 2>/dev/null \
      | _f2b_handle_http_request >"$fifo"
    # Avoid a busy loop if nc cannot bind (e.g. port already in use).
    if [[ "${PIPESTATUS[0]}" -ne 0 ]]; then
      sleep 1
    fi
  done
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Main entry point - routes to appropriate output mode
# Args: the script's arguments
main() {
  parse_args "$@"

  if [ "$HTTP_MODE" = true ]; then
    # Run HTTP server (blocks until killed)
    run_http_server
  elif [ -n "$OUTPUT_FILE" ]; then
    # Textfile collector mode: write atomically using temp file
    local output_dir
    output_dir="$(dirname "$OUTPUT_FILE")"
    mkdir -p "$output_dir"

    # Create temp file in SAME directory for atomic rename (same filesystem)
    local temp_file
    if ! temp_file=$(mktemp "${output_dir}/.fail2ban_metrics.XXXXXX"); then
      echo "ERROR: Failed to create temp file in $output_dir" >&2
      exit 1
    fi

    # Generate metrics to temp file
    if ! generate_metrics >"$temp_file" 2>/dev/null; then
      rm -f "$temp_file"
      echo "ERROR: Failed to generate metrics" >&2
      exit 1
    fi

    # Validate: the file must contain a fail2ban_up sample. "fail2ban_up 0"
    # (fail2ban down) is valid output and must reach the collector, so the
    # check is on that sample rather than on a minimum line count.
    local file_lines
    file_lines=$(wc -l <"$temp_file" 2>/dev/null || echo 0)
    if ! grep -q '^fail2ban_up ' "$temp_file" 2>/dev/null; then
      rm -f "$temp_file"
      echo "ERROR: Metrics file incomplete ($file_lines lines), keeping previous" >&2
      exit 1
    fi

    # Set permissions before move
    chmod 644 "$temp_file"

    # Atomic rename - no gap where file is missing
    mv -f "$temp_file" "$OUTPUT_FILE"

    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
  else
    # Default: output to stdout
    generate_metrics
  fi
}

# Execute main function with all script arguments
main "$@"