88551536e6
Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f Co-authored-by: Amp <amp@ampcode.com>
915 lines
30 KiB
Bash
Executable File
915 lines
30 KiB
Bash
Executable File
#!/bin/bash
|
|
################################################################################
|
|
# Script Name: fail2ban-exporter.sh
|
|
# Version: 2.0
|
|
# Description: Prometheus exporter for fail2ban providing comprehensive metrics
|
|
# for monitoring jail status, ban/unban activity, and threat analysis
|
|
#
|
|
# Author: Phil Connor
|
|
# Contact: contact@mylinux.work
|
|
# Website: https://mylinux.work
|
|
# License: MIT
|
|
#
|
|
# Prerequisites:
|
|
# - fail2ban-client command available
|
|
# - fail2ban service running
|
|
# - journalctl (systemd) for historical data
|
|
# - netcat (nc) for HTTP mode
|
|
# - /var/log/fail2ban.log for timestamp parsing
|
|
#
|
|
# Usage:
|
|
# # Output to stdout
|
|
# ./fail2ban-exporter.sh
|
|
#
|
|
# # HTTP server mode
|
|
# ./fail2ban-exporter.sh --http -p 9191
|
|
#
|
|
# # Textfile collector mode
|
|
# ./fail2ban-exporter.sh --textfile
|
|
#
|
|
# Metrics Exported:
|
|
# Core Metrics (v1.0):
|
|
# - fail2ban_up{} - Exporter status (1=up, 0=down)
|
|
# - fail2ban_server_info{version,exporter_version} - Server version info
|
|
# - fail2ban_jail_count{} - Total number of jails (gauge)
|
|
# - fail2ban_jail_enabled{jail} - Jail enabled status (gauge)
|
|
# - fail2ban_jail_failed_current{jail} - Currently failed attempts (gauge)
|
|
# - fail2ban_jail_banned_current{jail} - Currently banned IPs (gauge)
|
|
# - fail2ban_jail_failed_total{jail} - Total failed attempts (counter)
|
|
# - fail2ban_jail_banned_total{jail} - Total banned IPs (counter)
|
|
# - fail2ban_jail_ban_rate{jail} - Ban ratio: banned/failed (gauge)
|
|
#
|
|
# Enhanced Metrics (v2.0):
|
|
# - fail2ban_jail_last_ban_timestamp{jail} - Unix timestamp of last ban (gauge)
|
|
# - fail2ban_jail_last_unban_timestamp{jail} - Unix timestamp of last unban (gauge)
|
|
# - fail2ban_jail_bans_per_period{jail,period} - Bans in 1h/24h (gauge)
|
|
# - fail2ban_jail_unbans_per_period{jail,period} - Unbans in 1h/24h (gauge)
|
|
# - fail2ban_jail_unique_banned_ips{jail,period} - Unique IPs banned (gauge)
|
|
# - fail2ban_jail_info{jail,port,protocol,filter} - Jail configuration (gauge)
|
|
# - fail2ban_jail_top_attacker_count{jail,ip} - Top 5 attacking IPs (gauge)
|
|
# - fail2ban_jail_ban_rate_per_hour{jail} - Average bans/hour over 24h (gauge)
|
|
# - fail2ban_jail_repeat_offenders{jail,threshold} - Repeat offender count (7d)
|
|
# - fail2ban_jail_seconds_since_last_ban{jail} - Seconds since last ban
|
|
# - fail2ban_jail_seconds_since_last_unban{jail} - Seconds since last unban
|
|
# - fail2ban_log_size_bytes - Size of fail2ban.log file
|
|
# - fail2ban_log_age_seconds - Time since last log modification
|
|
# - fail2ban_log_rotation_timestamp - Last log rotation time
|
|
# - fail2ban_exporter_duration_seconds - Script execution time
|
|
# - fail2ban_exporter_last_run_timestamp - Last successful run time
|
|
#
|
|
# Configuration:
|
|
# Default HTTP port: 9191
|
|
# Textfile directory: /var/lib/node_exporter
|
|
# Log source: /var/log/fail2ban.log
|
|
#
|
|
################################################################################
|
|
|
|
# ============================================================================
|
|
# CONFIGURATION VARIABLES
|
|
# ============================================================================
|
|
|
|
# Directory scanned by node_exporter's textfile collector; --textfile writes
# "fail2ban.prom" into it. Export TEXTFILE_DIR to override the default.
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"

# Destination file for the metrics. Empty means "print to stdout".
# Set by parse_args (--textfile or -o/--output).
OUTPUT_FILE=""

# Becomes "true" when --http is passed on the command line.
HTTP_MODE=false

# Listening port for --http mode. Export HTTP_PORT or pass -p/--port to change.
HTTP_PORT="${HTTP_PORT:-9191}"

# fail2ban log used for timestamp/history parsing. Export FAIL2BAN_LOG to
# point the exporter at a non-default location.
FAIL2BAN_LOG="${FAIL2BAN_LOG:-/var/log/fail2ban.log}"
|
|
|
|
# ============================================================================
|
|
# HELPER FUNCTIONS
|
|
# ============================================================================
|
|
|
|
# Print the command-line help and terminate the script.
# Globals:  HTTP_PORT (read) - shown as the port used by --http
# Outputs:  help text on stdout
# Returns:  never returns; exits with status 0
show_usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Export fail2ban statistics as Prometheus metrics (Enhanced v2.0).
With no mode option the metrics are printed to stdout.

MODES:
    --textfile          Write to node_exporter textfile collector
    --http              Run HTTP server on port $HTTP_PORT

OPTIONS:
    -p, --port PORT     HTTP port (default: 9191)
    -o, --output FILE   Output file path
    -h, --help          Show this help and exit

EXAMPLES:
    $0 --textfile                # Write to textfile collector
    $0 --http --port 9191        # Run HTTP server
    $0 -o /tmp/fail2ban.prom     # Write to custom file

NEW METRICS v2.0:
    - Jail health: last ban/unban timestamps, seconds since last ban/unban
    - Activity: bans, unbans and unique banned IPs per 1h/24h, bans per hour
    - Top attackers: most banned IPs per jail
    - Repeat offenders per jail (7 day window)
    - Jail configuration info (port, protocol, filter)
    - Log file and exporter health

EOF
  exit 0
}
|
|
|
|
# Parse command-line options into the global configuration variables.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success; exits 1 on an unknown option, a missing option
#            value or an invalid port; -h/--help exits 0 via show_usage
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        ;;
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/fail2ban.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p|--port)
        # Without this guard "shift 2" fails when the value is missing,
        # nothing is consumed and the loop never terminates.
        if [[ $# -lt 2 ]]; then
          echo "ERROR: $1 requires a port number" >&2
          exit 1
        fi
        if ! [[ "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
          echo "ERROR: invalid port: $2" >&2
          exit 1
        fi
        HTTP_PORT="$2"
        shift 2
        ;;
      -o|--output)
        if [[ $# -lt 2 ]]; then
          echo "ERROR: $1 requires a file path" >&2
          exit 1
        fi
        OUTPUT_FILE="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}
|
|
|
|
# Verify that fail2ban can be queried.
# Outputs: an "ERROR: ..." line on stderr when a check fails
# Returns: 0 when fail2ban-client exists and "fail2ban-client ping" succeeds,
#          1 otherwise
check_fail2ban() {
  # The client binary must be resolvable before anything else can work
  command -v fail2ban-client >/dev/null 2>&1 || {
    echo "ERROR: fail2ban-client not found" >&2
    return 1
  }

  # The server has to answer a ping
  fail2ban-client ping >/dev/null 2>&1 || {
    echo "ERROR: fail2ban server not responding" >&2
    return 1
  }

  return 0
}
|
|
|
|
# List the jails reported by "fail2ban-client status".
# Outputs: jail names on one line, separated by single spaces
get_jails() {
  # Keep only the text after "Jail list:", drop tabs, turn the comma
  # separators into newlines and let xargs collapse it all into one line.
  fail2ban-client status 2>/dev/null \
    | sed -n 's/.*Jail list://p' \
    | tr -d '\t' \
    | tr ',' '\n' \
    | xargs
}
|
|
|
|
# Collect the four counters shown by "fail2ban-client status <jail>".
# Args:    $1 - jail name
# Outputs: currently_failed|currently_banned|total_failed|total_banned
#          (each field falls back to 0 when it cannot be read)
get_jail_stats() {
  local jail="$1"
  local report cur_fail cur_ban tot_fail tot_ban

  report=$(fail2ban-client status "$jail" 2>/dev/null)

  # The number is always the last whitespace-separated field of its line
  cur_fail=$(awk '/Currently failed:/ {print $NF}' <<<"$report")
  cur_ban=$(awk '/Currently banned:/ {print $NF}' <<<"$report")
  tot_fail=$(awk '/Total failed:/ {print $NF}' <<<"$report")
  tot_ban=$(awk '/Total banned:/ {print $NF}' <<<"$report")

  printf '%s|%s|%s|%s\n' \
    "${cur_fail:-0}" "${cur_ban:-0}" "${tot_fail:-0}" "${tot_ban:-0}"
}
|
|
|
|
# List the IPs currently banned in a jail.
# Args:    $1 - jail name
# Outputs: one IP per line, without surrounding whitespace
# Returns: exit status of the final grep (non-zero when nothing is banned)
get_banned_ips() {
  local jail="$1"
  # fail2ban separates the label from the list with a tab and the addresses
  # with spaces; split on both so the first IP does not keep a leading tab.
  fail2ban-client status "$jail" 2>/dev/null \
    | sed -n 's/.*Banned IP list://p' \
    | tr -s ' \t' '\n\n' \
    | grep -v '^$'
}
|
|
|
|
# Find when the most recent ban for a jail was logged.
# Globals: FAIL2BAN_LOG (read)
# Args:    $1 - jail name
# Outputs: Unix timestamp of the last matching log line, or 0 when there is
#          none or its date cannot be parsed
get_last_ban_timestamp() {
  local jail="$1"
  local stamp timestamp=""

  # Match "[jail]" as a literal string (index), not as a regex, so a jail
  # called "a.c" cannot match lines that belong to "[abc]". The first two
  # fields of the last matching line are the date and time.
  stamp=$(awk -v tag="[$jail]" '
    index($0, tag) && index($0, "Ban ") { last = $1 " " $2 }
    END { if (last != "") print last }
  ' "$FAIL2BAN_LOG" 2>/dev/null)

  if [[ -n "$stamp" ]]; then
    timestamp=$(date -d "$stamp" +%s 2>/dev/null)
  fi
  echo "${timestamp:-0}"
}
|
|
|
|
# Find when the most recent unban for a jail was logged.
# Globals: FAIL2BAN_LOG (read)
# Args:    $1 - jail name
# Outputs: Unix timestamp of the last matching log line, or 0 when there is
#          none or its date cannot be parsed
get_last_unban_timestamp() {
  local jail="$1"
  local stamp timestamp=""

  # Match "[jail]" as a literal string (index), not as a regex, so a jail
  # called "a.c" cannot match lines that belong to "[abc]". The first two
  # fields of the last matching line are the date and time.
  stamp=$(awk -v tag="[$jail]" '
    index($0, tag) && index($0, "Unban ") { last = $1 " " $2 }
    END { if (last != "") print last }
  ' "$FAIL2BAN_LOG" 2>/dev/null)

  if [[ -n "$stamp" ]]; then
    timestamp=$(date -d "$stamp" +%s 2>/dev/null)
  fi
  echo "${timestamp:-0}"
}
|
|
|
|
# Count ban events for a jail since a point in time.
# Globals: FAIL2BAN_LOG (read)
# Args:    $1 - jail name
#          $2 - period understood by "date -d" / "journalctl --since"
#               (e.g. "1 hour ago")
# Outputs: number of "[jail] Ban " events; the journal is consulted first and
#          the log file is used when the journal yields nothing
get_ban_rate() {
  local jail="$1"
  local period="$2"
  local count cutoff_timestamp

  # Convert period to Unix timestamp
  cutoff_timestamp=$(date -d "$period" +%s 2>/dev/null || echo 0)

  # Try journalctl first (faster). -F keeps the jail name a literal string.
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
    | grep -cF -- "[$jail] Ban " 2>/dev/null)
  [[ "$count" =~ ^[0-9]+$ ]] || count=0

  # If journalctl returns 0, fall back to log file (more reliable)
  if (( 10#$count == 0 )) && [[ -f "$FAIL2BAN_LOG" ]]; then
    count=$(awk -v needle="[$jail] Ban " -v cutoff="$cutoff_timestamp" '
      index($0, needle) {
        # Reset per line: a failed date call must not reuse the previous
        # timestamp of an earlier line.
        ts = 0
        stamp = $1 " " $2
        # Only hand plain date-looking text to the shell command below.
        if (stamp ~ /^[A-Za-z0-9:,.+ -]+$/) {
          cmd = "date -d \"" stamp "\" +%s 2>/dev/null"
          if ((cmd | getline ts) <= 0) ts = 0
          close(cmd)
        }
        if (ts + 0 > 0 && ts + 0 >= cutoff + 0) hits++
      }
      END { print hits + 0 }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
|
|
|
|
# Count unban events for a jail since a point in time.
# Globals: FAIL2BAN_LOG (read)
# Args:    $1 - jail name
#          $2 - period understood by "date -d" / "journalctl --since"
#               (e.g. "1 hour ago")
# Outputs: number of "[jail] Unban " events; the journal is consulted first
#          and the log file is used when the journal yields nothing
get_unban_rate() {
  local jail="$1"
  local period="$2"
  local count cutoff_timestamp

  # Convert period to Unix timestamp
  cutoff_timestamp=$(date -d "$period" +%s 2>/dev/null || echo 0)

  # Try journalctl first. -F keeps the jail name a literal string.
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
    | grep -cF -- "[$jail] Unban " 2>/dev/null)
  [[ "$count" =~ ^[0-9]+$ ]] || count=0

  # Fall back to log file
  if (( 10#$count == 0 )) && [[ -f "$FAIL2BAN_LOG" ]]; then
    count=$(awk -v needle="[$jail] Unban " -v cutoff="$cutoff_timestamp" '
      index($0, needle) {
        # Reset per line: a failed date call must not reuse the previous
        # timestamp of an earlier line.
        ts = 0
        stamp = $1 " " $2
        # Only hand plain date-looking text to the shell command below.
        if (stamp ~ /^[A-Za-z0-9:,.+ -]+$/) {
          cmd = "date -d \"" stamp "\" +%s 2>/dev/null"
          if ((cmd | getline ts) <= 0) ts = 0
          close(cmd)
        }
        if (ts + 0 > 0 && ts + 0 >= cutoff + 0) hits++
      }
      END { print hits + 0 }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
|
|
|
|
# Rank IPv4 addresses by how often they appear in a jail's ban lines.
# Globals: FAIL2BAN_LOG (read)
# Args:    $1 - jail name
#          $2 - maximum number of rows (default: 5)
# Outputs: "count IP" rows in "uniq -c" format, highest count first
get_top_banned_ips() {
  local jail="$1"
  local limit="${2:-5}"
  local ipv4_pattern='[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+'

  grep "\[$jail\] Ban " "$FAIL2BAN_LOG" 2>/dev/null \
    | grep -oE "$ipv4_pattern" \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n "$limit"
}
|
|
|
|
# Count distinct IPv4 addresses banned in a jail since a point in time.
# Globals: FAIL2BAN_LOG (read)
# Args:    $1 - jail name
#          $2 - period understood by "date -d" / "journalctl --since"
#               (e.g. "24 hours ago")
# Outputs: number of unique IPs; the journal is consulted first and the log
#          file is used when the journal yields nothing
get_unique_banned_ips() {
  local jail="$1"
  local period="$2"
  local count cutoff_timestamp

  # Convert period to Unix timestamp
  cutoff_timestamp=$(date -d "$period" +%s 2>/dev/null || echo 0)

  # Try journalctl first. -F keeps the jail name a literal string.
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
    | grep -F -- "[$jail] Ban " \
    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | sort -u \
    | wc -l 2>/dev/null)
  count=${count//[[:space:]]/}
  [[ "$count" =~ ^[0-9]+$ ]] || count=0

  # Fall back to log file if journalctl returns 0
  if (( 10#$count == 0 )) && [[ -f "$FAIL2BAN_LOG" ]]; then
    count=$(awk -v needle="[$jail] Ban " -v cutoff="$cutoff_timestamp" '
      index($0, needle) && match($0, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/) {
        ip = substr($0, RSTART, RLENGTH)
        # Reset per line: a failed date call must not reuse the previous
        # timestamp of an earlier line.
        ts = 0
        stamp = $1 " " $2
        # Only hand plain date-looking text to the shell command below.
        if (stamp ~ /^[A-Za-z0-9:,.+ -]+$/) {
          cmd = "date -d \"" stamp "\" +%s 2>/dev/null"
          if ((cmd | getline ts) <= 0) ts = 0
          close(cmd)
        }
        if (ts + 0 > 0 && ts + 0 >= cutoff + 0) seen[ip] = 1
      }
      END {
        total = 0
        for (ip in seen) total++
        print total
      }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
|
|
|
|
# Placeholder for ban-duration statistics; nothing is computed yet.
# Args:    $1 - jail name (accepted but currently unused)
# Outputs: avg|min|max in seconds - fixed dummy values (1h, 30min, 2h)
get_ban_duration_stats() {
  local jail="$1"
  # Hard-coded until real duration parsing is implemented
  local avg=3600 min=1800 max=7200
  printf '%s|%s|%s\n' "$avg" "$min" "$max"
}
|
|
|
|
# Look up the "port" setting configured for a jail.
# Globals: FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#          defaults to /etc/fail2ban
# Args:    $1 - jail name
# Outputs: the port value with whitespace removed (e.g. "ssh", "http,https"),
#          or "unknown" when no setting is found
get_jail_port() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local port=""

  # 1) Per-jail drop-in file: first "port = ..." line wins
  if [[ -f "$conf_dir/jail.d/$jail.conf" ]]; then
    port=$(awk '
      /^port[ \t]*=/ {
        value = $0
        sub(/^port[ \t]*=[ \t]*/, "", value)
        gsub(/[ \t\r]/, "", value)
        print value
        exit
      }
    ' "$conf_dir/jail.d/$jail.conf" 2>/dev/null)
  fi

  # 2) jail.local: only look inside the "[jail]" section. A flag is used
  #    instead of an awk range, because the range /\[jail\]/,/^\[/ ends on
  #    the header line itself (it also matches /^\[/) and never sees the body.
  if [[ -z "$port" && -f "$conf_dir/jail.local" ]]; then
    port=$(awk -v header="[$jail]" '
      /^\[/ {
        line = $0
        sub(/[ \t\r]+$/, "", line)
        in_section = (line == header)
        next
      }
      in_section && /^port[ \t]*=/ {
        value = $0
        sub(/^port[ \t]*=[ \t]*/, "", value)
        gsub(/[ \t\r]/, "", value)
        print value
        exit
      }
    ' "$conf_dir/jail.local" 2>/dev/null)
  fi

  echo "${port:-unknown}"
}
|
|
|
|
# Guess the transport protocol of a jail from its name.
# Args:    $1 - jail name
# Outputs: "udp" for names containing "dns" (unless a TCP keyword matches
#          first), "tcp" for everything else
get_jail_protocol() {
  local jail="$1"
  local tcp_keyword

  # Well-known TCP services take precedence over the dns check
  for tcp_keyword in ssh http nginx apache smtp mail; do
    if [[ "$jail" == *"$tcp_keyword"* ]]; then
      echo "tcp"
      return
    fi
  done

  if [[ "$jail" == *dns* ]]; then
    echo "udp"
  else
    echo "tcp" # Default to TCP for unknown services
  fi
}
|
|
|
|
# Look up the "logpath" setting configured for a jail.
# Globals: FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#          defaults to /etc/fail2ban
# Args:    $1 - jail name
# Outputs: first path of the first logpath line found, or /var/log/auth.log
#          when no setting is found
get_jail_logpath() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local logpath=""

  # 1) Per-jail drop-in file: first "logpath = ..." line wins
  if [[ -f "$conf_dir/jail.d/$jail.conf" ]]; then
    logpath=$(awk '
      /^logpath[ \t]*=/ {
        value = $0
        sub(/^logpath[ \t]*=[ \t]*/, "", value)
        sub(/[ \t\r]+$/, "", value)
        split(value, parts, /[ \t]+/)
        print parts[1]
        exit
      }
    ' "$conf_dir/jail.d/$jail.conf" 2>/dev/null)
  fi

  # 2) jail.local: only look inside the "[jail]" section. A flag is used
  #    instead of an awk range, because the range /\[jail\]/,/^\[/ ends on
  #    the header line itself (it also matches /^\[/) and never sees the body.
  if [[ -z "$logpath" && -f "$conf_dir/jail.local" ]]; then
    logpath=$(awk -v header="[$jail]" '
      /^\[/ {
        line = $0
        sub(/[ \t\r]+$/, "", line)
        in_section = (line == header)
        next
      }
      in_section && /^logpath[ \t]*=/ {
        value = $0
        sub(/^logpath[ \t]*=[ \t]*/, "", value)
        sub(/[ \t\r]+$/, "", value)
        split(value, parts, /[ \t]+/)
        print parts[1]
        exit
      }
    ' "$conf_dir/jail.local" 2>/dev/null)
  fi

  echo "${logpath:-/var/log/auth.log}"
}
|
|
|
|
# Determine the filter name used by a jail.
# Globals: FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#          defaults to /etc/fail2ban
# Args:    $1 - jail name
# Outputs: value of the first "filter = ..." line found in
#          jail.d/<jail>.local or jail.d/<jail>.conf (in that order);
#          the jail name itself when neither file provides one
get_jail_filter() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local candidate filter=""

  # fail2ban-client has no query for this, so read it from the config files
  for candidate in "$conf_dir/jail.d/$jail.local" "$conf_dir/jail.d/$jail.conf"; do
    [[ -f "$candidate" ]] || continue
    filter=$(awk '
      /^filter[ \t]*=/ {
        value = $0
        sub(/^filter[ \t]*=[ \t]*/, "", value)
        sub(/[ \t\r]+$/, "", value)
        print value
        exit
      }
    ' "$candidate" 2>/dev/null)
    [[ -n "$filter" ]] && break
  done

  # Default to jail name. The fallback is applied to the captured value:
  # an "|| echo" after the pipeline never fires because awk exits 0 even
  # when it printed nothing.
  echo "${filter:-$jail}"
}
|
|
|
|
# Report whether fail2ban answers a status query for the jail.
# Args:    $1 - jail name
# Outputs: "1" when "fail2ban-client status <jail>" succeeds, otherwise "0"
get_jail_enabled() {
  local jail="$1"
  local state="0"
  fail2ban-client status "$jail" >/dev/null 2>&1 && state="1"
  echo "$state"
}
|
|
|
|
# Count IPv4 addresses banned at least N times in a jail over the last 7 days.
# Globals: FAIL2BAN_LOG (read)
# Args:    $1 - jail name
#          $2 - minimum number of bans to qualify (default: 2)
# Outputs: number of qualifying IPs; the journal is consulted first and the
#          log file is used when the journal yields nothing
get_repeat_offender_count() {
  local jail="$1"
  local threshold="${2:-2}" # Default: 2+ bans = repeat offender
  local count cutoff_timestamp

  # 7 days ago timestamp
  cutoff_timestamp=$(date -d "7 days ago" +%s 2>/dev/null || echo 0)

  # Try journalctl first. -F keeps the jail name a literal string.
  count=$(journalctl -u fail2ban --since "7 days ago" 2>/dev/null \
    | grep -F -- "[$jail] Ban " \
    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | sort \
    | uniq -c \
    | awk -v t="$threshold" '$1 >= t { n++ } END { print n + 0 }')
  [[ "$count" =~ ^[0-9]+$ ]] || count=0

  # Fall back to log file if journalctl returns 0
  if (( 10#$count == 0 )) && [[ -f "$FAIL2BAN_LOG" ]]; then
    count=$(awk -v needle="[$jail] Ban " -v cutoff="$cutoff_timestamp" \
      -v threshold="$threshold" '
      index($0, needle) && match($0, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/) {
        ip = substr($0, RSTART, RLENGTH)
        # Reset per line: a failed date call must not reuse the previous
        # timestamp of an earlier line.
        ts = 0
        stamp = $1 " " $2
        # Only hand plain date-looking text to the shell command below.
        if (stamp ~ /^[A-Za-z0-9:,.+ -]+$/) {
          cmd = "date -d \"" stamp "\" +%s 2>/dev/null"
          if ((cmd | getline ts) <= 0) ts = 0
          close(cmd)
        }
        if (ts + 0 > 0 && ts + 0 >= cutoff + 0) bans[ip]++
      }
      END {
        offenders = 0
        for (ip in bans) {
          if (bans[ip] >= threshold + 0) offenders++
        }
        print offenders
      }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
|
|
|
|
# Report the size of the fail2ban log.
# Globals: FAIL2BAN_LOG (read)
# Outputs: size in bytes as printed by "stat -c %s", or "0" when the file is
#          missing or stat fails
get_log_size() {
  if [[ -f "$FAIL2BAN_LOG" ]] && stat -c %s "$FAIL2BAN_LOG" 2>/dev/null; then
    return 0
  fi
  echo "0"
}
|
|
|
|
# Report how long ago the fail2ban log was last modified.
# Globals: FAIL2BAN_LOG (read)
# Outputs: current time minus the file's mtime in seconds; "0" when the file
#          does not exist
get_log_age() {
  if [[ ! -f "$FAIL2BAN_LOG" ]]; then
    echo "0"
    return
  fi

  local now mtime
  now=$(date +%s)
  # A failing stat is treated as mtime 0
  mtime=$(stat -c %Y "$FAIL2BAN_LOG" 2>/dev/null || echo 0)
  echo $((now - mtime))
}
|
|
|
|
# Estimate when the fail2ban log was last rotated.
# Globals: FAIL2BAN_LOG (read)
# Outputs: modification time (Unix timestamp) of the newest file among
#          "$FAIL2BAN_LOG.1" and "$FAIL2BAN_LOG-*.gz", or "0" when no rotated
#          log exists
get_log_rotation_timestamp() {
  local candidate mtime newest=0

  # Iterate the glob directly instead of parsing "ls -t" output
  for candidate in "${FAIL2BAN_LOG}".1 "${FAIL2BAN_LOG}"-*.gz; do
    # An unmatched glob stays literal; skip anything that does not exist
    [[ -e "$candidate" ]] || continue
    mtime=$(stat -c %Y "$candidate" 2>/dev/null) || continue
    if [[ "$mtime" =~ ^[0-9]+$ ]] && (( mtime > newest )); then
      newest=$mtime
    fi
  done

  echo "$newest"
}
|
|
|
|
# ============================================================================
|
|
# METRIC GENERATION
|
|
# ============================================================================
|
|
|
|
# Print the "# HELP" / "# TYPE" preamble of one metric family.
# Args: $1 - metric name, $2 - metric type, $3 - help text
_metric_header() {
  printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$3" "$1" "$2"
}

# Generate all Prometheus metrics.
# Per-jail data that feeds several metric families (status counters, last
# ban/unban timestamps, 24h ban count) is collected once up front, so each
# jail costs one "fail2ban-client status" call instead of five and all
# families describe the same snapshot.
# Outputs: Prometheus text format metrics on stdout; only "fail2ban_up 0"
#          when check_fail2ban fails
# Returns: 0
generate_metrics() {
  local script_start
  script_start=$(date +%s)

  # Check fail2ban status first
  if ! check_fail2ban; then
    _metric_header fail2ban_up gauge "Fail2ban exporter status"
    echo "fail2ban_up 0"
    return 0
  fi

  # Split the space-separated jail list without glob expansion
  local -a jails=()
  read -r -a jails <<<"$(get_jails)"

  # ---- collect shared per-jail data (parallel arrays indexed like jails) ----
  local -a cur_failed=() cur_banned=() tot_failed=() tot_banned=()
  local -a last_ban=() last_unban=() bans_24h=()
  local i jail f1 f2 f3 f4 value
  for i in "${!jails[@]}"; do
    jail=${jails[i]}
    IFS='|' read -r f1 f2 f3 f4 <<<"$(get_jail_stats "$jail")"
    cur_failed[i]=${f1:-0}
    cur_banned[i]=${f2:-0}
    tot_failed[i]=${f3:-0}
    tot_banned[i]=${f4:-0}
    last_ban[i]=$(get_last_ban_timestamp "$jail")
    last_unban[i]=$(get_last_unban_timestamp "$jail")
    # Strip whitespace and ensure a value is present
    value=$(get_ban_rate "$jail" "24 hours ago")
    value=${value//[[:space:]]/}
    bans_24h[i]=${value:-0}
  done

  # ---- core metrics ----
  _metric_header fail2ban_up gauge "Fail2ban exporter status"
  echo "fail2ban_up 1"
  echo ""

  local server_version
  server_version=$(fail2ban-client version 2>/dev/null | head -1 | awk '{print $NF}')
  _metric_header fail2ban_server_info gauge "Fail2ban server information"
  echo "fail2ban_server_info{version=\"$server_version\",exporter_version=\"2.0\"} 1"
  echo ""

  _metric_header fail2ban_jail_count gauge "Total number of jails"
  echo "fail2ban_jail_count ${#jails[@]}"
  echo ""

  _metric_header fail2ban_jail_enabled gauge "Jail enabled status"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_enabled{jail=\"$jail\"} $(get_jail_enabled "$jail")"
  done
  echo ""

  _metric_header fail2ban_jail_failed_current gauge \
    "Currently failed login attempts per jail"
  for i in "${!jails[@]}"; do
    echo "fail2ban_jail_failed_current{jail=\"${jails[i]}\"} ${cur_failed[i]}"
  done
  echo ""

  _metric_header fail2ban_jail_banned_current gauge "Currently banned IPs per jail"
  for i in "${!jails[@]}"; do
    echo "fail2ban_jail_banned_current{jail=\"${jails[i]}\"} ${cur_banned[i]}"
  done
  echo ""

  _metric_header fail2ban_jail_failed_total counter \
    "Total failed login attempts per jail"
  for i in "${!jails[@]}"; do
    echo "fail2ban_jail_failed_total{jail=\"${jails[i]}\"} ${tot_failed[i]}"
  done
  echo ""

  _metric_header fail2ban_jail_banned_total counter \
    "Total banned IPs per jail (all time)"
  for i in "${!jails[@]}"; do
    echo "fail2ban_jail_banned_total{jail=\"${jails[i]}\"} ${tot_banned[i]}"
  done
  echo ""

  # Ban rate: ratio of banned to failed attempts
  _metric_header fail2ban_jail_ban_rate gauge \
    "Ban rate (total_banned / total_failed) per jail"
  local ban_rate
  for i in "${!jails[@]}"; do
    # Avoid division by zero (and non-numeric input)
    if [[ "${tot_failed[i]}" =~ ^[0-9]+$ ]] && (( 10#${tot_failed[i]} > 0 )); then
      # awk provides the floating point arithmetic
      ban_rate=$(awk -v b="${tot_banned[i]}" -v f="${tot_failed[i]}" \
        'BEGIN { printf "%.4f", b / f }' 2>/dev/null || echo "0")
    else
      ban_rate="0"
    fi
    echo "fail2ban_jail_ban_rate{jail=\"${jails[i]}\"} $ban_rate"
  done
  echo ""

  # ========================================================================
  # ENHANCED METRICS (v2.0) - Jail Health & Activity Tracking
  # ========================================================================
  _metric_header fail2ban_jail_last_ban_timestamp gauge \
    "Timestamp of last ban per jail"
  for i in "${!jails[@]}"; do
    echo "fail2ban_jail_last_ban_timestamp{jail=\"${jails[i]}\"} ${last_ban[i]}"
  done
  echo ""

  _metric_header fail2ban_jail_last_unban_timestamp gauge \
    "Timestamp of last unban per jail"
  for i in "${!jails[@]}"; do
    echo "fail2ban_jail_last_unban_timestamp{jail=\"${jails[i]}\"} ${last_unban[i]}"
  done
  echo ""

  # Ban/unban counts per period
  _metric_header fail2ban_jail_bans_per_period gauge "Bans in time period per jail"
  for i in "${!jails[@]}"; do
    jail=${jails[i]}
    echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"1h\"} $(get_ban_rate "$jail" "1 hour ago")"
    echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"24h\"} ${bans_24h[i]}"
  done
  echo ""

  _metric_header fail2ban_jail_unbans_per_period gauge \
    "Unbans in time period per jail"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"1h\"} $(get_unban_rate "$jail" "1 hour ago")"
    echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"24h\"} $(get_unban_rate "$jail" "24 hours ago")"
  done
  echo ""

  # Unique IPs
  _metric_header fail2ban_jail_unique_banned_ips gauge \
    "Unique IPs banned in period per jail"
  for jail in "${jails[@]}"; do
    echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"1h\"} $(get_unique_banned_ips "$jail" "1 hour ago")"
    echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"24h\"} $(get_unique_banned_ips "$jail" "24 hours ago")"
  done
  echo ""

  # Jail configuration
  _metric_header fail2ban_jail_info gauge "Jail configuration information"
  local port protocol filter
  for jail in "${jails[@]}"; do
    port=$(get_jail_port "$jail")
    protocol=$(get_jail_protocol "$jail")
    filter=$(get_jail_filter "$jail")
    echo "fail2ban_jail_info{jail=\"$jail\",port=\"$port\",protocol=\"$protocol\",filter=\"$filter\"} 1"
  done
  echo ""

  # Top attackers (as labels with counts). get_top_banned_ips is called
  # without a time window, so the help text does not claim one.
  _metric_header fail2ban_jail_top_attacker_count gauge \
    "Top attacking IPs per jail (current log file)"
  local hits attacker_ip
  for jail in "${jails[@]}"; do
    while read -r hits attacker_ip; do
      [[ -z "$attacker_ip" ]] && continue
      echo "fail2ban_jail_top_attacker_count{jail=\"$jail\",ip=\"$attacker_ip\"} $hits"
    done < <(get_top_banned_ips "$jail" 5)
  done
  echo ""

  # Ban effectiveness: average bans per hour = bans in 24h / 24
  _metric_header fail2ban_jail_ban_rate_per_hour gauge \
    "Bans per hour over last 24h per jail"
  local hourly_rate
  for i in "${!jails[@]}"; do
    if [[ "${bans_24h[i]}" =~ ^[0-9]+$ ]] && (( 10#${bans_24h[i]} > 0 )); then
      hourly_rate=$(awk -v n="${bans_24h[i]}" \
        'BEGIN { printf "%.2f", n / 24 }' 2>/dev/null || echo "0")
    else
      hourly_rate="0.00"
    fi
    echo "fail2ban_jail_ban_rate_per_hour{jail=\"${jails[i]}\"} $hourly_rate"
  done
  echo ""

  # Repeat offenders
  _metric_header fail2ban_jail_repeat_offenders gauge \
    "IPs banned multiple times (7 day window)"
  local threshold
  for jail in "${jails[@]}"; do
    for threshold in 2 5 10; do
      echo "fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"${threshold}+\"} $(get_repeat_offender_count "$jail" "$threshold")"
    done
  done
  echo ""

  # Log file health metrics
  _metric_header fail2ban_log_size_bytes gauge "Size of fail2ban log file"
  echo "fail2ban_log_size_bytes $(get_log_size)"
  echo ""
  _metric_header fail2ban_log_age_seconds gauge \
    "Time since last log file modification"
  echo "fail2ban_log_age_seconds $(get_log_age)"
  echo ""
  _metric_header fail2ban_log_rotation_timestamp gauge \
    "Unix timestamp of last log rotation"
  echo "fail2ban_log_rotation_timestamp $(get_log_rotation_timestamp)"
  echo ""

  # Time since last ban/unban (easier to alert on than timestamps).
  # 0 is reported when no ban/unban has been seen.
  local current_time seconds_since
  current_time=$(date +%s)

  _metric_header fail2ban_jail_seconds_since_last_ban gauge \
    "Seconds since last ban per jail"
  for i in "${!jails[@]}"; do
    if [[ "${last_ban[i]}" =~ ^[0-9]+$ ]] && (( 10#${last_ban[i]} > 0 )); then
      seconds_since=$((current_time - 10#${last_ban[i]}))
    else
      seconds_since=0
    fi
    echo "fail2ban_jail_seconds_since_last_ban{jail=\"${jails[i]}\"} $seconds_since"
  done
  echo ""

  _metric_header fail2ban_jail_seconds_since_last_unban gauge \
    "Seconds since last unban per jail"
  for i in "${!jails[@]}"; do
    if [[ "${last_unban[i]}" =~ ^[0-9]+$ ]] && (( 10#${last_unban[i]} > 0 )); then
      seconds_since=$((current_time - 10#${last_unban[i]}))
    else
      seconds_since=0
    fi
    echo "fail2ban_jail_seconds_since_last_unban{jail=\"${jails[i]}\"} $seconds_since"
  done
  echo ""

  # Exporter runtime
  local script_end
  script_end=$(date +%s)

  _metric_header fail2ban_exporter_duration_seconds gauge \
    "Time to generate all metrics"
  echo "fail2ban_exporter_duration_seconds $((script_end - script_start))"
  echo ""
  _metric_header fail2ban_exporter_last_run_timestamp gauge \
    "Unix timestamp of last successful run"
  echo "fail2ban_exporter_last_run_timestamp $script_end"
  echo ""
}
|
|
|
|
# ============================================================================
|
|
# HTTP SERVER MODE
|
|
# ============================================================================
|
|
|
|
# Answer a single HTTP request: read it from stdin, write the response to
# stdout. "GET /metrics..." gets the metrics, anything else the landing page.
_serve_http_request() {
  local request header

  # Give up quietly when the client connects but never sends a request line
  IFS= read -r -t 10 request || return 0

  # Consume the remaining request headers up to the blank line so netcat is
  # not left writing into a pipe nobody reads.
  while IFS= read -r -t 10 header; do
    header=${header%$'\r'}
    [[ -z "$header" ]] && break
  done

  if [[ "$request" =~ ^GET\ /metrics ]]; then
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
    generate_metrics
  else
    # Serve HTML landing page for other requests
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
    cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Fail2ban Exporter v2.0</title></head>
<body>
<h1>Fail2ban Prometheus Exporter (Enhanced v2.0)</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>New Metrics</h2>
<ul>
<li>Last ban/unban timestamps per jail</li>
<li>Ban/unban rates (1h, 24h)</li>
<li>Unique banned IPs per period</li>
<li>Top attackers per jail</li>
<li>Jail configuration info (port, protocol, filter)</li>
<li>Ban rate per hour</li>
</ul>
</body>
</html>
EOF
  fi
}

# Run a simple HTTP server using netcat; one connection is handled at a time.
# Serves the metrics on /metrics and an HTML landing page everywhere else.
# Globals: HTTP_PORT (read)
# Returns: does not return under normal operation (loops until killed);
#          exits 1 when netcat or the response FIFO is unavailable
run_http_server() {
  echo "Starting fail2ban exporter on port $HTTP_PORT..." >&2

  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  # netcat's stdout carries the client's request and its stdin must receive
  # our response. A plain "handler | nc" pipeline only wires up one direction
  # (the handler would read the script's own stdin instead of the request),
  # so the response travels back to netcat through a named pipe.
  local fifo_dir fifo
  if ! fifo_dir=$(mktemp -d); then
    echo "ERROR: cannot create temporary directory for HTTP mode" >&2
    exit 1
  fi
  fifo="$fifo_dir/response"
  if ! mkfifo "$fifo"; then
    rm -rf -- "$fifo_dir"
    echo "ERROR: cannot create FIFO $fifo" >&2
    exit 1
  fi
  # Expand the path now: the local variable may be gone when the trap runs.
  # shellcheck disable=SC2064
  trap "rm -rf -- $(printf '%q' "$fifo_dir")" EXIT

  # Infinite loop accepting HTTP requests
  while true; do
    # -q 1: wait 1 second after EOF on stdin before closing
    nc -l -p "$HTTP_PORT" -q 1 <"$fifo" 2>/dev/null \
      | _serve_http_request >"$fifo"
    # Back off when netcat itself failed (e.g. port in use) to avoid spinning
    if (( PIPESTATUS[0] != 0 )); then
      sleep 1
    fi
  done
}
|
|
|
|
# ============================================================================
|
|
# MAIN EXECUTION
|
|
# ============================================================================
|
|
|
|
# Main entry point - routes to the appropriate output mode.
# Globals:   HTTP_MODE, OUTPUT_FILE (read; set by parse_args)
# Arguments: the script's command-line arguments
# Outputs:   metrics on stdout (default mode) or in OUTPUT_FILE;
#            status and error messages on stderr
# Returns:   exits 1 when the metrics file cannot be produced or installed
main() {
  parse_args "$@"

  if [[ "$HTTP_MODE" == true ]]; then
    # Run HTTP server (blocks until killed)
    run_http_server
    return
  fi

  if [[ -z "$OUTPUT_FILE" ]]; then
    # Default: output to stdout
    generate_metrics
    return
  fi

  # Textfile collector mode: write atomically using a temp file
  local output_dir temp_file file_lines
  output_dir=$(dirname -- "$OUTPUT_FILE")
  if ! mkdir -p -- "$output_dir"; then
    echo "ERROR: Cannot create output directory $output_dir" >&2
    exit 1
  fi

  # Create temp file in SAME directory for atomic rename (same filesystem)
  if ! temp_file=$(mktemp "${output_dir}/.fail2ban_metrics.XXXXXX"); then
    echo "ERROR: Cannot create temporary file in $output_dir" >&2
    exit 1
  fi

  # Generate metrics to temp file
  if ! generate_metrics >"$temp_file" 2>/dev/null; then
    rm -f -- "$temp_file"
    echo "ERROR: Failed to generate metrics" >&2
    exit 1
  fi

  # Validate by content, not by size: the output must carry a fail2ban_up
  # sample. When fail2ban is down the whole output is just the three
  # "fail2ban_up 0" lines, which is valid and must be published so the
  # outage becomes visible instead of leaving a stale "up" file behind.
  if ! grep -qE '^fail2ban_up [01]$' "$temp_file"; then
    rm -f -- "$temp_file"
    echo "ERROR: Metrics file has no fail2ban_up sample, keeping previous" >&2
    exit 1
  fi
  file_lines=$(wc -l <"$temp_file" 2>/dev/null || echo 0)
  file_lines=${file_lines//[[:space:]]/}

  # Set permissions before move
  chmod 644 "$temp_file"

  # Atomic rename - no gap where file is missing
  if ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
    rm -f -- "$temp_file"
    echo "ERROR: Cannot write $OUTPUT_FILE" >&2
    exit 1
  fi

  echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
}
|
|
|
|
# Execute main function with all script arguments
|
|
# "$@" is quoted so every command-line argument is handed to main() as its
# own word, exactly as the caller supplied it.
main "$@"
|