Add all 44 scripts, update CI: error severity baseline, PowerShell validation, multi-distro testing

Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
root
2026-03-07 05:40:51 +01:00
parent db43b8a313
commit 88551536e6
43 changed files with 28906 additions and 23 deletions
+914
View File
@@ -0,0 +1,914 @@
#!/bin/bash
################################################################################
# Script Name: fail2ban-exporter.sh
# Version: 2.0
# Description: Prometheus exporter for fail2ban providing comprehensive metrics
# for monitoring jail status, ban/unban activity, and threat analysis
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
# - fail2ban-client command available
# - fail2ban service running
# - journalctl (systemd) for historical data
# - netcat (nc) for HTTP mode
# - /var/log/fail2ban.log for timestamp parsing
#
# Usage:
# # Output to stdout
# ./fail2ban-exporter.sh
#
# # HTTP server mode
# ./fail2ban-exporter.sh --http -p 9191
#
# # Textfile collector mode
# ./fail2ban-exporter.sh --textfile
#
# Metrics Exported:
# Core Metrics (v1.0):
# - fail2ban_up{} - Exporter status (1=up, 0=down)
# - fail2ban_server_info{version,exporter_version} - Server version info
# - fail2ban_jail_count{} - Total number of jails (gauge)
# - fail2ban_jail_enabled{jail} - Jail enabled status (gauge)
# - fail2ban_jail_failed_current{jail} - Currently failed attempts (gauge)
# - fail2ban_jail_banned_current{jail} - Currently banned IPs (gauge)
# - fail2ban_jail_failed_total{jail} - Total failed attempts (counter)
# - fail2ban_jail_banned_total{jail} - Total banned IPs (counter)
# - fail2ban_jail_ban_rate{jail} - Ban ratio: banned/failed (gauge)
#
# Enhanced Metrics (v2.0):
# - fail2ban_jail_last_ban_timestamp{jail} - Unix timestamp of last ban (gauge)
# - fail2ban_jail_last_unban_timestamp{jail} - Unix timestamp of last unban (gauge)
# - fail2ban_jail_bans_per_period{jail,period} - Bans in 1h/24h (gauge)
# - fail2ban_jail_unbans_per_period{jail,period} - Unbans in 1h/24h (gauge)
# - fail2ban_jail_unique_banned_ips{jail,period} - Unique IPs banned (gauge)
# - fail2ban_jail_info{jail,port,protocol,filter} - Jail configuration (gauge)
# - fail2ban_jail_top_attacker_count{jail,ip} - Top 5 attacking IPs (gauge)
# - fail2ban_jail_ban_rate_per_hour{jail} - Average bans/hour over 24h (gauge)
# - fail2ban_jail_repeat_offenders{jail,threshold} - Repeat offender count (7d)
# - fail2ban_jail_seconds_since_last_ban{jail} - Seconds since last ban
# - fail2ban_jail_seconds_since_last_unban{jail} - Seconds since last unban
# - fail2ban_log_size_bytes - Size of fail2ban.log file
# - fail2ban_log_age_seconds - Time since last log modification
# - fail2ban_log_rotation_timestamp - Last log rotation time
# - fail2ban_exporter_duration_seconds - Script execution time
# - fail2ban_exporter_last_run_timestamp - Last successful run time
#
# Configuration:
# Default HTTP port: 9191
# Textfile directory: /var/lib/node_exporter
# Log source: /var/log/fail2ban.log
#
################################################################################
# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================
# Runtime switches (adjusted by command-line parsing)
HTTP_MODE='false'                        # "true" once --http is requested
HTTP_PORT='9191'                         # TCP port used by the HTTP mode
OUTPUT_FILE=''                           # empty means: print metrics to stdout

# Filesystem locations
TEXTFILE_DIR='/var/lib/node_exporter'    # node_exporter textfile collector directory
FAIL2BAN_LOG='/var/log/fail2ban.log'     # log scanned for ban/unban history
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print command-line help to stdout and terminate the script.
# Globals: HTTP_PORT (read) - shown as the port used by --http
# Returns: does not return; exits with status 0
show_usage() {
  # The metric summary below lists only what the exporter really emits
  # (the old text advertised ban-duration and jail-uptime metrics that
  # are not produced).
  cat <<EOF
Usage: $0 [OPTIONS]

Export fail2ban statistics as Prometheus metrics (Enhanced v2.0).

MODES:
  (default)          Print metrics to stdout
  --textfile         Write to node_exporter textfile collector
  --http             Run HTTP server on port $HTTP_PORT

OPTIONS:
  -p, --port PORT    HTTP port (default: 9191)
  -o, --output FILE  Output file path
  -h, --help         Show this help and exit

EXAMPLES:
  $0 --textfile                # Write to textfile collector
  $0 --http --port 9191        # Run HTTP server
  $0 -o /tmp/fail2ban.prom     # Write to custom file

NEW METRICS v2.0:
  - Jail health: last ban/unban timestamps, seconds since last ban/unban
  - Ban/unban counts and unique banned IPs (1h, 24h), bans per hour
  - Top attackers: most banned IPs per jail
  - Repeat offenders (7 day window)
  - Jail configuration info (port, protocol, filter)
  - Log file size, age and rotation time
EOF
  exit 0
}
# Parse command-line options into the global configuration variables.
# Args:    "$@" - the script's command-line arguments
# Globals: OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written); TEXTFILE_DIR (read)
# Exits:   1 on an unknown option, a missing option value or an invalid port
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help) show_usage ;;
      --textfile) OUTPUT_FILE="$TEXTFILE_DIR/fail2ban.prom"; shift ;;
      --http) HTTP_MODE=true; shift ;;
      -p|--port|-o|--output)
        # Without this guard "shift 2" fails when the value is missing,
        # nothing is shifted and the loop never terminates.
        if [[ $# -lt 2 || -z "$2" ]]; then
          echo "ERROR: option $1 requires an argument" >&2
          exit 1
        fi
        case "$1" in
          -p|--port)
            if ! [[ "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
              echo "ERROR: invalid port: $2" >&2
              exit 1
            fi
            HTTP_PORT=$((10#$2))   # base-10 forced so "0080" is not read as octal
            ;;
          *) OUTPUT_FILE="$2" ;;
        esac
        shift 2
        ;;
      *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
  done
}
# Check if fail2ban is installed and running
# Returns: 0 if OK, 1 if error
check_fail2ban() {
  local problem=""

  if ! command -v fail2ban-client >/dev/null 2>&1; then
    # Client binary is not on PATH at all
    problem="fail2ban-client not found"
  elif ! fail2ban-client ping >/dev/null 2>&1; then
    # Client exists but the server does not answer a ping
    problem="fail2ban server not responding"
  fi

  if [ -z "$problem" ]; then
    return 0
  fi
  echo "ERROR: $problem" >&2
  return 1
}
# Get list of all active fail2ban jails
# Returns: Space-separated list of jail names
get_jails() {
  # Keep only the text after "Jail list:", drop tabs, turn the commas into
  # blanks, and let xargs collapse everything onto one space-separated line.
  fail2ban-client status 2>/dev/null \
    | awk -F 'Jail list:' 'NF > 1 {
        names = $NF
        gsub(/\t/, "", names)
        gsub(/,/, " ", names)
        print names
      }' \
    | xargs
}
# Get statistics for a specific jail
# Args: $1 - jail name
# Returns: Pipe-delimited string: currently_failed|currently_banned|total_failed|total_banned
get_jail_stats() {
  local jail="$1"
  local report label value
  local -a fields=()

  # One client call; every counter is then pulled out of the captured text.
  report=$(fail2ban-client status "$jail" 2>/dev/null)

  # Order of the labels defines the order of the output fields.
  for label in "Currently failed:" "Currently banned:" "Total failed:" "Total banned:"; do
    # The number is the last whitespace-separated field of the labelled line
    value=$(awk -v label="$label" 'index($0, label) { print $NF }' <<<"$report")
    fields+=("${value:-0}")   # missing counter is reported as 0
  done

  # Join with "|" : currently_failed|currently_banned|total_failed|total_banned
  local IFS='|'
  echo "${fields[*]}"
}
# Get list of currently banned IPs for a jail
# Args: $1 - jail name
# Returns: List of IPs, one per line
get_banned_ips() {
  local jail="$1"
  # The client prints "Banned IP list:<TAB>ip ip ...". Splitting on every
  # kind of whitespace (not only blanks) keeps the tab from sticking to the
  # first address. The final sed drops empty lines and, unlike "grep -v",
  # still succeeds when the list is empty.
  fail2ban-client status "$jail" 2>/dev/null \
    | sed -n 's/.*Banned IP list://p' \
    | tr -s '[:space:]' '\n' \
    | sed '/^$/d'
}
# Get timestamp of last ban event for a jail
# Args: $1 - jail name
# Returns: Unix timestamp (seconds since epoch) or 0 if not found
get_last_ban_timestamp() {
  local jail="$1"
  local stamp="" epoch=""

  # Remember the first two fields (date and time) of the last "Ban " line
  # that mentions this jail.
  stamp=$(grep "\[$jail\]" "$FAIL2BAN_LOG" 2>/dev/null \
    | awk '/Ban / { last = $1 " " $2 } END { if (last != "") print last }')

  # Convert to seconds since the epoch; an unparsable stamp leaves epoch empty
  if [ -n "$stamp" ]; then
    epoch=$(date -d "$stamp" +%s 2>/dev/null)
  fi

  echo "${epoch:-0}"
}
# Get timestamp of last unban event for a jail
# Args: $1 - jail name
# Returns: Unix timestamp (seconds since epoch) or 0 if not found
get_last_unban_timestamp() {
  local jail="$1"
  local stamp="" epoch=""

  # Remember the first two fields (date and time) of the last "Unban " line
  # that mentions this jail.
  stamp=$(grep "\[$jail\]" "$FAIL2BAN_LOG" 2>/dev/null \
    | awk '/Unban / { last = $1 " " $2 } END { if (last != "") print last }')

  # Convert to seconds since the epoch; an unparsable stamp leaves epoch empty
  if [ -n "$stamp" ]; then
    epoch=$(date -d "$stamp" +%s 2>/dev/null)
  fi

  echo "${epoch:-0}"
}
# Count ban events within a time period
# Args: $1 - jail name, $2 - time period (e.g., "1 hour ago")
# Returns: Number of ban events
get_ban_rate() {
  local jail="$1"
  local period="$2"
  local count cutoff

  # Cutoff rendered in the log's own "YYYY-MM-DD HH:MM:SS" layout so log
  # lines can be compared as plain strings (no date(1) fork per line).
  # An unparsable period yields an empty cutoff, i.e. every line counts.
  cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""

  # Try journalctl first; -F matches the jail name literally
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null | grep -cF -- "[$jail] Ban ")

  # If the journal has nothing, fall back to the log file
  if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
    count=$(awk -v marker="[$jail] Ban " -v cutoff="$cutoff" '
      # Only lines for this jail that start with a well-formed timestamp;
      # malformed lines are skipped instead of inheriting a previous value.
      index($0, marker) &&
      $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ &&
      $2 ~ /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/ {
        # substr() drops the ",mmm" milliseconds suffix
        if (($1 " " substr($2, 1, 8)) >= cutoff) count++
      }
      END { print count + 0 }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
# Count unban events within a time period
# Args: $1 - jail name, $2 - time period (e.g., "1 hour ago")
# Returns: Number of unban events
get_unban_rate() {
  local jail="$1"
  local period="$2"
  local count cutoff

  # Cutoff rendered in the log's own "YYYY-MM-DD HH:MM:SS" layout so log
  # lines can be compared as plain strings (no date(1) fork per line).
  # An unparsable period yields an empty cutoff, i.e. every line counts.
  cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""

  # Try journalctl first; -F matches the jail name literally
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null | grep -cF -- "[$jail] Unban ")

  # If the journal has nothing, fall back to the log file
  if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
    count=$(awk -v marker="[$jail] Unban " -v cutoff="$cutoff" '
      # Only lines for this jail that start with a well-formed timestamp;
      # malformed lines are skipped instead of inheriting a previous value.
      index($0, marker) &&
      $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ &&
      $2 ~ /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/ {
        # substr() drops the ",mmm" milliseconds suffix
        if (($1 " " substr($2, 1, 8)) >= cutoff) count++
      }
      END { print count + 0 }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
# Get top attacking IPs by ban count
# Args: $1 - jail name, $2 - limit (default: 5)
# Returns: Lines with "count IP" format, sorted by count descending
get_top_banned_ips() {
  local jail="$1"
  local limit="${2:-5}"
  local ban_pattern="\[$jail\] Ban "
  local ipv4_pattern='[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+'

  # ban lines of this jail -> dotted-quad addresses -> occurrences per
  # address -> highest count first -> first $limit rows
  grep "$ban_pattern" "$FAIL2BAN_LOG" 2>/dev/null \
    | grep -oE "$ipv4_pattern" \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n "$limit"
}
# Count unique IPs banned in a time period
# Args: $1 - jail name, $2 - time period (e.g., "24 hours ago")
# Returns: Number of unique IPs
get_unique_banned_ips() {
  local jail="$1"
  local period="$2"
  local count cutoff

  # Cutoff rendered in the log's own "YYYY-MM-DD HH:MM:SS" layout so log
  # lines can be compared as plain strings (no date(1) fork per line).
  # An unparsable period yields an empty cutoff, i.e. every line counts.
  cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""

  # Try journalctl first; -F matches the jail name literally
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
    | grep -F -- "[$jail] Ban " \
    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | sort -u \
    | wc -l)

  # Fall back to the log file if the journal has nothing
  if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
    count=$(awk -v marker="[$jail] Ban " -v cutoff="$cutoff" '
      # Only lines for this jail that start with a well-formed timestamp;
      # malformed lines are skipped instead of inheriting a previous value.
      index($0, marker) &&
      $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ &&
      $2 ~ /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/ {
        if (($1 " " substr($2, 1, 8)) < cutoff) next
        # Look for the address only in the text that follows the marker
        rest = substr($0, index($0, marker) + length(marker))
        if (match(rest, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/))
          seen[substr(rest, RSTART, RLENGTH)] = 1
      }
      END {
        n = 0
        for (ip in seen) n++
        print n
      }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
# Report ban duration statistics for a jail.
# Args: $1 - jail name (accepted but not evaluated)
# Returns: Pipe-delimited string avg|min|max in seconds
# NOTE: placeholder - always answers with the same fixed figures.
get_ban_duration_stats() {
  local jail="$1"
  local avg_seconds=3600   # 1h average
  local min_seconds=1800   # 30min minimum
  local max_seconds=7200   # 2h maximum
  printf '%s|%s|%s\n' "$avg_seconds" "$min_seconds" "$max_seconds"
}
# Look up the "port" setting configured for a jail.
# Args:    $1 - jail name
# Globals: FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#          defaults to /etc/fail2ban
# Returns: Port value (text after "port =") or "unknown" if not configured
get_jail_port() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local port="" file section

  for file in "$conf_dir/jail.d/$jail.conf" "$conf_dir/jail.local"; do
    [ -f "$file" ] || continue
    # The per-jail drop-in is searched in any section; the shared jail.local
    # only inside this jail's own [section].
    section=""
    [ "$file" = "$conf_dir/jail.local" ] && section="[$jail]"
    # Sections are tracked line by line: an awk range "/[jail]/,/^\[/" would
    # end on the very header line that starts it and never see the settings.
    port=$(awk -v section="$section" -v key="port" '
      /^[ \t]*\[/ {
        current = $0
        gsub(/^[ \t]+|[ \t\r]+$/, "", current)
        next
      }
      (section == "" || current == section) && $0 ~ ("^" key "[ \t]*=") {
        sub(/^[^=]*=[ \t]*/, "")   # keep everything after the first "="
        sub(/[ \t\r]+$/, "")
        print
        exit                       # first match wins
      }
    ' "$file" 2>/dev/null)
    [ -n "$port" ] && break
  done

  echo "${port:-unknown}"
}
# Detect protocol based on jail name
# Args: $1 - jail name
# Returns: Protocol (tcp/udp), defaults to tcp
get_jail_protocol() {
  local jail="$1"
  local protocol="tcp"   # answer for every jail unless the dns rule applies

  # "udp" is reported only for a name containing "dns" that contains none
  # of the substrings tied to the TCP services.
  if [[ "$jail" == *dns* ]] && ! [[ "$jail" =~ ssh|http|nginx|apache|smtp|mail ]]; then
    protocol="udp"
  fi

  echo "$protocol"
}
# Look up the "logpath" setting configured for a jail.
# Args:    $1 - jail name
# Globals: FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#          defaults to /etc/fail2ban
# Returns: First line of the logpath value, or /var/log/auth.log if none is
#          configured
get_jail_logpath() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local logpath="" file section

  for file in "$conf_dir/jail.d/$jail.conf" "$conf_dir/jail.local"; do
    [ -f "$file" ] || continue
    # The per-jail drop-in is searched in any section; the shared jail.local
    # only inside this jail's own [section].
    section=""
    [ "$file" = "$conf_dir/jail.local" ] && section="[$jail]"
    # Sections are tracked line by line: an awk range "/[jail]/,/^\[/" would
    # end on the very header line that starts it and never see the settings.
    logpath=$(awk -v section="$section" -v key="logpath" '
      /^[ \t]*\[/ {
        current = $0
        gsub(/^[ \t]+|[ \t\r]+$/, "", current)
        next
      }
      (section == "" || current == section) && $0 ~ ("^" key "[ \t]*=") {
        sub(/^[^=]*=[ \t]*/, "")   # keep everything after the first "="
        sub(/[ \t\r]+$/, "")
        print
        exit                       # first match wins
      }
    ' "$file" 2>/dev/null)
    [ -n "$logpath" ] && break
  done

  echo "${logpath:-/var/log/auth.log}"
}
# Look up the "filter" setting configured for a jail.
# Args:    $1 - jail name
# Globals: FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#          defaults to /etc/fail2ban
# Returns: Filter value, or the jail name itself when no filter is configured
get_jail_filter() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local filter="" file section

  for file in "$conf_dir/jail.d/$jail.local" "$conf_dir/jail.d/$jail.conf" "$conf_dir/jail.local"; do
    [ -f "$file" ] || continue
    # Per-jail drop-ins are searched in any section; the shared jail.local
    # only inside this jail's own [section].
    section=""
    [ "$file" = "$conf_dir/jail.local" ] && section="[$jail]"
    filter=$(awk -v section="$section" -v key="filter" '
      /^[ \t]*\[/ {
        current = $0
        gsub(/^[ \t]+|[ \t\r]+$/, "", current)
        next
      }
      (section == "" || current == section) && $0 ~ ("^" key "[ \t]*=") {
        sub(/^[^=]*=[ \t]*/, "")   # keep everything after the first "="
        sub(/[ \t\r]+$/, "")
        print
        exit                       # first match wins
      }
    ' "$file" 2>/dev/null)
    [ -n "$filter" ] && break
  done

  # The default is applied to the captured value: a "cmd | awk || echo"
  # chain never falls back, because awk succeeds even when nothing matched.
  echo "${filter:-$jail}"
}
# Report whether fail2ban answers a status query for a jail.
# Args: $1 - jail name
# Returns: "1" if "fail2ban-client status <jail>" succeeds, otherwise "0"
get_jail_enabled() {
  local jail="$1"
  local state=0

  # Only the exit status of the query matters; its output is discarded
  fail2ban-client status "$jail" >/dev/null 2>&1 && state=1

  echo "$state"
}
# Count IPs that were banned at least N times in the last 7 days.
# Args:    $1 - jail name, $2 - minimum number of bans (default: 2)
# Globals: FAIL2BAN_LOG (read) - used when the journal has no matches
# Returns: Number of IPs whose ban count reaches the threshold
get_repeat_offender_count() {
  local jail="$1"
  local threshold="${2:-2}"   # Default: 2+ bans = repeat offender
  local count cutoff

  # Cutoff rendered in the log's own "YYYY-MM-DD HH:MM:SS" layout so log
  # lines can be compared as plain strings (no date(1) fork per line).
  cutoff=$(date -d "7 days ago" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""

  # Try journalctl first; -F matches the jail name literally
  count=$(journalctl -u fail2ban --since "7 days ago" 2>/dev/null \
    | grep -F -- "[$jail] Ban " \
    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | sort | uniq -c \
    | awk -v t="$threshold" '$1 >= t { n++ } END { print n + 0 }')

  # Fall back to the log file if the journal has nothing
  if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
    count=$(awk -v marker="[$jail] Ban " -v cutoff="$cutoff" -v threshold="$threshold" '
      # Only lines for this jail that start with a well-formed timestamp;
      # malformed lines are skipped instead of inheriting a previous value.
      index($0, marker) &&
      $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ &&
      $2 ~ /^[0-9][0-9]:[0-9][0-9]:[0-9][0-9]/ {
        if (($1 " " substr($2, 1, 8)) < cutoff) next
        # Look for the address only in the text that follows the marker
        rest = substr($0, index($0, marker) + length(marker))
        if (match(rest, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/))
          bans[substr(rest, RSTART, RLENGTH)]++
      }
      END {
        repeat = 0
        for (ip in bans) if (bans[ip] >= threshold + 0) repeat++
        print repeat
      }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
# Report the size of the fail2ban log file.
# Returns: Size in bytes, or "0" if the file is missing or stat fails
get_log_size() {
  local bytes=""

  if [ -f "$FAIL2BAN_LOG" ]; then
    bytes=$(stat -c %s "$FAIL2BAN_LOG" 2>/dev/null) || bytes=""
  fi

  echo "${bytes:-0}"
}
# Report how long ago the fail2ban log file was last modified.
# Returns: Seconds between now and the file's modification time, or "0" if
#          the file does not exist
get_log_age() {
  # No log file: nothing to measure
  if [ ! -f "$FAIL2BAN_LOG" ]; then
    echo "0"
    return
  fi

  local now modified
  now=$(date +%s)
  # A failing stat is treated as modification time 0
  modified=$(stat -c %Y "$FAIL2BAN_LOG" 2>/dev/null || echo 0)
  echo $((now - modified))
}
# Report the modification time of the most recently rotated log file.
# Globals: FAIL2BAN_LOG (read) - rotated files are looked up next to it
# Returns: Unix timestamp (mtime of the newest rotated file) or "0" if no
#          rotated file exists
get_log_rotation_timestamp() {
  local candidate newest=""

  # Covers numbered rotations (.1, .1.gz, .2.gz, ...) and date-suffixed
  # ones (-YYYYMMDD, -YYYYMMDD.gz). Globbing avoids parsing ls output.
  for candidate in "${FAIL2BAN_LOG}".[0-9]* "${FAIL2BAN_LOG}"-*; do
    # A pattern without matches stays as literal text: skip it
    [ -f "$candidate" ] || continue
    if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
      newest="$candidate"
    fi
  done

  if [ -n "$newest" ]; then
    stat -c %Y "$newest" 2>/dev/null || echo "0"
  else
    echo "0"
  fi
}
# ============================================================================
# METRIC GENERATION
# ============================================================================
# Generate all Prometheus metrics
# Returns: Prometheus text format metrics on stdout
generate_metrics() {
  local script_start
  script_start=$(date +%s)

  # fail2ban missing or not answering: report "down" and nothing else
  if ! check_fail2ban; then
    _f2b_metric_header fail2ban_up "Fail2ban exporter status" gauge
    echo "fail2ban_up 0"
    return 0
  fi

  # Jail names arrive as one whitespace-separated string; keep them in an
  # array so every later loop walks the same list in the same order.
  local jails
  local -a jail_list=()
  jails=$(get_jails)
  IFS=$' \t\n' read -r -d '' -a jail_list <<<"$jails" || true

  # Query each jail's counters once. All counter metrics below (and the
  # banned/failed ratio) are derived from this single snapshot.
  local -a cur_failed=() cur_banned=() tot_failed=() tot_banned=()
  local -a last_ban=() last_unban=() bans_24h=()
  local i jail stats cf cb tf tb
  for i in "${!jail_list[@]}"; do
    stats=$(get_jail_stats "${jail_list[i]}")
    IFS='|' read -r cf cb tf tb <<<"$stats"
    cur_failed[i]=${cf:-0}
    cur_banned[i]=${cb:-0}
    tot_failed[i]=${tf:-0}
    tot_banned[i]=${tb:-0}
  done

  local server_version
  server_version=$(fail2ban-client version 2>/dev/null | head -1 | awk '{print $NF}')

  _f2b_metric_header fail2ban_up "Fail2ban exporter status" gauge
  echo "fail2ban_up 1"
  _f2b_metric_header fail2ban_server_info "Fail2ban server information" gauge
  echo "fail2ban_server_info{version=\"${server_version}\",exporter_version=\"2.0\"} 1"
  _f2b_metric_header fail2ban_jail_count "Total number of jails" gauge
  echo "fail2ban_jail_count ${#jail_list[@]}"

  _f2b_metric_header fail2ban_jail_enabled "Jail enabled status" gauge
  for jail in "${jail_list[@]}"; do
    echo "fail2ban_jail_enabled{jail=\"$jail\"} $(get_jail_enabled "$jail")"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_failed_current "Currently failed login attempts per jail" gauge
  for i in "${!jail_list[@]}"; do
    echo "fail2ban_jail_failed_current{jail=\"${jail_list[i]}\"} ${cur_failed[i]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_banned_current "Currently banned IPs per jail" gauge
  for i in "${!jail_list[@]}"; do
    echo "fail2ban_jail_banned_current{jail=\"${jail_list[i]}\"} ${cur_banned[i]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_failed_total "Total failed login attempts per jail" counter
  for i in "${!jail_list[@]}"; do
    echo "fail2ban_jail_failed_total{jail=\"${jail_list[i]}\"} ${tot_failed[i]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_banned_total "Total banned IPs per jail (all time)" counter
  for i in "${!jail_list[@]}"; do
    echo "fail2ban_jail_banned_total{jail=\"${jail_list[i]}\"} ${tot_banned[i]}"
  done
  echo ""

  # Ban ratio: banned / failed, 0 when nothing failed (no division by zero)
  _f2b_metric_header fail2ban_jail_ban_rate "Ban rate (total_banned / total_failed) per jail" gauge
  local ratio
  for i in "${!jail_list[@]}"; do
    if [ "${tot_failed[i]}" -gt 0 ] 2>/dev/null; then
      # awk supplies the floating point arithmetic the shell lacks
      ratio=$(awk -v b="${tot_banned[i]}" -v f="${tot_failed[i]}" \
        'BEGIN { printf "%.4f", b / f }' 2>/dev/null || echo "0")
    else
      ratio="0"
    fi
    echo "fail2ban_jail_ban_rate{jail=\"${jail_list[i]}\"} $ratio"
  done
  echo ""

  # ========================================================================
  # ENHANCED METRICS (v2.0) - Jail Health & Activity Tracking
  # ========================================================================
  # Last ban/unban timestamps are remembered for the "seconds since" metrics
  _f2b_metric_header fail2ban_jail_last_ban_timestamp "Timestamp of last ban per jail" gauge
  for i in "${!jail_list[@]}"; do
    last_ban[i]=$(get_last_ban_timestamp "${jail_list[i]}")
    echo "fail2ban_jail_last_ban_timestamp{jail=\"${jail_list[i]}\"} ${last_ban[i]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_last_unban_timestamp "Timestamp of last unban per jail" gauge
  for i in "${!jail_list[@]}"; do
    last_unban[i]=$(get_last_unban_timestamp "${jail_list[i]}")
    echo "fail2ban_jail_last_unban_timestamp{jail=\"${jail_list[i]}\"} ${last_unban[i]}"
  done
  echo ""

  # Ban/unban counts per period; the 24h ban count is reused further down
  _f2b_metric_header fail2ban_jail_bans_per_period "Bans in time period per jail" gauge
  local bans_1h
  for i in "${!jail_list[@]}"; do
    jail=${jail_list[i]}
    bans_1h=$(get_ban_rate "$jail" "1 hour ago")
    bans_24h[i]=$(get_ban_rate "$jail" "24 hours ago")
    echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"1h\"} ${bans_1h}"
    echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"24h\"} ${bans_24h[i]}"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_unbans_per_period "Unbans in time period per jail" gauge
  local unbans_1h unbans_24h
  for jail in "${jail_list[@]}"; do
    unbans_1h=$(get_unban_rate "$jail" "1 hour ago")
    unbans_24h=$(get_unban_rate "$jail" "24 hours ago")
    echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"1h\"} ${unbans_1h}"
    echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"24h\"} ${unbans_24h}"
  done
  echo ""

  # Unique IPs
  _f2b_metric_header fail2ban_jail_unique_banned_ips "Unique IPs banned in period per jail" gauge
  local unique_1h unique_24h
  for jail in "${jail_list[@]}"; do
    unique_1h=$(get_unique_banned_ips "$jail" "1 hour ago")
    unique_24h=$(get_unique_banned_ips "$jail" "24 hours ago")
    echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"1h\"} ${unique_1h}"
    echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"24h\"} ${unique_24h}"
  done
  echo ""

  # Jail configuration
  _f2b_metric_header fail2ban_jail_info "Jail configuration information" gauge
  local port protocol filter
  for jail in "${jail_list[@]}"; do
    port=$(get_jail_port "$jail")
    protocol=$(get_jail_protocol "$jail")
    filter=$(get_jail_filter "$jail")
    echo "fail2ban_jail_info{jail=\"$jail\",port=\"$port\",protocol=\"$protocol\",filter=\"$filter\"} 1"
  done
  echo ""

  # Top attackers (as labels with counts). get_top_banned_ips is called
  # without a time window, so the HELP text no longer claims "24h".
  _f2b_metric_header fail2ban_jail_top_attacker_count "Top attacking IPs per jail (current log file)" gauge
  local count ip
  for jail in "${jail_list[@]}"; do
    while read -r count ip; do
      [ -z "$ip" ] && continue
      echo "fail2ban_jail_top_attacker_count{jail=\"$jail\",ip=\"$ip\"} $count"
    done < <(get_top_banned_ips "$jail" 5)
  done
  echo ""

  # Ban effectiveness: 24h ban count (collected above) averaged per hour
  _f2b_metric_header fail2ban_jail_ban_rate_per_hour "Bans per hour over last 24h per jail" gauge
  local daily hourly
  for i in "${!jail_list[@]}"; do
    daily=${bans_24h[i]//[[:space:]]/}   # strip stray whitespace
    daily=${daily:-0}
    if [ "$daily" -gt 0 ] 2>/dev/null; then
      hourly=$(awk -v n="$daily" 'BEGIN { printf "%.2f", n / 24 }' 2>/dev/null || echo "0")
    else
      hourly="0.00"
    fi
    echo "fail2ban_jail_ban_rate_per_hour{jail=\"${jail_list[i]}\"} $hourly"
  done
  echo ""

  # Repeat offenders
  _f2b_metric_header fail2ban_jail_repeat_offenders "IPs banned multiple times (7 day window)" gauge
  local threshold
  for jail in "${jail_list[@]}"; do
    for threshold in 2 5 10; do
      echo "fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"${threshold}+\"} $(get_repeat_offender_count "$jail" "$threshold")"
    done
  done
  echo ""

  # Log file health metrics
  _f2b_metric_header fail2ban_log_size_bytes "Size of fail2ban log file" gauge
  echo "fail2ban_log_size_bytes $(get_log_size)"
  _f2b_metric_header fail2ban_log_age_seconds "Time since last log file modification" gauge
  echo "fail2ban_log_age_seconds $(get_log_age)"
  _f2b_metric_header fail2ban_log_rotation_timestamp "Unix timestamp of last log rotation" gauge
  echo "fail2ban_log_rotation_timestamp $(get_log_rotation_timestamp)"
  echo ""

  # Time since last ban/unban (easier to alert on than timestamps);
  # a jail without a recorded event reports 0.
  local current_time seconds_since
  current_time=$(date +%s)

  _f2b_metric_header fail2ban_jail_seconds_since_last_ban "Seconds since last ban per jail" gauge
  for i in "${!jail_list[@]}"; do
    if [ "${last_ban[i]:-0}" -gt 0 ] 2>/dev/null; then
      seconds_since=$((current_time - last_ban[i]))
    else
      seconds_since=0
    fi
    echo "fail2ban_jail_seconds_since_last_ban{jail=\"${jail_list[i]}\"} $seconds_since"
  done
  echo ""

  _f2b_metric_header fail2ban_jail_seconds_since_last_unban "Seconds since last unban per jail" gauge
  for i in "${!jail_list[@]}"; do
    if [ "${last_unban[i]:-0}" -gt 0 ] 2>/dev/null; then
      seconds_since=$((current_time - last_unban[i]))
    else
      seconds_since=0
    fi
    echo "fail2ban_jail_seconds_since_last_unban{jail=\"${jail_list[i]}\"} $seconds_since"
  done
  echo ""

  # Exporter runtime
  local script_end
  script_end=$(date +%s)
  _f2b_metric_header fail2ban_exporter_duration_seconds "Time to generate all metrics" gauge
  echo "fail2ban_exporter_duration_seconds $((script_end - script_start))"
  _f2b_metric_header fail2ban_exporter_last_run_timestamp "Unix timestamp of last successful run" gauge
  echo "fail2ban_exporter_last_run_timestamp $script_end"
  echo ""
}

# Print the "# HELP" / "# TYPE" preamble of one metric family.
# Args: $1 - metric name, $2 - help text, $3 - metric type (gauge/counter)
_f2b_metric_header() {
  printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$2" "$1" "$3"
}
# ============================================================================
# HTTP SERVER MODE
# ============================================================================
# Run simple HTTP server using netcat
# Serves metrics on /metrics endpoint
run_http_server() {
  echo "Starting fail2ban exporter on port $HTTP_PORT..." >&2
  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  # netcat's output (the client's request) must reach the handler and the
  # handler's output must flow back into netcat. A plain "handler | nc"
  # pipe covers only the second half, so the return path is a FIFO.
  local fifo_dir fifo
  if ! fifo_dir=$(mktemp -d); then
    echo "ERROR: cannot create temporary directory for HTTP mode" >&2
    exit 1
  fi
  fifo="$fifo_dir/response"
  if ! mkfifo "$fifo"; then
    rm -rf -- "$fifo_dir"
    echo "ERROR: cannot create FIFO for HTTP mode" >&2
    exit 1
  fi
  # Path is expanded now on purpose: the local variable may be out of
  # scope when the EXIT trap finally runs.
  # shellcheck disable=SC2064
  trap "rm -rf -- $(printf '%q' "$fifo_dir")" EXIT

  # One connection per iteration: nc hands the request to the handler,
  # the handler answers through the FIFO that feeds nc's stdin.
  while true; do
    # -q 1: wait 1 second after EOF before closing
    nc -l -p "$HTTP_PORT" -q 1 <"$fifo" 2>/dev/null | _f2b_http_respond >"$fifo"
    if [ "${PIPESTATUS[0]}" -ne 0 ]; then
      # nc could not listen (e.g. port in use): avoid a busy loop
      sleep 1
    fi
  done
}

# Handle one HTTP exchange: request on stdin, response on stdout.
# Serves the metrics for "GET /metrics..." and an HTML landing page for
# every other request.
_f2b_http_respond() {
  local request="" header

  IFS= read -r request
  request=${request%$'\r'}

  # Consume the remaining request headers up to the blank line; the
  # timeout keeps a client that never finishes its request from blocking.
  while IFS= read -r -t 1 header; do
    header=${header%$'\r'}
    [ -z "$header" ] && break
  done

  if [[ "$request" =~ ^GET\ /metrics ]]; then
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
    generate_metrics
  else
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
    cat <<'EOF'
<!DOCTYPE html>
<html>
<head><title>Fail2ban Exporter v2.0</title></head>
<body>
<h1>Fail2ban Prometheus Exporter (Enhanced v2.0)</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>New Metrics</h2>
<ul>
<li>Last ban/unban timestamps per jail</li>
<li>Ban/unban rates (1h, 24h)</li>
<li>Unique banned IPs per period</li>
<li>Top attackers per jail</li>
<li>Jail configuration info (port, protocol, filter)</li>
<li>Ban rate per hour</li>
</ul>
</body>
</html>
EOF
  fi
}
# ============================================================================
# MAIN EXECUTION
# ============================================================================
# Main entry point - routes to appropriate output mode
main() {
  parse_args "$@"

  if [ "$HTTP_MODE" = true ]; then
    # Run HTTP server (blocks until killed)
    run_http_server
    return
  fi

  if [ -z "$OUTPUT_FILE" ]; then
    # Default: output to stdout
    generate_metrics
    return
  fi

  # Textfile collector mode: write atomically using a temp file
  local output_dir temp_file file_lines
  output_dir="$(dirname "$OUTPUT_FILE")"
  if ! mkdir -p "$output_dir"; then
    echo "ERROR: Cannot create output directory $output_dir" >&2
    exit 1
  fi

  # Temp file lives in the SAME directory so the final rename is atomic
  # (same filesystem).
  if ! temp_file=$(mktemp "${output_dir}/.fail2ban_metrics.XXXXXX"); then
    echo "ERROR: Cannot create temporary file in $output_dir" >&2
    exit 1
  fi
  # Remove the temp file on every exit path. The path is expanded now on
  # purpose: the local variable may be out of scope when the trap runs.
  # shellcheck disable=SC2064
  trap "rm -f -- $(printf '%q' "$temp_file")" EXIT

  if ! generate_metrics > "$temp_file" 2>/dev/null; then
    rm -f "$temp_file"
    echo "ERROR: Failed to generate metrics" >&2
    exit 1
  fi

  # Validate before replacing the previous file:
  #  - "fail2ban_up 0" is a complete (3 line) report when fail2ban is down
  #    and must be published, otherwise the outage is never visible
  #  - "fail2ban_up 1" must come with the full metric set (>= 10 lines)
  file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
  if grep -qx 'fail2ban_up 0' "$temp_file"; then
    :
  elif ! grep -qx 'fail2ban_up 1' "$temp_file"; then
    rm -f "$temp_file"
    echo "ERROR: Metrics output has no fail2ban_up sample, keeping previous" >&2
    exit 1
  elif [ "$file_lines" -lt 10 ]; then
    rm -f "$temp_file"
    echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
    exit 1
  fi

  # Set permissions before move
  chmod 644 "$temp_file"
  # Atomic rename - no gap where file is missing
  if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
    rm -f "$temp_file"
    echo "ERROR: Cannot write $OUTPUT_FILE" >&2
    exit 1
  fi
  echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
}
# Execute main function with all script arguments
main "$@"