Add all 44 scripts, update CI: error severity baseline, PowerShell validation, multi-distro testing
Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Executable
+914
@@ -0,0 +1,914 @@
|
||||
#!/bin/bash
################################################################################
# Script Name: fail2ban-exporter.sh
# Version: 2.0
# Description: Prometheus exporter for fail2ban providing comprehensive metrics
#              for monitoring jail status, ban/unban activity, and threat analysis
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - fail2ban-client command available
#   - fail2ban service running
#   - journalctl (systemd) for historical data
#   - netcat (nc) for HTTP mode
#   - /var/log/fail2ban.log for timestamp parsing
#
# Usage:
#   # Output to stdout
#   ./fail2ban-exporter.sh
#
#   # HTTP server mode
#   ./fail2ban-exporter.sh --http -p 9191
#
#   # Textfile collector mode
#   ./fail2ban-exporter.sh --textfile
#
# Metrics Exported:
#   Core Metrics (v1.0):
#   - fail2ban_up{} - Exporter status (1=up, 0=down)
#   - fail2ban_server_info{version,exporter_version} - Server version info
#   - fail2ban_jail_count{} - Total number of jails (gauge)
#   - fail2ban_jail_enabled{jail} - Jail enabled status (gauge)
#   - fail2ban_jail_failed_current{jail} - Currently failed attempts (gauge)
#   - fail2ban_jail_banned_current{jail} - Currently banned IPs (gauge)
#   - fail2ban_jail_failed_total{jail} - Total failed attempts (counter)
#   - fail2ban_jail_banned_total{jail} - Total banned IPs (counter)
#   - fail2ban_jail_ban_rate{jail} - Ban ratio: banned/failed (gauge)
#
#   Enhanced Metrics (v2.0):
#   - fail2ban_jail_last_ban_timestamp{jail} - Unix timestamp of last ban (gauge)
#   - fail2ban_jail_last_unban_timestamp{jail} - Unix timestamp of last unban (gauge)
#   - fail2ban_jail_bans_per_period{jail,period} - Bans in 1h/24h (gauge)
#   - fail2ban_jail_unbans_per_period{jail,period} - Unbans in 1h/24h (gauge)
#   - fail2ban_jail_unique_banned_ips{jail,period} - Unique IPs banned (gauge)
#   - fail2ban_jail_info{jail,port,protocol,filter} - Jail configuration (gauge)
#   - fail2ban_jail_top_attacker_count{jail,ip} - Top 5 attacking IPs (gauge)
#   - fail2ban_jail_ban_rate_per_hour{jail} - Average bans/hour over 24h (gauge)
#   - fail2ban_jail_repeat_offenders{jail,threshold} - Repeat offender count (7d)
#   - fail2ban_jail_seconds_since_last_ban{jail} - Seconds since last ban
#   - fail2ban_jail_seconds_since_last_unban{jail} - Seconds since last unban
#   - fail2ban_log_size_bytes - Size of fail2ban.log file
#   - fail2ban_log_age_seconds - Time since last log modification
#   - fail2ban_log_rotation_timestamp - Last log rotation time
#   - fail2ban_exporter_duration_seconds - Script execution time
#   - fail2ban_exporter_last_run_timestamp - Last successful run time
#
# Configuration:
#   Default HTTP port: 9191
#   Textfile directory: /var/lib/node_exporter
#   Log source: /var/log/fail2ban.log
#
################################################################################

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

# Directory used by --textfile (node_exporter textfile collector).
TEXTFILE_DIR="/var/lib/node_exporter"
# Destination file for the metrics; empty means "print to stdout".
OUTPUT_FILE=""
# Set to true by --http.
HTTP_MODE=false
# TCP port used in --http mode (overridden by -p/--port).
HTTP_PORT=9191
# fail2ban log file that is parsed for ban/unban history.
FAIL2BAN_LOG="/var/log/fail2ban.log"

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Print the command-line help text to stdout and terminate the script.
# Globals: HTTP_PORT (read) - shown as the port used by --http
# Returns: does not return; exits with status 0
show_usage() {
    cat <<EOF
Usage: $0 [OPTIONS]

Export fail2ban statistics as Prometheus metrics (Enhanced v2.0).

MODES:
  --textfile          Write to node_exporter textfile collector
  --http              Run HTTP server on port $HTTP_PORT

OPTIONS:
  -p, --port PORT     HTTP port (default: 9191)
  -o, --output FILE   Output file path
  -h, --help          Show this help and exit

EXAMPLES:
  $0 --textfile                    # Write to textfile collector
  $0 --http --port 9191            # Run HTTP server
  $0 -o /tmp/fail2ban.prom         # Write to custom file

NEW METRICS v2.0:
  - Jail health: last ban/unban timestamps, ban rates
  - Top attackers: most banned IPs per jail
  - Repeat offenders: IPs banned multiple times (7 day window)
  - Protocol/port breakdown
  - Log file and exporter health

EOF
    exit 0
}

# Parse command-line options into the global configuration variables.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success; exits 1 on an unknown option, a missing option
#            value or an invalid port; -h/--help exits through show_usage
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/fail2ban.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port|-o|--output)
                # "shift 2" fails without consuming anything when the value
                # is missing, which would leave this loop spinning forever.
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires an argument" >&2
                    exit 1
                fi
                case "$1" in
                    -p|--port)
                        if ! [[ "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
                            echo "Invalid port: $2" >&2
                            exit 1
                        fi
                        HTTP_PORT="$2"
                        ;;
                    *)
                        OUTPUT_FILE="$2"
                        ;;
                esac
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}

# Check that fail2ban-client is available and that the server answers.
# Outputs: an "ERROR: ..." line on stderr when a check fails
# Returns: 0 if both checks pass, 1 otherwise
check_fail2ban() {
    if ! command -v fail2ban-client >/dev/null 2>&1; then
        echo "ERROR: fail2ban-client not found" >&2
        return 1
    fi

    # Verify the fail2ban server is responding
    if ! fail2ban-client ping >/dev/null 2>&1; then
        echo "ERROR: fail2ban server not responding" >&2
        return 1
    fi

    return 0
}

# Get list of all active fail2ban jails.
# Outputs: the jail names from the "Jail list:" line of
#          "fail2ban-client status", separated by single spaces
get_jails() {
    # Drop everything up to the label, turn commas/tabs into blanks, then let
    # awk rebuild the record ($1 = $1) so the names are single-space separated.
    fail2ban-client status 2>/dev/null | awk '
        /Jail list:/ {
            sub(/.*Jail list:/, "")
            gsub(/[\t,]/, " ")
            $1 = $1
            print
        }
    '
}

# Get statistics for a specific jail.
# Arguments: $1 - jail name
# Outputs:   pipe-delimited integers:
#            currently_failed|currently_banned|total_failed|total_banned
#            (a counter that is missing or not numeric is reported as 0)
get_jail_stats() {
    local jail="$1"

    # One awk pass picks the last field of each counter line; %d forces an
    # integer so callers never see an empty or non-numeric value.
    fail2ban-client status "$jail" 2>/dev/null | awk '
        /Currently failed:/ { currently_failed = $NF }
        /Currently banned:/ { currently_banned = $NF }
        /Total failed:/     { total_failed = $NF }
        /Total banned:/     { total_banned = $NF }
        END {
            printf "%d|%d|%d|%d\n", currently_failed, currently_banned, total_failed, total_banned
        }
    '
}

# Get list of currently banned IPs for a jail.
# Arguments: $1 - jail name
# Outputs:   the addresses from the "Banned IP list:" line, one per line;
#            nothing when the list is empty
get_banned_ips() {
    local jail="$1"

    # The list follows the label after a tab; splitting on awk fields removes
    # that tab (it used to stay glued to the first address).
    fail2ban-client status "$jail" 2>/dev/null | awk '
        /Banned IP list:/ {
            sub(/.*Banned IP list:/, "")
            for (i = 1; i <= NF; i++) print $i
        }
    '
}

# Get timestamp of the last ban event for a jail.
# Globals:   FAIL2BAN_LOG (read)
# Arguments: $1 - jail name
# Outputs:   Unix timestamp (seconds since epoch), or 0 if no event is found
#            or its date cannot be parsed
get_last_ban_timestamp() {
    local jail="$1"
    local stamp timestamp=""

    # Literal matching (index) keeps the jail name from being read as a regex.
    # The first two fields are taken as date and time; a fractional-seconds
    # suffix (",123") is dropped before handing the value to date.
    stamp=$(awk -v tag="[$jail]" -v event="Ban " '
        index($0, tag) && index($0, event) {
            clock = $2
            sub(/[,.][0-9]*$/, "", clock)
            last = $1 " " clock
        }
        END { if (last != "") print last }
    ' "$FAIL2BAN_LOG" 2>/dev/null)

    if [[ -n "$stamp" ]]; then
        timestamp=$(date -d "$stamp" +%s 2>/dev/null)
    fi

    echo "${timestamp:-0}"
}

# Get timestamp of the last unban event for a jail.
# Globals:   FAIL2BAN_LOG (read)
# Arguments: $1 - jail name
# Outputs:   Unix timestamp (seconds since epoch), or 0 if no event is found
#            or its date cannot be parsed
get_last_unban_timestamp() {
    local jail="$1"
    local stamp timestamp=""

    # Literal matching (index) keeps the jail name from being read as a regex.
    # The first two fields are taken as date and time; a fractional-seconds
    # suffix (",123") is dropped before handing the value to date.
    stamp=$(awk -v tag="[$jail]" -v event="Unban " '
        index($0, tag) && index($0, event) {
            clock = $2
            sub(/[,.][0-9]*$/, "", clock)
            last = $1 " " clock
        }
        END { if (last != "") print last }
    ' "$FAIL2BAN_LOG" 2>/dev/null)

    if [[ -n "$stamp" ]]; then
        timestamp=$(date -d "$stamp" +%s 2>/dev/null)
    fi

    echo "${timestamp:-0}"
}

# Count ban events for a jail since a point in time.
# Globals:   FAIL2BAN_LOG (read)
# Arguments: $1 - jail name
#            $2 - start of the window, in a form accepted by both
#                 "journalctl --since" and "date -d" (e.g., "1 hour ago")
# Outputs:   number of ban events
get_ban_rate() {
    local jail="$1"
    local period="$2"
    local marker="[$jail] Ban "
    local count cutoff

    # Try journalctl first; -F matches the marker literally.
    count=$(journalctl -u fail2ban --since "$period" 2>/dev/null | grep -cF -- "$marker")
    [[ "$count" =~ ^[0-9]+$ ]] || count=0

    # If the journal has nothing, fall back to the log file.
    if [[ "$count" -eq 0 && -f "$FAIL2BAN_LOG" ]]; then
        # Assumes log lines start with "YYYY-MM-DD HH:MM:SS" in local time, so
        # a plain string comparison against the cutoff orders them correctly
        # and no date process is needed per line. An empty cutoff (date -d
        # failed) lets every event through.
        cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null)
        count=$(awk -v marker="$marker" -v cutoff="$cutoff" '
            index($0, marker) && $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ {
                if (($1 " " substr($2, 1, 8)) >= cutoff) hits++
            }
            END { print hits + 0 }
        ' "$FAIL2BAN_LOG" 2>/dev/null)
    fi

    echo "${count:-0}"
}

# Count unban events for a jail since a point in time.
# Globals:   FAIL2BAN_LOG (read)
# Arguments: $1 - jail name
#            $2 - start of the window, in a form accepted by both
#                 "journalctl --since" and "date -d" (e.g., "1 hour ago")
# Outputs:   number of unban events
get_unban_rate() {
    local jail="$1"
    local period="$2"
    local marker="[$jail] Unban "
    local count cutoff

    # Try journalctl first; -F matches the marker literally.
    count=$(journalctl -u fail2ban --since "$period" 2>/dev/null | grep -cF -- "$marker")
    [[ "$count" =~ ^[0-9]+$ ]] || count=0

    # If the journal has nothing, fall back to the log file.
    if [[ "$count" -eq 0 && -f "$FAIL2BAN_LOG" ]]; then
        # Assumes log lines start with "YYYY-MM-DD HH:MM:SS" in local time, so
        # a plain string comparison against the cutoff orders them correctly
        # and no date process is needed per line. An empty cutoff (date -d
        # failed) lets every event through.
        cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null)
        count=$(awk -v marker="$marker" -v cutoff="$cutoff" '
            index($0, marker) && $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ {
                if (($1 " " substr($2, 1, 8)) >= cutoff) hits++
            }
            END { print hits + 0 }
        ' "$FAIL2BAN_LOG" 2>/dev/null)
    fi

    echo "${count:-0}"
}

# Get top attacking IPs by ban count, taken from the whole log file.
# Globals:   FAIL2BAN_LOG (read)
# Arguments: $1 - jail name
#            $2 - maximum number of rows (default: 5; a non-numeric value
#                 falls back to 5)
# Outputs:   lines in "count IP" format (as printed by uniq -c), sorted by
#            count descending
get_top_banned_ips() {
    local jail="$1"
    local limit="${2:-5}"

    [[ "$limit" =~ ^[0-9]+$ ]] || limit=5

    # -F: the jail name is matched literally, not as a regular expression.
    grep -F -- "[$jail] Ban " "$FAIL2BAN_LOG" 2>/dev/null \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
        | sort | uniq -c | sort -rn | head -n "$limit"
}

# Count unique IPv4 addresses banned for a jail since a point in time.
# Globals:   FAIL2BAN_LOG (read)
# Arguments: $1 - jail name
#            $2 - start of the window, in a form accepted by both
#                 "journalctl --since" and "date -d" (e.g., "24 hours ago")
# Outputs:   number of unique IPs
get_unique_banned_ips() {
    local jail="$1"
    local period="$2"
    local marker="[$jail] Ban "
    local count cutoff

    # Try journalctl first; -F matches the marker literally.
    count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
        | grep -F -- "$marker" \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
        | sort -u | wc -l)
    count=${count//[[:space:]]/}
    [[ "$count" =~ ^[0-9]+$ ]] || count=0

    # Fall back to the log file if the journal has nothing.
    if [[ "$count" -eq 0 && -f "$FAIL2BAN_LOG" ]]; then
        # Assumes log lines start with "YYYY-MM-DD HH:MM:SS" in local time, so
        # a string comparison against the cutoff is enough (no date process
        # per line). An empty cutoff lets every event through.
        cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null)
        count=$(awk -v marker="$marker" -v cutoff="$cutoff" '
            {
                at = index($0, marker)
                if (!at) next
                if ($1 !~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/) next
                if (($1 " " substr($2, 1, 8)) < cutoff) next

                # The banned address follows the marker.
                rest = substr($0, at + length(marker))
                if (match(rest, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) {
                    ip = substr(rest, RSTART, RLENGTH)
                    if (!(ip in seen)) {
                        seen[ip] = 1
                        unique++
                    }
                }
            }
            END { print unique + 0 }
        ' "$FAIL2BAN_LOG" 2>/dev/null)
    fi

    echo "${count:-0}"
}

# Report ban duration figures for a jail.
# Arguments: $1 - jail name
# Outputs:   avg|min|max in seconds. Per-ban durations are not tracked; when
#            "fail2ban-client get <jail> bantime" returns a positive number,
#            that configured ban time is reported for all three fields.
#            Otherwise the historic placeholder values are printed.
get_ban_duration_stats() {
    local jail="$1"
    local bantime

    bantime=$(fail2ban-client get "$jail" bantime 2>/dev/null)
    bantime=${bantime//[[:space:]]/}

    if [[ "$bantime" =~ ^[0-9]+$ ]] && (( 10#$bantime > 0 )); then
        echo "$bantime|$bantime|$bantime"
    else
        echo "3600|1800|7200" # Placeholder: 1h avg, 30min min, 2h max
    fi
}

# Look up the "port" setting of a jail in the fail2ban configuration.
# Globals:   FAIL2BAN_CONF_DIR (read, optional) - configuration directory,
#            defaults to /etc/fail2ban
# Arguments: $1 - jail name
# Outputs:   the configured port value, or "unknown" if none is found
get_jail_port() {
    local jail="$1"
    local conf_dir="${FAIL2BAN_CONF_DIR:-/etc/fail2ban}"
    local port="" spec file any_section

    # Each entry is "file:flag". Flag 1 accepts a port line from any section
    # of a per-jail file when the jail's own section has none; flag 0 accepts
    # it only from the [jail] section.
    for spec in "$conf_dir/jail.d/$jail.conf:1" "$conf_dir/jail.local:0"; do
        file=${spec%:*}
        any_section=${spec##*:}
        [[ -f "$file" ]] || continue

        # A "/[jail]/,/^\[/" range cannot be used here: the header line also
        # matches the end pattern, so the range would end on its first line.
        port=$(awk -v section="[$jail]" -v key="port" -v any="$any_section" '
            /^\[/ {
                header = $0
                sub(/[ \t\r]+$/, "", header)
                inside = (header == section)
                next
            }
            $0 ~ ("^" key "[ \t]*=") {
                value = $0
                sub(/^[^=]*=[ \t]*/, "", value)
                sub(/[ \t\r]+$/, "", value)
                if (inside && scoped == "") scoped = value
                if (first == "") first = value
            }
            END {
                if (scoped != "") print scoped
                else if (any + 0 && first != "") print first
            }
        ' "$file" 2>/dev/null)

        [[ -n "$port" ]] && break
    done

    echo "${port:-unknown}"
}

# Detect protocol based on jail name.
# Arguments: $1 - jail name
# Outputs:   "udp" for names containing "dns" (unless an earlier tcp pattern
#            also matches), otherwise "tcp"
get_jail_protocol() {
    local jail="$1"

    # Heuristic on common service names; the first matching arm wins, so the
    # tcp services must stay ahead of the dns arm.
    case "$jail" in
        *ssh*|*http*|*nginx*|*apache*|*smtp*|*mail*)
            echo "tcp"
            ;;
        *dns*)
            echo "udp"
            ;;
        *)
            echo "tcp" # Default to TCP for unknown services
            ;;
    esac
}

# Look up the "logpath" setting of a jail in the fail2ban configuration.
# Globals:   FAIL2BAN_CONF_DIR (read, optional) - configuration directory,
#            defaults to /etc/fail2ban
# Arguments: $1 - jail name
# Outputs:   the configured log path, or /var/log/auth.log if none is found
get_jail_logpath() {
    local jail="$1"
    local conf_dir="${FAIL2BAN_CONF_DIR:-/etc/fail2ban}"
    local logpath="" spec file any_section

    # Each entry is "file:flag". Flag 1 accepts a logpath line from any
    # section of a per-jail file when the jail's own section has none; flag 0
    # accepts it only from the [jail] section.
    for spec in "$conf_dir/jail.d/$jail.conf:1" "$conf_dir/jail.local:0"; do
        file=${spec%:*}
        any_section=${spec##*:}
        [[ -f "$file" ]] || continue

        # A "/[jail]/,/^\[/" range cannot be used here: the header line also
        # matches the end pattern, so the range would end on its first line.
        logpath=$(awk -v section="[$jail]" -v key="logpath" -v any="$any_section" '
            /^\[/ {
                header = $0
                sub(/[ \t\r]+$/, "", header)
                inside = (header == section)
                next
            }
            $0 ~ ("^" key "[ \t]*=") {
                value = $0
                sub(/^[^=]*=[ \t]*/, "", value)
                sub(/[ \t\r]+$/, "", value)
                if (inside && scoped == "") scoped = value
                if (first == "") first = value
            }
            END {
                if (scoped != "") print scoped
                else if (any + 0 && first != "") print first
            }
        ' "$file" 2>/dev/null)

        [[ -n "$logpath" ]] && break
    done

    echo "${logpath:-/var/log/auth.log}"
}

# Look up the "filter" setting of a jail in the fail2ban configuration.
# Globals:   FAIL2BAN_CONF_DIR (read, optional) - configuration directory,
#            defaults to /etc/fail2ban
# Arguments: $1 - jail name
# Outputs:   the configured filter, or the jail name if none is configured
get_jail_filter() {
    local jail="$1"
    local conf_dir="${FAIL2BAN_CONF_DIR:-/etc/fail2ban}"
    local filter="" spec file any_section

    # Each entry is "file:flag". Flag 1 accepts a filter line from any section
    # of a per-jail file when the jail's own section has none; flag 0 accepts
    # it only from the [jail] section.
    for spec in \
        "$conf_dir/jail.d/$jail.local:1" \
        "$conf_dir/jail.d/$jail.conf:1" \
        "$conf_dir/jail.local:0"; do
        file=${spec%:*}
        any_section=${spec##*:}
        [[ -f "$file" ]] || continue

        filter=$(awk -v section="[$jail]" -v key="filter" -v any="$any_section" '
            /^\[/ {
                header = $0
                sub(/[ \t\r]+$/, "", header)
                inside = (header == section)
                next
            }
            $0 ~ ("^" key "[ \t]*=") {
                value = $0
                sub(/^[^=]*=[ \t]*/, "", value)
                sub(/[ \t\r]+$/, "", value)
                if (inside && scoped == "") scoped = value
                if (first == "") first = value
            }
            END {
                if (scoped != "") print scoped
                else if (any + 0 && first != "") print first
            }
        ' "$file" 2>/dev/null)

        [[ -n "$filter" ]] && break
    done

    # The default is applied to the captured value; a "|| echo" after the
    # pipeline never fired because awk always exits 0.
    echo "${filter:-$jail}"
}

# Report whether a jail is active.
# Arguments: $1 - jail name
# Outputs:   "1" if "fail2ban-client status <jail>" succeeds, otherwise "0"
get_jail_enabled() {
    local jail="$1"

    if fail2ban-client status "$jail" >/dev/null 2>&1; then
        echo "1"
    else
        echo "0"
    fi
}

# Count IPv4 addresses banned at least N times in the last 7 days.
# Globals:   FAIL2BAN_LOG (read)
# Arguments: $1 - jail name
#            $2 - threshold N (default: 2; a non-numeric value falls back
#                 to 2)
# Outputs:   number of addresses with N or more bans
get_repeat_offender_count() {
    local jail="$1"
    local threshold="${2:-2}" # Default: 2+ bans = repeat offender
    local marker="[$jail] Ban "
    local count cutoff

    [[ "$threshold" =~ ^[0-9]+$ ]] || threshold=2

    # Try journalctl first; -F matches the marker literally.
    count=$(journalctl -u fail2ban --since "7 days ago" 2>/dev/null \
        | grep -F -- "$marker" \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
        | sort | uniq -c \
        | awk -v t="$threshold" '$1 >= t + 0 { n++ } END { print n + 0 }')
    [[ "$count" =~ ^[0-9]+$ ]] || count=0

    # Fall back to the log file if the journal has nothing.
    if [[ "$count" -eq 0 && -f "$FAIL2BAN_LOG" ]]; then
        # Assumes log lines start with "YYYY-MM-DD HH:MM:SS" in local time, so
        # a string comparison against the cutoff is enough (no date process
        # per line). An empty cutoff lets every event through.
        cutoff=$(date -d "7 days ago" '+%Y-%m-%d %H:%M:%S' 2>/dev/null)
        count=$(awk -v marker="$marker" -v cutoff="$cutoff" -v threshold="$threshold" '
            {
                at = index($0, marker)
                if (!at) next
                if ($1 !~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/) next
                if (($1 " " substr($2, 1, 8)) < cutoff) next

                # The banned address follows the marker.
                rest = substr($0, at + length(marker))
                if (match(rest, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) {
                    bans[substr(rest, RSTART, RLENGTH)]++
                }
            }
            END {
                repeat_count = 0
                for (ip in bans) {
                    if (bans[ip] >= threshold + 0) repeat_count++
                }
                print repeat_count
            }
        ' "$FAIL2BAN_LOG" 2>/dev/null)
    fi

    echo "${count:-0}"
}

# Report the size of the fail2ban log file.
# Globals: FAIL2BAN_LOG (read)
# Outputs: size in bytes, or 0 if the file is missing or cannot be read
get_log_size() {
    local size=""

    if [[ -f "$FAIL2BAN_LOG" ]]; then
        # stat -c is not available everywhere; wc -c is the fallback.
        size=$(stat -c %s "$FAIL2BAN_LOG" 2>/dev/null) \
            || size=$(wc -c <"$FAIL2BAN_LOG" 2>/dev/null)
    fi

    size=${size//[[:space:]]/}
    echo "${size:-0}"
}

# Report how long ago the fail2ban log file was last modified.
# Globals: FAIL2BAN_LOG (read)
# Outputs: age in seconds, or 0 if the file is missing or its modification
#          time cannot be determined
get_log_age() {
    local modified now

    if [[ -f "$FAIL2BAN_LOG" ]] \
        && modified=$(stat -c %Y "$FAIL2BAN_LOG" 2>/dev/null) \
        && [[ "$modified" =~ ^[0-9]+$ ]]; then
        now=$(date +%s)
        echo $((now - modified))
    else
        # A failed stat must not be treated as mtime 0, which would report
        # the whole epoch as the age.
        echo "0"
    fi
}

# Report when the fail2ban log was last rotated.
# Globals: FAIL2BAN_LOG (read)
# Outputs: modification time (Unix timestamp) of the newest rotated log
#          found next to FAIL2BAN_LOG, or 0 if there is none
get_log_rotation_timestamp() {
    local newest="" candidate

    # Numbered (.1, .1.gz) and date-suffixed (-YYYYMMDD[.gz]) rotations. A
    # glob without a match stays literal and is skipped by the -e test.
    for candidate in "${FAIL2BAN_LOG}".1 "${FAIL2BAN_LOG}".1.gz "${FAIL2BAN_LOG}"-*; do
        [[ -e "$candidate" ]] || continue
        if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
            newest=$candidate
        fi
    done

    if [[ -n "$newest" ]]; then
        stat -c %Y "$newest" 2>/dev/null || echo "0"
    else
        echo "0"
    fi
}

# ============================================================================
# METRIC GENERATION
# ============================================================================

# Print the "# HELP" and "# TYPE" lines that introduce one metric family.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text
_f2b_metric_header() {
    printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$3" "$1" "$2"
}

# Escape a string for use as a Prometheus label value (backslash, double
# quote and newline) and print it without a trailing newline.
# Arguments: $1 - raw value
_f2b_label_value() {
    local value="$1"
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\n'/\\n}
    printf '%s' "$value"
}

# Generate all Prometheus metrics.
# Outputs: metrics in Prometheus text format on stdout. When check_fail2ban
#          fails only "fail2ban_up 0" (with its HELP/TYPE lines) is printed.
# Returns: 0
generate_metrics() {
    local script_start
    script_start=$(date +%s)

    # Check fail2ban status first
    if ! check_fail2ban; then
        _f2b_metric_header fail2ban_up gauge "Fail2ban exporter status"
        echo "fail2ban_up 0"
        return 0
    fi

    # get_jails prints one space-separated line; split it into an array.
    local -a jails=()
    read -r -a jails <<<"$(get_jails)"

    # Query the per-jail sources once and reuse the values below: the same
    # figures feed several metric families.
    local -a failed_now=() banned_now=() failed_all=() banned_all=()
    local -a last_ban=() last_unban=() bans_day=()
    local i jail cf cb tf tb
    for i in "${!jails[@]}"; do
        jail=${jails[i]}
        IFS='|' read -r cf cb tf tb <<<"$(get_jail_stats "$jail")"
        failed_now[i]=${cf:-0}
        banned_now[i]=${cb:-0}
        failed_all[i]=${tf:-0}
        banned_all[i]=${tb:-0}
        last_ban[i]=$(get_last_ban_timestamp "$jail")
        last_unban[i]=$(get_last_unban_timestamp "$jail")
        bans_day[i]=$(get_ban_rate "$jail" "24 hours ago")
    done

    # ------------------------------------------------------------------------
    # Core metrics
    # ------------------------------------------------------------------------
    local version
    version=$(fail2ban-client version 2>/dev/null | head -1 | awk '{print $NF}')

    _f2b_metric_header fail2ban_up gauge "Fail2ban exporter status"
    echo "fail2ban_up 1"
    echo ""

    _f2b_metric_header fail2ban_server_info gauge "Fail2ban server information"
    echo "fail2ban_server_info{version=\"$(_f2b_label_value "$version")\",exporter_version=\"2.0\"} 1"
    echo ""

    _f2b_metric_header fail2ban_jail_count gauge "Total number of jails"
    echo "fail2ban_jail_count ${#jails[@]}"
    echo ""

    _f2b_metric_header fail2ban_jail_enabled gauge "Jail enabled status"
    for jail in "${jails[@]}"; do
        echo "fail2ban_jail_enabled{jail=\"$jail\"} $(get_jail_enabled "$jail")"
    done
    echo ""

    _f2b_metric_header fail2ban_jail_failed_current gauge "Currently failed login attempts per jail"
    for i in "${!jails[@]}"; do
        echo "fail2ban_jail_failed_current{jail=\"${jails[i]}\"} ${failed_now[i]}"
    done
    echo ""

    _f2b_metric_header fail2ban_jail_banned_current gauge "Currently banned IPs per jail"
    for i in "${!jails[@]}"; do
        echo "fail2ban_jail_banned_current{jail=\"${jails[i]}\"} ${banned_now[i]}"
    done
    echo ""

    _f2b_metric_header fail2ban_jail_failed_total counter "Total failed login attempts per jail"
    for i in "${!jails[@]}"; do
        echo "fail2ban_jail_failed_total{jail=\"${jails[i]}\"} ${failed_all[i]}"
    done
    echo ""

    _f2b_metric_header fail2ban_jail_banned_total counter "Total banned IPs per jail (all time)"
    for i in "${!jails[@]}"; do
        echo "fail2ban_jail_banned_total{jail=\"${jails[i]}\"} ${banned_all[i]}"
    done
    echo ""

    # Ban rate: ratio of banned to failed attempts (0 when nothing failed).
    _f2b_metric_header fail2ban_jail_ban_rate gauge "Ban rate (total_banned / total_failed) per jail"
    local ratio
    for i in "${!jails[@]}"; do
        ratio="0"
        # The -gt test doubles as the division-by-zero guard; a non-numeric
        # value makes it fail quietly.
        if [ "${failed_all[i]}" -gt 0 ] 2>/dev/null; then
            ratio=$(awk -v banned="${banned_all[i]}" -v failed="${failed_all[i]}" \
                'BEGIN { printf "%.4f", banned / failed }' 2>/dev/null || echo "0")
        fi
        echo "fail2ban_jail_ban_rate{jail=\"${jails[i]}\"} $ratio"
    done
    echo ""

    # ------------------------------------------------------------------------
    # ENHANCED METRICS (v2.0) - Jail Health & Activity Tracking
    # ------------------------------------------------------------------------
    _f2b_metric_header fail2ban_jail_last_ban_timestamp gauge "Timestamp of last ban per jail"
    for i in "${!jails[@]}"; do
        echo "fail2ban_jail_last_ban_timestamp{jail=\"${jails[i]}\"} ${last_ban[i]}"
    done
    echo ""

    _f2b_metric_header fail2ban_jail_last_unban_timestamp gauge "Timestamp of last unban per jail"
    for i in "${!jails[@]}"; do
        echo "fail2ban_jail_last_unban_timestamp{jail=\"${jails[i]}\"} ${last_unban[i]}"
    done
    echo ""

    # Ban/unban counts per period
    _f2b_metric_header fail2ban_jail_bans_per_period gauge "Bans in time period per jail"
    for i in "${!jails[@]}"; do
        jail=${jails[i]}
        echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"1h\"} $(get_ban_rate "$jail" "1 hour ago")"
        echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"24h\"} ${bans_day[i]}"
    done
    echo ""

    _f2b_metric_header fail2ban_jail_unbans_per_period gauge "Unbans in time period per jail"
    for jail in "${jails[@]}"; do
        echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"1h\"} $(get_unban_rate "$jail" "1 hour ago")"
        echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"24h\"} $(get_unban_rate "$jail" "24 hours ago")"
    done
    echo ""

    # Unique IPs
    _f2b_metric_header fail2ban_jail_unique_banned_ips gauge "Unique IPs banned in period per jail"
    for jail in "${jails[@]}"; do
        echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"1h\"} $(get_unique_banned_ips "$jail" "1 hour ago")"
        echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"24h\"} $(get_unique_banned_ips "$jail" "24 hours ago")"
    done
    echo ""

    # Jail configuration; values come from config files, so escape them.
    _f2b_metric_header fail2ban_jail_info gauge "Jail configuration information"
    local port protocol filter
    for jail in "${jails[@]}"; do
        port=$(_f2b_label_value "$(get_jail_port "$jail")")
        protocol=$(get_jail_protocol "$jail")
        filter=$(_f2b_label_value "$(get_jail_filter "$jail")")
        echo "fail2ban_jail_info{jail=\"$jail\",port=\"$port\",protocol=\"$protocol\",filter=\"$filter\"} 1"
    done
    echo ""

    # Top attackers (as labels with counts). get_top_banned_ips is not
    # time-limited, hence the wording of the help text.
    _f2b_metric_header fail2ban_jail_top_attacker_count gauge "Top attacking IPs per jail (current log file)"
    local count ip
    for jail in "${jails[@]}"; do
        while read -r count ip; do
            [ -z "$ip" ] && continue
            echo "fail2ban_jail_top_attacker_count{jail=\"$jail\",ip=\"$ip\"} $count"
        done < <(get_top_banned_ips "$jail" 5)
    done
    echo ""

    # Ban effectiveness: total bans in 24h divided by 24 hours.
    _f2b_metric_header fail2ban_jail_ban_rate_per_hour gauge "Bans per hour over last 24h per jail"
    local day_total hourly
    for i in "${!jails[@]}"; do
        # Strip whitespace and default to 0 before the integer test.
        day_total=${bans_day[i]//[[:space:]]/}
        day_total=${day_total:-0}
        if [ "$day_total" -gt 0 ] 2>/dev/null; then
            hourly=$(awk -v total="$day_total" 'BEGIN { printf "%.2f", total / 24 }' 2>/dev/null || echo "0")
        else
            hourly="0.00"
        fi
        echo "fail2ban_jail_ban_rate_per_hour{jail=\"${jails[i]}\"} $hourly"
    done
    echo ""

    # Repeat offenders
    _f2b_metric_header fail2ban_jail_repeat_offenders gauge "IPs banned multiple times (7 day window)"
    local threshold
    for jail in "${jails[@]}"; do
        for threshold in 2 5 10; do
            echo "fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"${threshold}+\"} $(get_repeat_offender_count "$jail" "$threshold")"
        done
    done
    echo ""

    # Log file health metrics
    _f2b_metric_header fail2ban_log_size_bytes gauge "Size of fail2ban log file"
    echo "fail2ban_log_size_bytes $(get_log_size)"
    echo ""

    _f2b_metric_header fail2ban_log_age_seconds gauge "Time since last log file modification"
    echo "fail2ban_log_age_seconds $(get_log_age)"
    echo ""

    _f2b_metric_header fail2ban_log_rotation_timestamp gauge "Unix timestamp of last log rotation"
    echo "fail2ban_log_rotation_timestamp $(get_log_rotation_timestamp)"
    echo ""

    # Time since last ban/unban (easier to alert on than timestamps); 0 is
    # also reported when no event was found.
    local now elapsed
    now=$(date +%s)

    _f2b_metric_header fail2ban_jail_seconds_since_last_ban gauge "Seconds since last ban per jail"
    for i in "${!jails[@]}"; do
        elapsed=0
        if [ "${last_ban[i]}" -gt 0 ] 2>/dev/null; then
            elapsed=$((now - last_ban[i]))
        fi
        echo "fail2ban_jail_seconds_since_last_ban{jail=\"${jails[i]}\"} $elapsed"
    done
    echo ""

    _f2b_metric_header fail2ban_jail_seconds_since_last_unban gauge "Seconds since last unban per jail"
    for i in "${!jails[@]}"; do
        elapsed=0
        if [ "${last_unban[i]}" -gt 0 ] 2>/dev/null; then
            elapsed=$((now - last_unban[i]))
        fi
        echo "fail2ban_jail_seconds_since_last_unban{jail=\"${jails[i]}\"} $elapsed"
    done
    echo ""

    # Exporter runtime
    local script_end
    script_end=$(date +%s)

    _f2b_metric_header fail2ban_exporter_duration_seconds gauge "Time to generate all metrics"
    echo "fail2ban_exporter_duration_seconds $((script_end - script_start))"
    echo ""

    _f2b_metric_header fail2ban_exporter_last_run_timestamp gauge "Unix timestamp of last successful run"
    echo "fail2ban_exporter_last_run_timestamp $script_end"
    echo ""
}

# ============================================================================
# HTTP SERVER MODE
# ============================================================================

# Print the complete HTTP response (status line, headers, body) for one
# request.
# Arguments: $1 - first line of the HTTP request
_f2b_http_response() {
    local request="$1"

    if [[ "$request" =~ ^GET\ /metrics ]]; then
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
        generate_metrics
    else
        # Serve an HTML landing page for every other request
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
        cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Fail2ban Exporter v2.0</title></head>
<body>
<h1>Fail2ban Prometheus Exporter (Enhanced v2.0)</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>New Metrics</h2>
<ul>
<li>Last ban/unban timestamps per jail</li>
<li>Ban/unban rates (1h, 24h)</li>
<li>Unique banned IPs per period</li>
<li>Top attackers per jail</li>
<li>Jail configuration info (port, protocol, filter)</li>
<li>Ban rate per hour</li>
</ul>
</body>
</html>
EOF
    fi
}

# Run a simple HTTP server using netcat; one connection is served per loop
# iteration. Metrics are served on /metrics, a landing page elsewhere.
# Globals: HTTP_PORT (read); _F2B_HTTP_FIFO_DIR (written, removed on exit)
# Returns: does not return under normal operation; exits 1 if nc is missing
#          or the FIFO cannot be created
run_http_server() {
    echo "Starting fail2ban exporter on port $HTTP_PORT..." >&2

    if ! command -v nc >/dev/null 2>&1; then
        echo "ERROR: netcat (nc) required for HTTP mode" >&2
        exit 1
    fi

    # The handler has to read what nc receives and nc has to send what the
    # handler prints. A plain "handler | nc" only gives the second half (the
    # handler would read the script's own stdin), so the response travels
    # back to nc through a FIFO.
    local fifo request nc_status
    _F2B_HTTP_FIFO_DIR=$(mktemp -d) || {
        echo "ERROR: cannot create temporary directory" >&2
        exit 1
    }
    fifo="$_F2B_HTTP_FIFO_DIR/response"
    if ! mkfifo "$fifo"; then
        rm -rf -- "${_F2B_HTTP_FIFO_DIR:?}"
        echo "ERROR: cannot create FIFO $fifo" >&2
        exit 1
    fi
    trap 'rm -rf -- "${_F2B_HTTP_FIFO_DIR:?}"' EXIT

    # Infinite loop accepting HTTP requests
    while true; do
        # -q 1: wait 1 second after EOF on the FIFO before closing
        nc -l -p "$HTTP_PORT" -q 1 <"$fifo" 2>/dev/null | {
            read -r request
            _f2b_http_response "$request"
        } >"$fifo"
        nc_status=${PIPESTATUS[0]}

        # Avoid a busy loop when nc cannot listen (e.g. port already in use).
        if (( nc_status != 0 )); then
            sleep 1
        fi
    done
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Main entry point - routes to the requested output mode.
# Globals:   HTTP_MODE, OUTPUT_FILE (read, set by parse_args)
# Arguments: the script's command-line arguments
# Returns:   status of the selected mode; exits 1 when the metrics file
#            cannot be produced (an existing file is left untouched)
main() {
    parse_args "$@"

    if [ "$HTTP_MODE" = true ]; then
        # Run HTTP server (blocks until killed)
        run_http_server
        return
    fi

    if [ -z "$OUTPUT_FILE" ]; then
        # Default: output to stdout
        generate_metrics
        return
    fi

    # File mode: write to a temp file and rename it into place.
    local output_dir temp_file file_lines
    output_dir="$(dirname "$OUTPUT_FILE")"
    if ! mkdir -p "$output_dir"; then
        echo "ERROR: Cannot create directory $output_dir" >&2
        exit 1
    fi

    # Create temp file in SAME directory for atomic rename (same filesystem)
    if ! temp_file=$(mktemp "${output_dir}/.fail2ban_metrics.XXXXXX"); then
        echo "ERROR: Cannot create temporary file in $output_dir" >&2
        exit 1
    fi

    # Generate metrics to temp file
    if ! generate_metrics >"$temp_file" 2>/dev/null; then
        rm -f "$temp_file"
        echo "ERROR: Failed to generate metrics" >&2
        exit 1
    fi

    # Validate: the output must contain a fail2ban_up sample. "fail2ban_up 0"
    # (fail2ban down) is only three lines long but valid, so the check is on
    # content rather than on a minimum line count.
    if ! grep -q '^fail2ban_up [01]$' "$temp_file"; then
        rm -f "$temp_file"
        echo "ERROR: Metrics output incomplete (no fail2ban_up sample), keeping previous" >&2
        exit 1
    fi
    file_lines=$(wc -l <"$temp_file")
    file_lines=${file_lines//[[:space:]]/}

    # Set permissions before move (mktemp creates the file owner-only)
    chmod 644 "$temp_file"

    # Atomic rename - no gap where file is missing
    if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
        rm -f "$temp_file"
        echo "ERROR: Cannot write $OUTPUT_FILE" >&2
        exit 1
    fi

    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
}

# Execute main function with all script arguments
main "$@"
Reference in New Issue
Block a user