#!/bin/bash
################################################################################
# Script Name: add-fail2ban-head-crawler.sh
# Version:     1.0
# Description: Adds a Fail2ban jail to block HEAD-only crawlers — bots that
#              systematically send HEAD requests with no referer to probe or
#              index your site while spoofing real browser user agents.
#
# Author:  Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Usage:
#   sudo ./add-fail2ban-head-crawler.sh
#   sudo ./add-fail2ban-head-crawler.sh --logpath /var/log/nginx/access.log
#   sudo ./add-fail2ban-head-crawler.sh --maxretry 10
#   sudo ./add-fail2ban-head-crawler.sh --dry-run
#
################################################################################

set -euo pipefail

# ============================================================================
# DEFAULTS
# ============================================================================

readonly VERSION="1.0"
readonly SCRIPT_NAME="${0##*/}"

LOGPATH="auto"     # "auto" triggers detection in detect_logpath
BANTIME="86400"    # seconds
MAXRETRY="5"
FINDTIME="300"     # seconds
DRY_RUN=false
# main() reads $REMOVE under `set -u`, so it must always have a value.
REMOVE=false

# Colors
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Colourised log helpers; only log_error writes to stderr.
log_info()  { echo -e "${GREEN}[INFO]${NC} $*"; }
log_warn()  { echo -e "${YELLOW}[WARN]${NC} $*"; }
log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
log_step()  { echo -e "${CYAN}[STEP]${NC} $*"; }

# Print command-line help to stdout.
# NOTE(review): the original help text was lost from the source; this wording
# and option list are reconstructed from the variables the script uses —
# confirm against the upstream script.
show_usage() {
  cat <<EOF
Usage: sudo ${SCRIPT_NAME} [OPTIONS]

Adds a Fail2ban jail that bans clients sending repeated HEAD requests
with no referer.

Options:
  --logpath PATH    Access log file or glob (default: auto-detect)
  --bantime SECS    Ban duration in seconds (default: ${BANTIME})
  --maxretry N      HEAD requests allowed before a ban (default: ${MAXRETRY})
  --findtime SECS   Window in seconds for counting requests (default: ${FINDTIME})
  --dry-run         Show what would be done without changing anything
  --remove          Remove the head-crawler jail and filter
  -h, --help        Show this help and exit
  --version         Show version and exit
EOF
}

# Abort unless the option named in $1 is followed by a value.
# Arguments: $1 - option name, $2 - number of arguments still unparsed
require_value() {
  if (( $2 < 2 )); then
    log_error "Option $1 requires a value"
    exit 1
  fi
}

# Abort unless $2 is a positive decimal integer. The summary in main() feeds
# these values to shell arithmetic, so anything else must be rejected early.
# Arguments: $1 - option name, $2 - value to check
require_positive_int() {
  if [[ ! "$2" =~ ^[1-9][0-9]*$ ]]; then
    log_error "Option $1 expects a positive integer, got: $2"
    exit 1
  fi
}

# Parse command-line options into the global settings.
# NOTE(review): reconstructed — the original body was lost from the source.
# Globals written: LOGPATH, BANTIME, MAXRETRY, FINDTIME, DRY_RUN, REMOVE
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --logpath)
        require_value "$1" "$#"
        LOGPATH="$2"
        shift 2
        ;;
      --bantime)
        require_value "$1" "$#"
        require_positive_int "$1" "$2"
        BANTIME="$2"
        shift 2
        ;;
      --maxretry)
        require_value "$1" "$#"
        require_positive_int "$1" "$2"
        MAXRETRY="$2"
        shift 2
        ;;
      --findtime)
        require_value "$1" "$#"
        require_positive_int "$1" "$2"
        FINDTIME="$2"
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --remove)
        REMOVE=true
        shift
        ;;
      -h|--help)
        show_usage
        exit 0
        ;;
      --version)
        echo "${SCRIPT_NAME} v${VERSION}"
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        show_usage >&2
        exit 1
        ;;
    esac
  done
}

# Abort unless running as root.
# NOTE(review): reconstructed — the original body was lost from the source.
check_root() {
  if (( EUID != 0 )); then
    log_error "This script must be run as root (use sudo)"
    exit 1
  fi
}

# Abort unless fail2ban-client is on PATH and the fail2ban service is active.
# NOTE(review): the opening `command -v` test is reconstructed; the messages
# and the systemctl check are from the original.
check_fail2ban() {
  if ! command -v fail2ban-client &>/dev/null; then
    log_error "Fail2ban is not installed"
    log_error "Install it first: https://mylinux.work/guides/fail2ban-setup/"
    exit 1
  fi

  if ! systemctl is-active --quiet fail2ban; then
    log_error "Fail2ban is not running"
    exit 1
  fi

  log_info "Fail2ban is installed and running"
}

# Resolve LOGPATH: validate a user-supplied path/glob, or probe a fixed list
# of well-known access-log locations in order and keep the first that exists.
# Globals: LOGPATH (read; written when auto-detecting)
detect_logpath() {
  if [[ "$LOGPATH" != "auto" ]]; then
    # Deliberately unquoted so a glob passed via --logpath is expanded.
    # shellcheck disable=SC2086
    local matches=( $LOGPATH )
    if [[ ${#matches[@]} -eq 0 || ! -f "${matches[0]}" ]]; then
      log_error "Log file not found: $LOGPATH"
      exit 1
    fi
    log_info "Using specified log path: $LOGPATH"
    return
  fi

  log_step "Auto-detecting web server access log..."

  # HestiaCP — apache domains
  # An unmatched glob stays literal, so the -f test below rejects it.
  local hestia_apache=( /var/log/apache2/domains/*.log )
  if [[ -f "${hestia_apache[0]:-}" ]]; then
    LOGPATH="/var/log/apache2/domains/*.log"
    log_info "Detected HestiaCP apache: $LOGPATH"
    return
  fi

  # HestiaCP — nginx domains
  local hestia_nginx=( /var/log/nginx/domains/*.log )
  if [[ -f "${hestia_nginx[0]:-}" ]]; then
    LOGPATH="/var/log/nginx/domains/*.log"
    log_info "Detected HestiaCP nginx: $LOGPATH"
    return
  fi

  # Nginx (standard)
  if [[ -f /var/log/nginx/access.log ]]; then
    LOGPATH="/var/log/nginx/access.log"
    log_info "Detected nginx: $LOGPATH"
    return
  fi

  # Apache (Debian/Ubuntu)
  if [[ -f /var/log/apache2/access.log ]]; then
    LOGPATH="/var/log/apache2/access.log"
    log_info "Detected apache2: $LOGPATH"
    return
  fi

  # Apache (RHEL/Rocky)
  if [[ -f /var/log/httpd/access_log ]]; then
    LOGPATH="/var/log/httpd/access_log"
    log_info "Detected httpd: $LOGPATH"
    return
  fi

  log_error "Could not auto-detect access log. Use --logpath to specify."
  exit 1
}

# ============================================================================
# REMOVE
# ============================================================================

# Delete the jail and filter files, reload Fail2ban, and exit the script.
# Always terminates with status 0 on success so main() never continues into
# the install flow after a removal (dry-run included).
do_remove() {
  local filter_file="/etc/fail2ban/filter.d/head-crawler.conf"
  local jail_file="/etc/fail2ban/jail.d/head-crawler.conf"

  log_step "Removing HEAD crawler jail..."

  if $DRY_RUN; then
    log_info "[DRY RUN] Would remove $filter_file"
    log_info "[DRY RUN] Would remove $jail_file"
    log_info "[DRY RUN] Would reload fail2ban"
    # Exit rather than return: returning let main() fall through and run
    # the install steps after a "--remove --dry-run".
    exit 0
  fi

  if [[ -f "$jail_file" ]]; then
    rm -f "$jail_file"
    log_info "Removed: $jail_file"
  else
    log_warn "Jail config not found: $jail_file"
  fi

  if [[ -f "$filter_file" ]]; then
    rm -f "$filter_file"
    log_info "Removed: $filter_file"
  else
    log_warn "Filter not found: $filter_file"
  fi

  fail2ban-client reload
  sleep 2
  log_info "Fail2ban reloaded — head-crawler jail removed"
  exit 0
}

# ============================================================================
# INSTALL FILTER
# ============================================================================

# Write the filter definition to filter.d, keeping a .bak copy of any
# existing file. In dry-run mode the generated filter is only printed.
install_filter() {
  local filter_file="/etc/fail2ban/filter.d/head-crawler.conf"

  log_step "Installing filter: $filter_file"

  if $DRY_RUN; then
    log_info "[DRY RUN] Would create $filter_file"
    echo ""
    generate_filter
    echo ""
    return
  fi

  if [[ -f "$filter_file" ]]; then
    log_warn "Filter already exists — backing up to ${filter_file}.bak"
    cp "$filter_file" "${filter_file}.bak"
  fi

  generate_filter > "$filter_file"
  log_info "Filter installed: $filter_file"
}

# Emit the Fail2ban filter definition on stdout.
# The here-doc delimiter is quoted so backslashes and <HOST> stay literal.
generate_filter() {
  cat <<'EOF'
# Fail2ban filter to block HEAD-only crawlers
# https://mylinux.work
#
# Catches bots that send HEAD requests with no referer. These are typically
# scrapers, SEO tools, or reconnaissance bots that spoof real browser user
# agents and rotate through cloud IPs to avoid detection.
#
# The filter matches:
#   - HTTP HEAD method
#   - No referer (logged as "-")
#   - Any user agent (spoofed or otherwise)
#
# Combined with a low maxretry (default: 5 in 5 min), this catches
# systematic crawlers while ignoring occasional legitimate HEAD requests
# (browser prefetch, monitoring probes).

[Definition]

# HEAD request with no referer — combined log format
# Format: IP - - [date] "HEAD /path HTTP/x.x" status size "-" "user agent"
# The size field may be "-" (Apache %b logs "-" when no body is sent).
failregex = ^<HOST> \S+ \S+ \[.*\] "HEAD \S+ \S+" \d+ (?:\d+|-) "-" ".*"

ignoreregex =

# Author: Phil Connor — https://mylinux.work
EOF
}

# ============================================================================
# INSTALL JAIL
# ============================================================================

# Write the jail definition to jail.d, keeping a .bak copy of any existing
# file. In dry-run mode the generated jail is only printed.
install_jail() {
  local jail_file="/etc/fail2ban/jail.d/head-crawler.conf"

  log_step "Installing jail: $jail_file"

  if $DRY_RUN; then
    log_info "[DRY RUN] Would create $jail_file"
    echo ""
    generate_jail
    echo ""
    return
  fi

  if [[ -f "$jail_file" ]]; then
    log_warn "Jail config already exists — backing up to ${jail_file}.bak"
    cp "$jail_file" "${jail_file}.bak"
  fi

  generate_jail > "$jail_file"
  log_info "Jail config installed: $jail_file"
}

# Emit the jail definition on stdout, filled in from the current settings.
# NOTE(review): the original here-doc was lost from the source; this is a
# minimal reconstruction from the settings the script exposes. Confirm the
# option set (e.g. port/action) against the upstream script.
# Globals read: LOGPATH, MAXRETRY, FINDTIME, BANTIME
generate_jail() {
  cat <<EOF
# Fail2ban jail to block HEAD-only crawlers
# https://mylinux.work

[head-crawler]
enabled  = true
port     = http,https
filter   = head-crawler
logpath  = ${LOGPATH}
maxretry = ${MAXRETRY}
findtime = ${FINDTIME}
bantime  = ${BANTIME}
EOF
}

# ============================================================================
# RELOAD & VERIFY
# ============================================================================

# Reload Fail2ban and confirm the service is still active afterwards.
# NOTE(review): everything before the log_warn line is reconstructed — the
# original opening (step message, dry-run guard, config-test command) was
# lost from the source.
reload_fail2ban() {
  log_step "Reloading Fail2ban..."

  if $DRY_RUN; then
    log_info "[DRY RUN] Would reload fail2ban"
    return
  fi

  if ! fail2ban-client -t &>/dev/null; then
    log_warn "Config test not available — reloading directly"
  fi

  fail2ban-client reload
  sleep 2

  if systemctl is-active --quiet fail2ban; then
    log_info "Fail2ban reloaded successfully"
  else
    log_error "Fail2ban failed to restart — check: journalctl -u fail2ban"
    exit 1
  fi
}

# Print the jail status; abort with debugging hints if the jail is absent.
verify_jail() {
  log_step "Verifying head-crawler jail..."

  if $DRY_RUN; then
    log_info "[DRY RUN] Would verify jail status"
    return
  fi

  echo ""
  if fail2ban-client status head-crawler 2>/dev/null; then
    echo ""
    log_info "HEAD crawler jail is active and monitoring $LOGPATH"
  else
    log_error "Jail 'head-crawler' is not running — check: fail2ban-client status"
    log_error "Debug with: fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/head-crawler.conf"
    exit 1
  fi
}

# Dry-run only: run the filter regex against the first matching log file and
# show the tail of fail2ban-regex's report. Does nothing outside dry-run.
# The regex here must stay identical to the one in generate_filter.
test_against_logs() {
  if $DRY_RUN; then
    # Deliberately unquoted so a glob in LOGPATH is expanded.
    # shellcheck disable=SC2086
    local matches=( $LOGPATH )
    if [[ -f "${matches[0]:-}" ]]; then
      log_step "Testing filter against existing logs..."
      echo ""
      # The preview is best-effort: under `set -e -o pipefail` a failing
      # fail2ban-regex would otherwise abort the whole dry run.
      {
        fail2ban-regex "${matches[0]}" /dev/stdin <<'FILTER' 2>&1 | tail -5
[Definition]
failregex = ^<HOST> \S+ \S+ \[.*\] "HEAD \S+ \S+" \d+ (?:\d+|-) "-" ".*"
ignoreregex =
FILTER
      } || log_warn "fail2ban-regex test run failed — continuing"
      echo ""
    fi
  fi
}

# ============================================================================
# MAIN
# ============================================================================

# Entry point: parse options, check prerequisites, then either remove the
# jail or install filter + jail, reload Fail2ban and print a summary.
main() {
  parse_args "$@"

  echo ""
  echo "============================================"
  echo "  Fail2ban HEAD Crawler Blocker v${VERSION}"
  echo "  https://mylinux.work"
  echo "============================================"
  echo ""

  check_root
  check_fail2ban

  if $REMOVE; then
    do_remove   # exits the script
  fi

  detect_logpath
  test_against_logs
  install_filter
  install_jail
  reload_fail2ban
  verify_jail

  echo ""
  echo "============================================"
  echo "  Setup Complete"
  echo "============================================"
  echo ""
  echo "  Jail:       head-crawler"
  echo "  Log:        $LOGPATH"
  echo "  Ban time:   ${BANTIME}s ($(( BANTIME / 3600 ))h)"
  echo "  Max retry:  $MAXRETRY (HEAD requests before ban)"
  echo "  Find time:  ${FINDTIME}s ($(( FINDTIME / 60 ))m window)"
  echo ""
  echo "  Useful commands:"
  echo "    fail2ban-client status head-crawler"
  echo "    fail2ban-client set head-crawler unbanip <IP>"
  echo "    fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/head-crawler.conf"
  echo ""
}

main "$@"