#!/bin/bash
################################################################################
# Script Name: add-fail2ban-ai-bots.sh
# Version: 1.1
# Description: Adds a Fail2ban jail to block AI scrapers and unwanted bots
#              that ignore robots.txt. Installs filter + jail config and
#              reloads Fail2ban.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Usage:
#   sudo ./add-fail2ban-ai-bots.sh
#   sudo ./add-fail2ban-ai-bots.sh --logpath /var/log/nginx/access.log
#   sudo ./add-fail2ban-ai-bots.sh --bantime 604800
#   sudo ./add-fail2ban-ai-bots.sh --dry-run
#
# Changelog:
#   1.1 — 2026-05-04: Removed Claude-Web, Perplexity-User, ChatGPT-User, and
#         OAI-SearchBot from blocklist. These are user-facing fetcher bots that
#         retrieve content when someone pastes a URL into an AI chat or search.
#         Blocking them prevents your content from being cited in AI answers.
#         Training crawlers (ClaudeBot, PerplexityBot, GPTBot) remain blocked.
#
################################################################################

set -euo pipefail

# ============================================================================
# DEFAULTS
# ============================================================================

readonly VERSION="1.1"
readonly SCRIPT_NAME="${0##*/}"

LOGPATH="auto"   # "auto" probes well-known locations; otherwise a path or glob
BANTIME="86400"  # seconds
MAXRETRY="1"
DRY_RUN=false

# Colors
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Leveled loggers. The color codes go through %b (escape-interpreting) while
# the message goes through %s, so backslashes inside a message (for example in
# a user-supplied log path) are printed literally.
log_info()  { printf '%b[INFO]%b %s\n'  "$GREEN"  "$NC" "$*"; }
log_warn()  { printf '%b[WARN]%b %s\n'  "$YELLOW" "$NC" "$*"; }
log_error() { printf '%b[ERROR]%b %s\n' "$RED"    "$NC" "$*" >&2; }
log_step()  { printf '%b[STEP]%b %s\n'  "$CYAN"   "$NC" "$*"; }

#######################################
# Print command-line help to stdout.
# Globals: SCRIPT_NAME, BANTIME, MAXRETRY (read)
#######################################
show_usage() {
    cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

Adds a Fail2ban jail that bans AI scrapers and unwanted bots.

Options:
  --logpath PATH   Access log file or glob pattern (default: auto-detect)
  --bantime SECS   Ban duration in seconds (default: $BANTIME)
  --maxretry N     Matches before an IP is banned (default: $MAXRETRY)
  --dry-run        Show what would be done without changing anything
  -h, --help       Show this help and exit
  -v, --version    Show version and exit
EOF
}

#######################################
# Parse command-line options into the global settings and validate them.
# Globals:   LOGPATH, BANTIME, MAXRETRY, DRY_RUN (written)
# Arguments: the script's command-line arguments
# Exits:     0 after --help/--version; 1 on an unknown option, a missing
#            option value, or a non-numeric --bantime/--maxretry
#######################################
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --logpath | --bantime | --maxretry)
                # ${2:-} keeps `set -u` from aborting before we can explain.
                if [[ $# -lt 2 || -z "${2:-}" ]]; then
                    log_error "Option $1 requires a value"
                    exit 1
                fi
                case "$1" in
                    --logpath)  LOGPATH="$2" ;;
                    --bantime)  BANTIME="$2" ;;
                    --maxretry) MAXRETRY="$2" ;;
                esac
                shift 2
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            -h | --help)
                show_usage
                exit 0
                ;;
            -v | --version)
                echo "$SCRIPT_NAME v$VERSION"
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                show_usage >&2
                exit 1
                ;;
        esac
    done

    # BANTIME is later used in shell arithmetic and both values are written
    # into the jail file, so reject anything that is not a plain integer here
    # rather than failing after the configuration has been installed.
    if [[ ! "$BANTIME" =~ ^-?[0-9]+$ ]]; then
        log_error "--bantime must be an integer number of seconds: $BANTIME"
        exit 1
    fi
    if [[ ! "$MAXRETRY" =~ ^[1-9][0-9]*$ ]]; then
        log_error "--maxretry must be a positive integer: $MAXRETRY"
        exit 1
    fi
}

#######################################
# Abort unless running as root; the script writes under /etc/fail2ban and
# reloads the service.
#######################################
check_root() {
    if [[ "$EUID" -ne 0 ]]; then
        log_error "This script must be run as root (use sudo)"
        exit 1
    fi
}

#######################################
# Abort unless fail2ban-client is on PATH and the fail2ban service is active.
#######################################
check_fail2ban() {
    log_step "Checking Fail2ban..."

    if ! command -v fail2ban-client &>/dev/null; then
        log_error "Fail2ban is not installed"
        log_error "Install it first: https://mylinux.work/guides/fail2ban-setup/"
        exit 1
    fi

    if ! systemctl is-active --quiet fail2ban; then
        log_error "Fail2ban is not running"
        exit 1
    fi

    log_info "Fail2ban is installed and running"
}

#######################################
# Resolve the access log to monitor.
# When LOGPATH is not "auto" it is validated (the first glob match must be a
# regular file) and kept verbatim. Otherwise well-known locations are probed
# in priority order and LOGPATH is set to the first one that exists.
# Globals: LOGPATH (read, written)
# Exits:   1 when no usable log file is found
#######################################
detect_logpath() {
    if [[ "$LOGPATH" != "auto" ]]; then
        # Support glob patterns (e.g. /var/log/apache2/domains/*.log).
        # Unquoted on purpose so the shell expands the pattern; without
        # nullglob an unmatched pattern stays literal and fails the -f test.
        # shellcheck disable=SC2086,SC2206
        local matches=( $LOGPATH )
        if [[ ${#matches[@]} -eq 0 || ! -f "${matches[0]}" ]]; then
            log_error "Log file not found: $LOGPATH"
            exit 1
        fi
        log_info "Using specified log path: $LOGPATH (${#matches[@]} file(s))"
        return
    fi

    log_step "Auto-detecting web server access log..."

    # HestiaCP / VestaCP — apache domains (check first: has full access logs
    # with user agents)
    local hestia_apache=( /var/log/apache2/domains/*.log )
    if [[ -f "${hestia_apache[0]:-}" ]]; then
        LOGPATH="/var/log/apache2/domains/*.log"
        log_info "Detected HestiaCP/VestaCP apache: $LOGPATH (${#hestia_apache[@]} file(s))"
        return
    fi

    # HestiaCP / VestaCP — nginx domains (proxy logs only in nginx+apache mode)
    local hestia_nginx=( /var/log/nginx/domains/*.log )
    if [[ -f "${hestia_nginx[0]:-}" ]]; then
        LOGPATH="/var/log/nginx/domains/*.log"
        log_info "Detected HestiaCP/VestaCP nginx: $LOGPATH (${#hestia_nginx[@]} file(s))"
        return
    fi

    # Single-file layouts as "label:path", in priority order:
    # nginx (standard), Apache (Debian/Ubuntu), Apache (RHEL/Rocky).
    local entry label path
    for entry in \
        "nginx:/var/log/nginx/access.log" \
        "apache2:/var/log/apache2/access.log" \
        "httpd:/var/log/httpd/access_log"; do
        label="${entry%%:*}"
        path="${entry#*:}"
        if [[ -f "$path" ]]; then
            LOGPATH="$path"
            log_info "Detected $label: $LOGPATH"
            return
        fi
    done

    log_error "Could not auto-detect access log. Use --logpath to specify."
    exit 1
}

# ============================================================================
# INSTALL FILTER
# ============================================================================

#######################################
# Write one generated Fail2ban config file, backing up any existing copy to
# <file>.bak first. In dry-run mode the content is only printed.
# Globals:   DRY_RUN (read)
# Arguments: $1 - label for the step line ("filter" / "jail")
#            $2 - noun for status messages ("Filter" / "Jail config")
#            $3 - destination path
#            $4 - name of the function that prints the file content
#######################################
install_config() {
    local step_label="$1" noun="$2" dest="$3" generator="$4"

    log_step "Installing ${step_label}: $dest"

    if [[ "$DRY_RUN" == true ]]; then
        log_info "[DRY RUN] Would create $dest"
        echo ""
        "$generator"
        echo ""
        return
    fi

    if [[ -f "$dest" ]]; then
        log_warn "${noun} already exists — backing up to ${dest}.bak"
        cp -- "$dest" "${dest}.bak"
    fi

    "$generator" > "$dest"
    log_info "${noun} installed: $dest"
}

# Install the user-agent filter definition.
install_filter() {
    install_config "filter" "Filter" \
        "/etc/fail2ban/filter.d/ai-bots.conf" generate_filter
}

#######################################
# Print the filter definition to stdout.
# The heredoc delimiter is quoted so the regex is emitted literally.
# <HOST> is mandatory: it is the group Fail2ban uses to learn which address
# to ban. The byte-count field accepts "-" because Apache's %b logs a dash
# for zero-byte responses.
#######################################
generate_filter() {
    cat <<'EOF'
# Fail2ban filter to block AI scrapers and unwanted bots
# https://mylinux.work
#
# Matches common AI crawler user agents in web server access logs.
# These bots scrape content for AI model training and typically
# ignore robots.txt directives.

[Definition]

# Match AI and unwanted bot user agents in access logs
# Requires the combined log format (the common format has no user agent)
failregex = ^<HOST> \S+ \S+ \[.*\] "\S+ \S+ \S+" \d+ (?:\d+|-) "\S+" ".*(?:ABEvalBot|GPTBot|CCBot|ClaudeBot|anthropic-ai|Bytespider|TikTokSpider|cohere-ai|meta-externalagent|Meta-ExternalFetcher|PetalBot|Amazonbot|AI2Bot|Ai2Bot-Dolma|YouBot|PerplexityBot|Diffbot|Applebot-Extended|Google-Extended|MistralBot|YandexGPTBot|MJ12bot|Scrapy|DataForSeoBot|Timpibot|img2dataset|HanaleiBot|SemrushBot|AhrefsBot|DotBot|SERanking|trafilatura).*"

ignoreregex =

# Author: Phil Connor — https://mylinux.work
EOF
}

# ============================================================================
# INSTALL JAIL
# ============================================================================

# Install the jail definition that ties the filter to the detected log.
install_jail() {
    install_config "jail" "Jail config" \
        "/etc/fail2ban/jail.d/ai-bots.conf" generate_jail
}

#######################################
# Print the jail definition to stdout.
# The heredoc is unquoted so the current settings are substituted.
# Globals: LOGPATH, MAXRETRY, BANTIME (read)
#######################################
generate_jail() {
    cat <<EOF
# Fail2ban jail to block AI scrapers and unwanted bots
# https://mylinux.work

[ai-bots]
enabled  = true
port     = http,https
filter   = ai-bots
logpath  = $LOGPATH
maxretry = $MAXRETRY
bantime  = $BANTIME

# Author: Phil Connor — https://mylinux.work
EOF
}

# ============================================================================
# RELOAD + VERIFY
# ============================================================================

#######################################
# Reload Fail2ban and confirm the service is still active afterwards.
# Globals: DRY_RUN (read)
# Exits:   1 when the reload command fails or the service is not active
#######################################
reload_fail2ban() {
    log_step "Reloading Fail2ban..."

    if [[ "$DRY_RUN" == true ]]; then
        log_info "[DRY RUN] Would reload Fail2ban"
        return
    fi

    # Best-effort pre-check: a non-zero status is only warned about here; the
    # reload below is what decides success.
    if ! fail2ban-client -t &>/dev/null; then
        log_warn "Config test not available — reloading directly"
    fi

    # Checked explicitly so a failure produces a message instead of a silent
    # `set -e` abort.
    if ! fail2ban-client reload; then
        log_error "Fail2ban failed to restart — check: journalctl -u fail2ban"
        exit 1
    fi

    # Give the service a moment to settle before checking its state.
    sleep 2

    if systemctl is-active --quiet fail2ban; then
        log_info "Fail2ban reloaded successfully"
    else
        log_error "Fail2ban failed to restart — check: journalctl -u fail2ban"
        exit 1
    fi
}

#######################################
# Confirm the ai-bots jail is loaded by querying its status.
# Globals: DRY_RUN, LOGPATH (read)
# Exits:   1 when the jail is not running
#######################################
verify_jail() {
    log_step "Verifying ai-bots jail..."

    if [[ "$DRY_RUN" == true ]]; then
        log_info "[DRY RUN] Would verify jail status"
        return
    fi

    echo ""
    if fail2ban-client status ai-bots 2>/dev/null; then
        echo ""
        log_info "AI bots jail is active and monitoring $LOGPATH"
    else
        log_error "Jail 'ai-bots' is not running — check: fail2ban-client status"
        log_error "Debug with: fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/ai-bots.conf"
        exit 1
    fi
}

# ============================================================================
# MAIN
# ============================================================================

#######################################
# Entry point: parse options, run the preflight checks, install the filter
# and jail, reload Fail2ban, verify the jail, and print a summary.
# Arguments: the script's command-line arguments
#######################################
main() {
    parse_args "$@"

    echo ""
    echo "============================================"
    echo "  Fail2ban AI Bot Blocker v${VERSION}"
    echo "  https://mylinux.work"
    echo "============================================"
    echo ""

    check_root
    check_fail2ban
    detect_logpath
    install_filter
    install_jail
    reload_fail2ban
    verify_jail

    echo ""
    echo "============================================"
    echo "  Setup Complete"
    echo "============================================"
    echo ""
    echo "  Jail:      ai-bots"
    echo "  Log:       $LOGPATH"
    echo "  Ban time:  ${BANTIME}s ($(( BANTIME / 3600 ))h)"
    echo "  Max retry: $MAXRETRY"
    echo ""
    echo "  Useful commands:"
    echo "    fail2ban-client status ai-bots"
    echo "    fail2ban-client set ai-bots unbanip <IP>"
    echo "    fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/ai-bots.conf"
    echo ""
}

main "$@"