#!/bin/bash
################################################################################
# Script Name: add-fail2ban-image-scraper.sh
# Version: 1.1
# Description: Adds a Fail2ban jail to block image scrapers — bots that
# directly request image files with no referer. Real browsers
# always send a referer when loading images (the page containing
# the <img> tag). Direct image requests with no referer are
# almost always scrapers harvesting images for AI training
# datasets or content theft.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Usage:
# sudo ./add-fail2ban-image-scraper.sh
# sudo ./add-fail2ban-image-scraper.sh --logpath /var/log/nginx/access.log
# sudo ./add-fail2ban-image-scraper.sh --maxretry 3
# sudo ./add-fail2ban-image-scraper.sh --dry-run
#
################################################################################
set -euo pipefail
# ============================================================================
# DEFAULTS
# ============================================================================
readonly VERSION="1.1"
readonly SCRIPT_NAME="${0##*/}"
# Jail tunables. "auto" makes detect_logpath probe the well-known log locations;
# BANTIME and FINDTIME are printed by main with an "s" (seconds) suffix.
LOGPATH="auto"
BANTIME="86400"
MAXRETRY="5"
FINDTIME="300"
IGNOREIP=""
# Mode flags. They are executed as the `true`/`false` commands (`if $DRY_RUN`),
# so they must only ever hold one of those two words.
DRY_RUN=false
# main tests $REMOVE; without a default here `set -u` aborts the script
# whenever the flag was never assigned.
REMOVE=false
# Colors (literal escape strings, expanded when the log_* helpers print them)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
log_info() {
  # Green [INFO] tag on stdout; %b expands backslash escapes like `echo -e`.
  printf '%b\n' "${GREEN}[INFO]${NC} $*"
}
log_warn() {
  # Yellow [WARN] tag on stdout; %b expands backslash escapes like `echo -e`.
  printf '%b\n' "${YELLOW}[WARN]${NC} $*"
}
log_error() {
  # Red [ERROR] tag, written to stderr so it stays out of captured stdout.
  printf '%b\n' "${RED}[ERROR]${NC} $*" >&2
}
log_step() {
  # Cyan [STEP] tag on stdout; %b expands backslash escapes like `echo -e`.
  printf '%b\n' "${CYAN}[STEP]${NC} $*"
}
# NOTE(review): this span does not parse as written. `cat </dev/null; then` has
# a `then` with no matching `if`, so the text between the start of show_usage's
# `cat` and the `/dev/null; then` of a later check appears to have been lost.
# Presumably that was the usage here-document plus the definitions of
# parse_args and check_root and the opening of check_fail2ban: main() calls all
# three, but none of them is defined anywhere in the visible source.
# TODO: restore the missing text from the original script.
#
# What remains below: a first branch (its condition is missing) that reports
# "Fail2ban is not installed" and exits 1, then a systemctl check that exits 1
# when the fail2ban unit is not active, then a success message.
show_usage() {
cat </dev/null; then
log_error "Fail2ban is not installed"
log_error "Install it first: https://mylinux.work/guides/fail2ban-setup/"
exit 1
fi
# Installed is not enough: the service has to be active as well.
if ! systemctl is-active --quiet fail2ban; then
log_error "Fail2ban is not running"
exit 1
fi
log_info "Fail2ban is installed and running"
}
#######################################
# Settle on the access log the jail should watch.
# Globals:   LOGPATH (read; rewritten when it is "auto")
# Outputs:   progress via log_step/log_info, failures via log_error
# Returns:   0 on success; exits 1 when no usable log is found
#######################################
detect_logpath() {
  # A caller-supplied LOGPATH (anything other than "auto") only has to resolve
  # to an existing file; it is kept exactly as given, glob included.
  if [[ "$LOGPATH" != "auto" ]]; then
    # shellcheck disable=SC2086
    local resolved=( $LOGPATH )
    if (( ${#resolved[@]} > 0 )) && [[ -f "${resolved[0]}" ]]; then
      log_info "Using specified log path: $LOGPATH"
      return
    fi
    log_error "Log file not found: $LOGPATH"
    exit 1
  fi

  log_step "Auto-detecting web server access log..."

  # Per-domain globs; an unmatched glob stays literal and fails the -f test.
  local apache_domains=( /var/log/apache2/domains/*.log )
  local nginx_domains=( /var/log/nginx/domains/*.log )
  local server

  # First hit wins: HestiaCP per-domain logs, then the stock locations.
  if [[ -f "${apache_domains[0]:-}" ]]; then
    server="HestiaCP apache"
    LOGPATH="/var/log/apache2/domains/*.log"
  elif [[ -f "${nginx_domains[0]:-}" ]]; then
    server="HestiaCP nginx"
    LOGPATH="/var/log/nginx/domains/*.log"
  elif [[ -f /var/log/nginx/access.log ]]; then
    server="nginx"
    LOGPATH="/var/log/nginx/access.log"
  elif [[ -f /var/log/apache2/access.log ]]; then
    # Debian/Ubuntu layout
    server="apache2"
    LOGPATH="/var/log/apache2/access.log"
  elif [[ -f /var/log/httpd/access_log ]]; then
    # RHEL/Rocky layout
    server="httpd"
    LOGPATH="/var/log/httpd/access_log"
  else
    log_error "Could not auto-detect access log. Use --logpath to specify."
    exit 1
  fi

  log_info "Detected ${server}: $LOGPATH"
}
# ============================================================================
# REMOVE
# ============================================================================
#######################################
# Uninstall the image-scraper jail and filter, then reload Fail2ban.
# Always terminates the script: main calls this and would otherwise carry on
# with the install steps.
# Globals:   DRY_RUN (read)
# Outputs:   progress via log_step/log_info/log_warn
# Returns:   never returns; exits 0 on success (a failing fail2ban-client
#            reload aborts under `set -e`)
#######################################
do_remove() {
  local filter_file="/etc/fail2ban/filter.d/image-scraper.conf"
  local jail_file="/etc/fail2ban/jail.d/image-scraper.conf"

  log_step "Removing image scraper jail..."

  if $DRY_RUN; then
    log_info "[DRY RUN] Would remove $filter_file"
    log_info "[DRY RUN] Would remove $jail_file"
    log_info "[DRY RUN] Would reload fail2ban"
    # Exit rather than return: returning let main fall through into a dry-run
    # install right after a dry-run removal.
    exit 0
  fi

  # Drop the jail before its filter.
  if [[ -f "$jail_file" ]]; then
    rm -f -- "$jail_file"
    log_info "Removed: $jail_file"
  else
    log_warn "Jail config not found: $jail_file"
  fi

  if [[ -f "$filter_file" ]]; then
    rm -f -- "$filter_file"
    log_info "Removed: $filter_file"
  else
    log_warn "Filter not found: $filter_file"
  fi

  fail2ban-client reload
  sleep 2
  log_info "Fail2ban reloaded — image-scraper jail removed"
  exit 0
}
# ============================================================================
# INSTALL FILTER
# ============================================================================
#######################################
# Write the image-scraper filter definition, or preview it in dry-run mode.
# Globals:   DRY_RUN (read)
# Outputs:   progress via log_step/log_info/log_warn; in dry-run mode the
#            generated filter on stdout, framed by blank lines
#######################################
install_filter() {
  local target="/etc/fail2ban/filter.d/image-scraper.conf"

  log_step "Installing filter: $target"

  if ! $DRY_RUN; then
    # Keep one backup of whatever was installed before overwriting it.
    if [[ -f "$target" ]]; then
      log_warn "Filter already exists — backing up to ${target}.bak"
      cp "$target" "${target}.bak"
    fi
    generate_filter > "$target"
    log_info "Filter installed: $target"
    return
  fi

  # Dry run: show what would be written, touch nothing.
  log_info "[DRY RUN] Would create $target"
  printf '\n'
  generate_filter
  printf '\n'
}
#######################################
# Print the Fail2ban filter definition for the image-scraper jail.
# The here-document is quoted, so the regex is emitted verbatim with no shell
# expansion.
# Outputs:   the filter file contents on stdout
#######################################
generate_filter() {
  # The failregex has to carry a host tag: <HOST> is what Fail2ban captures as
  # the address to ban, and it sits where the client IP appears in the log line.
  cat <<'EOF'
# Fail2ban filter to block image scrapers
# https://mylinux.work
#
# Catches bots that directly request image files with no referer.
# When a real browser loads an image from a web page, it sends the page
# URL as the referer header. Direct image requests with no referer
# indicate scraping — typically for AI training datasets or content theft.
#
# Matches: GET requests for .png, .jpg, .jpeg, .gif, .webp, .svg, .avif
# with referer logged as "-" (absent/empty).
#
# Does NOT match .ico (favicons are legitimately requested without referer).
[Definition]
# Direct image request with no referer — combined log format
# Format: IP - - [date] "GET /path/image.png HTTP/x.x" status size "-" "user agent"
failregex = ^<HOST> \S+ \S+ \[.*\] "GET \S+\.(?:png|jpe?g|gif|webp|svg|avif) \S+" \d+ \d+ "-" ".*"
ignoreregex =
# Author: Phil Connor — https://mylinux.work
EOF
}
# ============================================================================
# INSTALL JAIL
# ============================================================================
#######################################
# Write the image-scraper jail configuration, or preview it in dry-run mode.
# Globals:   DRY_RUN (read)
# Outputs:   progress via log_step/log_info/log_warn; in dry-run mode the
#            generated jail config on stdout, framed by blank lines
#######################################
install_jail() {
  local target="/etc/fail2ban/jail.d/image-scraper.conf"

  log_step "Installing jail: $target"

  if ! $DRY_RUN; then
    # Keep one backup of whatever was installed before overwriting it.
    if [[ -f "$target" ]]; then
      log_warn "Jail config already exists — backing up to ${target}.bak"
      cp "$target" "${target}.bak"
    fi
    generate_jail > "$target"
    log_info "Jail config installed: $target"
    return
  fi

  # Dry run: show what would be written, touch nothing.
  log_info "[DRY RUN] Would create $target"
  printf '\n'
  generate_jail
  printf '\n'
}
# NOTE(review): this span does not parse as written. `cat </dev/null; then` has
# a `then` with no matching `if`, so the text between the start of
# generate_jail's `cat` and the `/dev/null; then` of a later check appears to
# have been lost. Presumably that was the jail here-document plus the opening
# of reload_fail2ban, which main() calls but which is not defined anywhere in
# the visible source.
# TODO: restore the missing text from the original script.
#
# What remains below: a warning branch (its condition is missing), an
# unconditional `fail2ban-client reload`, and a check that the fail2ban unit is
# still active afterwards, exiting 1 if it is not.
generate_jail() {
cat </dev/null; then
log_warn "Config test not available — reloading directly"
fi
fail2ban-client reload
# Fixed pause before querying the unit state — presumably to let the reload
# settle; confirm whether 2 seconds is a deliberate choice.
sleep 2
if systemctl is-active --quiet fail2ban; then
log_info "Fail2ban reloaded successfully"
else
log_error "Fail2ban failed to restart — check: journalctl -u fail2ban"
exit 1
fi
}
#######################################
# Confirm that Fail2ban reports the image-scraper jail.
# Globals:   DRY_RUN (read), LOGPATH (read, for messages only)
# Outputs:   the `fail2ban-client status` report on stdout plus log lines
# Returns:   0 when the jail answers; exits 1 when the status query fails
#######################################
verify_jail() {
  log_step "Verifying image-scraper jail..."

  if $DRY_RUN; then
    log_info "[DRY RUN] Would verify jail status"
    return
  fi

  echo ""
  # The status report itself goes to stdout; only its stderr is discarded.
  if ! fail2ban-client status image-scraper 2>/dev/null; then
    log_error "Jail 'image-scraper' is not running — check: fail2ban-client status"
    log_error "Debug with: fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/image-scraper.conf"
    exit 1
  fi

  echo ""
  log_info "Image scraper jail is active and monitoring $LOGPATH"
}
#######################################
# Dry-run preview: run the filter regex over the first matching log file and
# show the last five lines of the fail2ban-regex output.
# Does nothing outside dry-run mode or when no log file resolves.
# Globals:   DRY_RUN (read), LOGPATH (read; may be a glob)
# Outputs:   the tail of the fail2ban-regex output on stdout
#######################################
test_against_logs() {
  $DRY_RUN || return 0

  # Unquoted on purpose: LOGPATH may be a glob and must expand to real files.
  # shellcheck disable=SC2086
  local matches=( $LOGPATH )
  # The :- default keeps `set -u` from aborting when nothing expanded.
  [[ -f "${matches[0]:-}" ]] || return 0

  log_step "Testing filter against existing logs..."
  echo ""
  # The regex needs the <HOST> tag, which marks the address Fail2ban extracts;
  # the quoted here-document keeps the pattern free of shell expansion.
  fail2ban-regex "${matches[0]}" /dev/stdin <<'FILTER' 2>&1 | tail -5
[Definition]
failregex = ^<HOST> \S+ \S+ \[.*\] "GET \S+\.(?:png|jpe?g|gif|webp|svg|avif) \S+" \d+ \d+ "-" ".*"
ignoreregex =
FILTER
  echo ""
}
# ============================================================================
# MAIN
# ============================================================================
#######################################
# Script entry point: parse options, check prerequisites, then either remove
# the jail or install and verify it, and print a summary.
# Globals:   VERSION, REMOVE, LOGPATH, BANTIME, MAXRETRY, FINDTIME, IGNOREIP
#            (all read)
# Arguments: the script's command-line arguments, passed to parse_args
# Outputs:   banner, step logs and the final summary on stdout
#######################################
main() {
  parse_args "$@"

  echo ""
  echo "============================================"
  echo " Fail2ban Image Scraper Blocker v${VERSION}"
  echo " https://mylinux.work"
  echo "============================================"
  echo ""

  check_root
  check_fail2ban

  # Defaulted so `set -u` cannot abort here when REMOVE was never assigned.
  if ${REMOVE:-false}; then
    do_remove
  fi

  detect_logpath
  test_against_logs
  install_filter
  install_jail
  reload_fail2ban
  verify_jail

  echo ""
  echo "============================================"
  echo " Setup Complete"
  echo "============================================"
  echo ""
  echo " Jail: image-scraper"
  echo " Log: $LOGPATH"
  echo " Ban time: ${BANTIME}s ($(( BANTIME / 3600 ))h)"
  echo " Max retry: $MAXRETRY (direct image requests before ban)"
  echo " Find time: ${FINDTIME}s ($(( FINDTIME / 60 ))m window)"
  if [[ -n "$IGNOREIP" ]]; then
    echo " Ignore: $IGNOREIP"
  fi
  echo ""
  echo " Useful commands:"
  echo " fail2ban-client status image-scraper"
  # <IP> is a placeholder for the address to unban.
  echo " fail2ban-client set image-scraper unbanip <IP>"
  echo " fail2ban-regex $LOGPATH /etc/fail2ban/filter.d/image-scraper.conf"
  echo ""
}
# Entry point: hand every command-line argument to main unchanged.
main "$@"