Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
This commit is contained in:
Executable
+443
@@ -0,0 +1,443 @@
|
||||
#!/bin/bash
|
||||
################################################################################
# Script Name: add-nginx-bot-block.sh
# Version: 1.3
# Description: Configure AI scraper and bot blocking on standard nginx servers.
#              Creates an nginx map in conf.d and injects bot-blocking rules
#              into server blocks found in sites-enabled and conf.d.
#              For HestiaCP / VestaCP / myVesta servers, use hestia-bot-block.sh instead.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - nginx installed and running
#   - Root access
#
# Usage:
#   sudo ./add-nginx-bot-block.sh
#   sudo ./add-nginx-bot-block.sh --dry-run
#   sudo ./add-nginx-bot-block.sh --conf /etc/nginx/sites-enabled/mysite.conf
#   sudo ./add-nginx-bot-block.sh --status-code 403
#   sudo ./add-nginx-bot-block.sh --remove
#
# Changelog:
#   1.3 — 2026-05-11: Added fragment-in-referer blocking (real browsers strip
#         URI fragments from the Referer header). Added request method blocking
#         (only GET/HEAD allowed — static sites never need POST/PUT/DELETE).
#         Added ospa-radar (lead-gen/business intelligence crawler) to blocklist.
#   1.2 — 2026-05-08: Added Exabot (defunct Dassault/Exalead crawler, now
#         spoofed) and Sogou (Tencent Chinese search crawler) to blocklist.
#   1.1 — 2026-05-04: Removed Claude-Web and OAI-SearchBot from blocklist.
#         These are user-facing fetcher bots, not training crawlers. Blocking
#         them prevents your content from being cited in AI answers.
#
################################################################################

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

# --- Configuration ---
# Fixed locations and markers. They are readonly so that a typo or stray
# assignment later in the script cannot silently repoint the files that get
# written, edited or deleted.
readonly CONF_DIR="/etc/nginx/conf.d"
readonly SITES_DIR="/etc/nginx/sites-enabled"
readonly MAP_FILE="${CONF_DIR}/bot-block.conf"

# Comment lines that delimit the rules injected into a server block; remove
# mode deletes everything between (and including) these two markers.
readonly MARKER_START="# bot-block-managed-start"
readonly MARKER_END="# bot-block-managed-end"

# Epoch seconds, captured once so every backup made by this run shares the
# same suffix. Assigned before `readonly` so a failing `date` is not masked.
TIMESTAMP=$(date +%s)
readonly TIMESTAMP

# Defaults for the settings that command-line options overwrite; these must
# stay writable.
DRY_RUN=false
REMOVE=false
SINGLE_CONF=""
STATUS_CODE="444"

# --- Colors ---
# ANSI escape sequences, stored as literal backslash text and interpreted
# only when printed.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[0;33m'
readonly CYAN='\033[0;36m'
readonly BOLD='\033[1m'
readonly NC='\033[0m'

# Logging helpers: print a colored tag followed by the message on stdout.
# The message goes through %s, so backslashes in it (for example in a file
# path) are printed literally; `echo -e` would have interpreted them as
# escape sequences. Only the color variables are expanded with %b.
info() { printf '%b[OK]%b %s\n' "${GREEN-}" "${NC-}" "$*"; }
warn() { printf '%b[WARN]%b %s\n' "${YELLOW-}" "${NC-}" "$*"; }
step() { printf '%b[STEP]%b %s\n' "${CYAN-}" "${NC-}" "$*"; }

#######################################
# Print the help text and terminate the script.
# Arguments:
#   $1 - exit status to terminate with (optional, defaults to 0 so existing
#        callers that pass nothing behave as before)
# Outputs:
#   The help text on stdout.
#######################################
usage() {
  # Resolve the script name once instead of forking basename for every line.
  local prog
  prog=$(basename "$0")

  cat <<EOF
Usage: sudo ${prog} [OPTIONS]

Blocks AI scrapers, SEO bots, vulnerability scanners, and scraping frameworks
on standard nginx servers by creating an http-level map and injecting
bot-blocking rules into server blocks.

Options:
  --dry-run            Show what would be done without making changes
  --remove             Remove bot-block.conf and strip injected rules
  --conf FILE          Only modify a specific config file
  --status-code CODE   HTTP status code to return (default: 444)
                       Common alternatives: 403, 444
  -h, --help           Show this help

Examples:
  sudo ${prog}
  sudo ${prog} --dry-run
  sudo ${prog} --conf /etc/nginx/sites-enabled/mysite.conf
  sudo ${prog} --status-code 403
  sudo ${prog} --remove
EOF
  exit "${1:-0}"
}

# --- Argument parsing ---

# Report a command-line mistake on stderr and stop with status 2.
usage_error() {
  printf '%bError: %s%b\n' "${RED-}" "$*" "${NC-}" >&2
  exit 2
}

#######################################
# Parse the command-line options into the global settings.
# Globals:
#   DRY_RUN, REMOVE, SINGLE_CONF, STATUS_CODE (written)
# Arguments:
#   The script's command-line arguments.
# Outputs:
#   Error messages (and, for an unknown option, the help text) on stderr.
# Returns:
#   0 when all options were accepted. Exits with status 2 on a usage error;
#   -h/--help hands over to usage, which exits on its own.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --remove)
        REMOVE=true
        shift
        ;;
      --conf)
        # Check for the value explicitly: under `set -u` a bare "$2" would
        # abort with an opaque "unbound variable" message instead.
        [[ $# -ge 2 ]] || usage_error "--conf requires a file argument"
        SINGLE_CONF="$2"
        shift 2
        ;;
      --status-code)
        [[ $# -ge 2 ]] || usage_error "--status-code requires a code argument"
        # The code is pasted verbatim into the generated nginx rules, so
        # accept nothing but a three-digit HTTP status code.
        [[ "$2" =~ ^[1-5][0-9][0-9]$ ]] \
          || usage_error "--status-code must be a three-digit HTTP status code (got: $2)"
        STATUS_CODE="$2"
        shift 2
        ;;
      -h | --help)
        usage
        ;;
      *)
        printf 'Unknown option: %s\n' "$1" >&2
        # usage terminates the shell it runs in; run it in a subshell so the
        # help is still shown while this script exits non-zero.
        (usage) >&2 || true
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# --- Checks ---
# Stop early unless the script runs with an effective UID of 0.
(( EUID == 0 )) || {
  echo -e "${RED}Error: Run as root (sudo)${NC}" >&2
  exit 1
}

# Stop early when no nginx executable can be found on PATH.
command -v nginx >/dev/null 2>&1 || {
  echo -e "${RED}Error: nginx not found${NC}" >&2
  exit 1
}

# =====================================================
# Collect config files to process
# =====================================================

# Print the path given as $1 when the file contains a line that opens a
# server block. Uses POSIX ERE so it does not depend on grep's optional PCRE
# support (-P). Errors from unreadable files are deliberately silenced: such
# files are simply not listed.
_emit_if_server_config() {
  if grep -qE '^[[:space:]]*server[[:space:]]*\{' -- "$1" 2>/dev/null; then
    printf '%s\n' "$1"
  fi
}

#######################################
# List the nginx config files that contain at least one server block.
# Globals:
#   SINGLE_CONF, SITES_DIR, CONF_DIR, MAP_FILE, RED, NC (read)
# Arguments:
#   None
# Outputs:
#   One path per line on stdout, sites-enabled entries first, then conf.d.
#   Nothing at all is printed when no file qualifies, so a caller reading the
#   output line by line never receives an empty path.
#   An error message on stderr when SINGLE_CONF is set but is not a file.
# Returns:
#   0 on success. Exits with status 1 when SINGLE_CONF is not a regular file;
#   inside a subshell or command substitution that ends only the subshell, so
#   such callers have to check the status themselves.
#######################################
collect_configs() {
  local path

  if [[ -n "$SINGLE_CONF" ]]; then
    if [[ ! -f "$SINGLE_CONF" ]]; then
      echo -e "${RED}Error: Config file not found: ${SINGLE_CONF}${NC}" >&2
      exit 1
    fi
    _emit_if_server_config "$SINGLE_CONF"
    return 0
  fi

  # Scan sites-enabled
  if [[ -d "$SITES_DIR" ]]; then
    for path in "$SITES_DIR"/*; do
      [[ -f "$path" ]] || continue
      # Backups written next to the configs by earlier runs are copies of
      # real sites; treating them as sites would inject rules into them and
      # back them up again on every run.
      [[ "${path##*/}" == *.bak.* ]] && continue
      _emit_if_server_config "$path"
    done
  fi

  # Scan conf.d (skip bot-block.conf itself)
  if [[ -d "$CONF_DIR" ]]; then
    for path in "$CONF_DIR"/*.conf; do
      [[ -f "$path" ]] || continue
      [[ "$path" == "$MAP_FILE" ]] && continue
      _emit_if_server_config "$path"
    done
  fi

  return 0
}

# =====================================================
# REMOVE MODE
# =====================================================

#######################################
# Print a config file with every managed bot-block section removed.
# A section runs from a line containing MARKER_START through the next line
# containing MARKER_END, both included. The markers are matched as literal
# text, and a section whose two markers share one line is handled as well.
# Globals:
#   MARKER_START, MARKER_END (read)
# Arguments:
#   $1 - path of the config file to read (not modified)
# Outputs:
#   The cleaned file content on stdout.
#######################################
strip_bot_block() {
  awk -v first="$MARKER_START" -v last="$MARKER_END" '
    !skipping && index($0, first) { skipping = 1 }
    skipping {
      if (index($0, last)) skipping = 0
      next
    }
    { print }
  ' "$1"
}

if [[ "$REMOVE" == "true" ]]; then
  step "Removing bot-block configuration"

  # Remove map file
  if [[ -f "$MAP_FILE" ]]; then
    if [[ "$DRY_RUN" == "true" ]]; then
      echo " Would remove: ${MAP_FILE}"
    else
      rm -f "$MAP_FILE"
      info "Removed: ${MAP_FILE}"
    fi
  else
    warn "Map file not found: ${MAP_FILE} (already removed?)"
  fi

  # Strip managed blocks from config files
  step "Scanning for injected bot-block rules"

  # Use command substitution rather than process substitution so that a
  # failing collect_configs (missing --conf file) stops the script under
  # `set -e`. An empty result must yield an empty array, not one empty path.
  config_list=$(collect_configs)
  configs=()
  if [[ -n "$config_list" ]]; then
    mapfile -t configs <<<"$config_list"
  fi

  if [[ ${#configs[@]} -eq 0 ]]; then
    warn "No server block config files found"
  else
    for conf in "${configs[@]}"; do
      grep -qF -- "$MARKER_START" "$conf" 2>/dev/null || continue

      if [[ "$DRY_RUN" == "true" ]]; then
        echo " Would clean: ${conf}"
        continue
      fi

      # Without an end marker the removal would swallow the rest of the file.
      if ! grep -qF -- "$MARKER_END" "$conf"; then
        warn "End marker missing in ${conf} — left untouched, clean it by hand"
        continue
      fi

      # The backup is dot-prefixed so that wildcard includes such as
      # "sites-enabled/*" (which skip hidden files) never load it as a
      # second copy of the site.
      backup="$(dirname -- "$conf")/.$(basename -- "$conf").bak.${TIMESTAMP}"
      cp -p -- "$conf" "$backup"

      cleaned=$(mktemp)
      if ! strip_bot_block "$conf" > "$cleaned"; then
        rm -f -- "$cleaned"
        echo -e "${RED}[ERROR] Failed to clean ${conf}${NC}" >&2
        exit 1
      fi
      # Write through the existing path instead of replacing it: this keeps
      # a sites-enabled symlink, and the file's owner and mode, intact.
      cat -- "$cleaned" > "$conf"
      rm -f -- "$cleaned"
      info "Cleaned: ${conf} (backup: ${backup})"
    done
  fi

  # Validate and reload
  if [[ "$DRY_RUN" == "true" ]]; then
    echo " Would run: nginx -t"
    echo " Would run: systemctl reload nginx"
  else
    step "Testing nginx configuration"
    if nginx -t 2>&1; then
      info "nginx config valid"
    else
      echo -e "${RED}[ERROR] nginx config test failed — restore the hidden .<name>.bak.${TIMESTAMP} files${NC}" >&2
      exit 1
    fi

    step "Reloading nginx"
    systemctl reload nginx
    info "nginx reloaded"
  fi

  echo ""
  echo -e "${BOLD}Bot-block rules removed.${NC}"
  exit 0
fi

# =====================================================
# INSTALL MODE
# =====================================================

# Step 1: Create nginx map
# =====================================================
step "Creating bot-block map at ${MAP_FILE}"

# Text of the map file. The quoted here-document delimiter keeps every "$"
# and backslash literal, so the nginx variables and regexes below reach the
# file exactly as written. The map sets $is_bad_bot to 1 for matching agents.
MAP_CONTENT=$(cat <<'NGINX_MAP'
# Bot-blocking map for AI scrapers, SEO bots, and vulnerability scanners
# Generated by add-nginx-bot-block.sh — https://mylinux.work

map $http_user_agent $is_bad_bot {
default 0;

# AI scrapers
~*ABEvalBot 1;
~*GPTBot 1;
~*ClaudeBot 1;
~*anthropic-ai 1;
~*CCBot 1;
~*Bytespider 1;
~*TikTokSpider 1;
~*cohere-ai 1;
~*PerplexityBot 1;
~*Diffbot 1;
~*MistralBot 1;
~*YandexGPTBot 1;
~*meta-externalagent 1;
~*Meta-ExternalFetcher 1;
~*meta-webindexer 1;
~*PetalBot 1;
~*Amazonbot 1;
~*Amzn-SearchBot 1;
~*AI2Bot 1;
~*Timpibot 1;
~*img2dataset 1;
~*YouBot 1;
~*HanaleiBot 1;

# Defunct crawlers (spoofed user agents)
~*Exabot 1;
~*Sogou 1;

# SEO scrapers
~*MJ12bot 1;
~*SemrushBot 1;
~*AhrefsBot 1;
~*DotBot 1;
~*DataForSeoBot 1;
~*SERanking 1;

# Vulnerability scanners
~*Nikto 1;
~*sqlmap 1;
~*Nmap 1;
~*masscan 1;
~*ZmEu 1;
~*Morpheus 1;

# Lead-gen / business intelligence bots
~*ospa-radar 1;
~*HubSeedsBot 1;

# AI scrapers / research bots
~*Aranet-SearchBot 1;
~*AzureAI-SearchBot 1;
~*MINERVA-DeepResearch 1;
~*NagetBot 1;
~*LAIABot 1;
~*pi-coding-agent 1;

# Probe / monitoring bots
~*CMS-Checker 1;
~*NexoFaviconBot 1;
~*AwarioBot 1;
~*AwarioSmartBot 1;
~*CopyousBot 1;
~*SurdotlyBot 1;
~*trendictionbot 1;
~*wpbot 1;
~*WebFetchTool 1;
~*YisouSpider 1;

# Scraping frameworks
~*Scrapy 1;
~*python-requests 1;
~*Go-http-client 1;
~*Java/ 1;
~*libwww-perl 1;
~*trafilatura 1;
~*node-fetch 1;

# Outdated browsers (Chrome < 115 — almost certainly bots)
~*Chrome/([1-9][0-9]?|10[0-9]|11[0-4])\. 1;

# Empty / missing user agent
"" 1;
"-" 1;
}
NGINX_MAP
)

if [[ "$DRY_RUN" != "true" ]]; then
  # Keep a timestamped copy of any map left by a previous run before it is
  # overwritten.
  if [[ -f "$MAP_FILE" ]]; then
    cp "$MAP_FILE" "${MAP_FILE}.bak.${TIMESTAMP}"
    warn "Existing map backed up"
  fi
  printf '%s\n' "$MAP_CONTENT" > "$MAP_FILE"
  info "Map created: ${MAP_FILE}"
else
  echo " Would create: ${MAP_FILE}"
fi

# =====================================================
# Step 2: Inject bot-blocking rule into server blocks
# =====================================================

#######################################
# Print the managed rule block that is injected into a server block.
# Globals:
#   MARKER_START, MARKER_END, STATUS_CODE (read)
# Outputs:
#   The block on stdout, wrapped in the start/end marker comments.
#######################################
build_bot_block() {
  cat <<EOF
    ${MARKER_START}
    if (\$is_bad_bot) {
        return ${STATUS_CODE};
    }
    # Block broken srcset scrapers
    if (\$request_uri ~* "%20[0-9]+w,https?://") {
        return ${STATUS_CODE};
    }
    # Block spoofed referers with fragment identifiers (real browsers strip these)
    if (\$http_referer ~* "#") {
        return ${STATUS_CODE};
    }
    # Block non-GET/HEAD methods (static sites never need POST/PUT/DELETE)
    if (\$request_method !~ ^(GET|HEAD)\$ ) {
        return ${STATUS_CODE};
    }
    ${MARKER_END}
EOF
}

#######################################
# Print a config file with the rule block inserted before the first location
# directive that follows each line opening a server block.
# Arguments:
#   $1 - path of the config file to read (not modified)
# Outputs:
#   The resulting file content on stdout.
# Returns:
#   0 when the block was inserted at least once, 2 when no insertion point
#   was found (the output then equals the input), other values on awk errors.
#######################################
render_with_bot_block() {
  local block
  block=$(build_bot_block)

  # The block travels through the environment because `awk -v` applies
  # escape-sequence processing to its value. "[ \t]" replaces "\s", which is
  # a gawk extension that other awk implementations do not understand.
  BOT_BLOCK="$block" awk '
    /^[ \t]*server[ \t]*\{/ { in_server = 1; injected = 0 }
    in_server && !injected && /^[ \t]*location[ \t]/ {
      print ENVIRON["BOT_BLOCK"]
      print ""
      injected = 1
      total++
    }
    { print }
    END { if (total == 0) exit 2 }
  ' "$1"
}

step "Scanning for server blocks to inject bot-blocking rule"

# Use command substitution rather than process substitution so that a failing
# collect_configs (missing --conf file) stops the script under `set -e`. An
# empty result must yield an empty array, not one empty path.
config_list=$(collect_configs)
configs=()
if [[ -n "$config_list" ]]; then
  mapfile -t configs <<<"$config_list"
fi

if [[ ${#configs[@]} -eq 0 ]]; then
  warn "No server block config files found in ${SITES_DIR} or ${CONF_DIR}"
else
  MODIFIED=0
  for conf in "${configs[@]}"; do
    # Skip if already managed
    if grep -qF -- "$MARKER_START" "$conf" 2>/dev/null; then
      warn "Already managed: ${conf} — skipping"
      continue
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
      echo " Would inject into: ${conf}"
      MODIFIED=$((MODIFIED + 1))
      continue
    fi

    # Render into a temp file outside the nginx tree first; the config is
    # only touched once the result is known to be good.
    rendered=$(mktemp)
    rc=0
    render_with_bot_block "$conf" > "$rendered" || rc=$?
    if [[ $rc -eq 2 ]]; then
      rm -f -- "$rendered"
      warn "No location directive found in: ${conf} — nothing injected"
      continue
    elif [[ $rc -ne 0 ]]; then
      rm -f -- "$rendered"
      echo -e "${RED}[ERROR] Failed to process ${conf}${NC}" >&2
      exit 1
    fi

    # Backup. The copy is dot-prefixed so that wildcard includes such as
    # "sites-enabled/*" (which skip hidden files) never load it as a second
    # copy of the site.
    backup="$(dirname -- "$conf")/.$(basename -- "$conf").bak.${TIMESTAMP}"
    cp -p -- "$conf" "$backup"

    # Write through the existing path instead of moving a new file over it:
    # this keeps a sites-enabled symlink, and the file's owner and mode.
    cat -- "$rendered" > "$conf"
    rm -f -- "$rendered"

    info "Injected into: ${conf} (backup: ${backup})"
    MODIFIED=$((MODIFIED + 1))
  done

  if [[ $MODIFIED -eq 0 ]]; then
    warn "No files modified (already managed or nothing to inject)"
  fi
fi

# =====================================================
# Step 3: Validate nginx config
# =====================================================
step "Testing nginx configuration"

if [[ "$DRY_RUN" != "true" ]]; then
  # Let nginx check the configuration, with its stderr merged into stdout;
  # stop with status 1 when the check fails.
  if ! nginx -t 2>&1; then
    echo -e "${RED}[ERROR] nginx config test failed${NC}" >&2
    echo " Restore backups (.bak.${TIMESTAMP}) from ${SITES_DIR} and ${CONF_DIR}" >&2
    exit 1
  fi
  info "nginx config valid"
else
  echo " Would run: nginx -t"
fi

# =====================================================
# Step 4: Reload nginx
# =====================================================
step "Reloading nginx"

# In a dry run only announce the reload; otherwise ask systemd to reload.
case "$DRY_RUN" in
  true)
    echo " Would run: systemctl reload nginx"
    ;;
  *)
    systemctl reload nginx
    info "nginx reloaded"
    ;;
esac

# =====================================================
# Summary
# =====================================================

#######################################
# Print the closing summary and a verification hint.
# Globals:
#   BOLD, NC, MAP_FILE, STATUS_CODE, SINGLE_CONF, SITES_DIR, CONF_DIR (read)
# Outputs:
#   The summary on stdout. The "Expected" line follows STATUS_CODE instead of
#   always describing the default 444.
#######################################
print_summary() {
  echo ""
  echo -e "${BOLD}Done.${NC}"
  echo ""
  echo " Map: ${MAP_FILE}"
  echo " Status code: ${STATUS_CODE}"
  if [[ -n "$SINGLE_CONF" ]]; then
    echo " Config: ${SINGLE_CONF}"
  else
    echo " Scanned: ${SITES_DIR}/ and ${CONF_DIR}/*.conf"
  fi
  echo ""
  echo " To remove: sudo $(basename "$0") --remove"
  echo ""
  echo " Verify: curl -A 'GPTBot' -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
  if [[ "$STATUS_CODE" == "444" ]]; then
    echo " Expected: 444 (connection dropped) or 000 (no response)"
  else
    echo " Expected: ${STATUS_CODE}"
  fi
}

print_summary

Reference in New Issue
Block a user