Files
linux-scripts/add-nginx-js-challenge.sh
T
chiefgeek 515c9843dd
Lint Scripts / shellcheck (push) Failing after 14m30s
Lint Scripts / powershell-lint (push) Failing after 15m29s
Fix ShellCheck errors: remove local outside functions, fix openssl redirections, unquote loop var
2026-05-25 05:28:31 +02:00

583 lines
21 KiB
Bash

#!/bin/bash
################################################################################
# Script Name: add-nginx-js-challenge.sh
# Version: 3.1
# Description: Adds a lightweight JavaScript cookie challenge to nginx.
# Bots that don't execute JavaScript are silently dropped.
# Legitimate search engine crawlers are whitelisted by user agent.
# Headless Chrome bots from suspect GeoIP regions with no external
# referrer are tarpitted (served at 50 bytes/sec).
# Works alongside bot-block.conf (run add-nginx-bot-block.sh first).
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
# - nginx installed and running
# - Root access
#
# Usage:
# sudo ./add-nginx-js-challenge.sh
# sudo ./add-nginx-js-challenge.sh --dry-run
# sudo ./add-nginx-js-challenge.sh --remove
#
# How it works:
# 1. Whitelisted bot UAs (Googlebot, Bingbot, etc.) bypass the check entirely
# 2. All other visitors must have a cookie with a randomized name and token
# 3. First-time visitors get a brief redirect to a challenge page that sets
# the cookie via JS and bounces them back — takes < 100ms
# 4. Bots that don't run JS never get the cookie and get 444'd
# 5. Cookie name and token are randomized per installation — re-running the
# script rotates them, immediately invalidating old pre-set cookies
#
# Changelog:
# 3.1 — 2026-05-21: Challenge endpoint rate limiting. Headless Chrome bot farms
# were passing the JS challenge on every request by spawning fresh browser
# instances without persistent cookies. Added limit_req_zone on the
# challenge endpoint: 3 requests allowed (burst), then 1/min sustained.
# Excess requests get 444. Added --challenge-burst and --challenge-rate.
# Fixed geoip2 variable name ($geoip2_country_code to match standard
# geoip2.conf). Conditional geoip2 block — only added if no existing
# mmdb is loaded elsewhere in nginx config. Challenge JS now treats
# same-domain referrers as "direct" for tarpit purposes.
# 3.0 — 2026-05-20: Referrer tracking through challenge redirect. Original
# HTTP Referer is passed as &ref= param in the 302 redirect. Challenge
# JS stores it in a _bc_ref cookie. Tarpit map: visitors from suspect
# GeoIP countries (CN by default) with no external referrer are served
# at 50 bytes/sec via limit_rate, draining headless Chrome resources.
# Requires ngx_http_geoip2_module for GeoIP-based tarpitting.
# Added --tarpit-countries option (default: CN).
# Added --tarpit-rate option (default: 50 bytes/sec).
# 2.0 — 2026-05-19: Randomized cookie name and token per installation.
# Cookie name is now "_" plus two random hex digits (e.g. _7f, _a3).
# Cookie value is now a 32-char hex token instead of static "verified".
# Values persist in /etc/nginx/js-challenge.env for future reference.
# Re-running rotates credentials and invalidates old bot bypass cookies.
# Added no-cache headers on challenge page to prevent stale HTML after
# rotation. Fixed challenge page Secure flag to be conditional on HTTPS.
# Fixed challenge location — removed incorrect 'internal' directive.
# 1.0 — 2026-05-11: Initial release
#
################################################################################
set -euo pipefail

# --- Configuration ---
# Fixed locations of everything this script creates or manages.
CONF_DIR='/etc/nginx/conf.d'
CHALLENGE_DIR='/var/www/js-challenge'
STATE_FILE='/etc/nginx/js-challenge.env'
CHALLENGE_PATH='/_bc'
CHALLENGE_MAP="${CONF_DIR}/js-challenge.conf"
CHALLENGE_HTML="${CHALLENGE_DIR}/challenge.html"

# Mode flags and cookie lifetime; the option parser may overwrite these.
DRY_RUN=false
REMOVE=false
COOKIE_MAX_AGE=86400 # 24 hours

# Tunables: a same-named environment variable wins when it is set and
# non-empty, otherwise the default after := is used.
: "${TARPIT_COUNTRIES:=CN}" # GeoIP country codes to tarpit (space-separated)
: "${TARPIT_RATE:=50}"      # bytes/sec for tarpitted responses
: "${CHALLENGE_RATE:=1}"    # sustained challenge requests per minute per IP
: "${CHALLENGE_BURST:=3}"   # initial burst of challenge requests allowed

# Epoch seconds captured once; used as the suffix of the map config backup.
TIMESTAMP="$(date +%s)"

# --- Colors ---
# Kept as backslash escapes; they are expanded when printed with `echo -e`.
NC='\033[0m'
BOLD='\033[1m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
CYAN='\033[0;36m'
# --- Log helpers ---
# Each helper writes one line to stdout: a coloured tag followed by its
# arguments. Backslash escapes (colour codes included) are expanded by %b.
info() {
  printf '%b\n' "${GREEN}[OK]${NC} $*"
}

warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $*"
}

step() {
  printf '%b\n' "${CYAN}[STEP]${NC} $*"
}
#######################################
# Print the help text to stdout and terminate the script with status 0.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  local self
  # Resolve the script name once. `|| true` keeps the help printing under
  # `set -e` even if basename rejects an unusual $0.
  self="$(basename "$0")" || true
  cat <<EOF
Usage: sudo ${self} [OPTIONS]
Adds a JavaScript cookie challenge to nginx. Visitors that don't execute
JavaScript (headless scrapers, curl-based bots) are silently dropped.
Legitimate search engine crawlers are whitelisted by user agent.
This is designed to work alongside add-nginx-bot-block.sh — run that first
to block known bad bots, then use this to catch the ones spoofing real
browser user agents.
The cookie name and token are randomized per installation. Re-running the script
rotates them, immediately invalidating any bot-cached bypass cookies.
Options:
--dry-run Show what would be done without making changes
--remove Remove challenge config, HTML, and injected rules
--cookie-ttl SECONDS Cookie lifetime in seconds (default: 86400 / 24h)
--tarpit-countries CC Space-separated GeoIP country codes to tarpit (default: CN)
--tarpit-rate BYTES Response rate in bytes/sec for tarpitted visitors (default: 50)
--challenge-rate N Sustained challenge requests per minute per IP (default: 1)
--challenge-burst N Initial burst of challenge requests allowed (default: 3)
-h, --help Show this help
Examples:
sudo ${self}
sudo ${self} --dry-run
sudo ${self} --remove
EOF
  exit 0
}
# --- Argument parsing ---

#######################################
# Report a command-line error on stderr and exit with status 1.
# Arguments: $* - message text (printed verbatim, no escape expansion)
#######################################
_jsc_cli_error() {
  printf '%b[ERROR] %s%b\n' "${RED:-}" "$*" "${NC:-}" >&2
  exit 1
}

#######################################
# Abort unless a value is a plain decimal integer.
# Arguments: $1 - option name, used in the error message
#            $2 - value to check
#            $3 - "positive" to reject zero as well (optional)
#######################################
_jsc_require_int() {
  local opt=$1 val=$2 mode=${3:-}
  if [[ ! "$val" =~ ^[0-9]+$ ]]; then
    _jsc_cli_error "${opt} expects a whole number, got '${val}'"
  fi
  if [[ "$mode" == "positive" && "$val" =~ ^0+$ ]]; then
    _jsc_cli_error "${opt} must be greater than zero"
  fi
}

#######################################
# Parse the command line into the script's global settings.
# Option values are validated here because they are later pasted verbatim
# into the generated nginx config and the challenge page's JavaScript.
# Globals:   DRY_RUN, REMOVE, COOKIE_MAX_AGE, TARPIT_COUNTRIES, TARPIT_RATE,
#            CHALLENGE_RATE, CHALLENGE_BURST (written)
# Arguments: the script's positional parameters
# Returns:   exits 0 after printing help, exits 1 on any usage error
#######################################
_jsc_parse_args() {
  local opt
  # Zero or more two-letter codes separated by whitespace.
  local country_re='^[[:space:]]*([A-Za-z]{2}([[:space:]]+[A-Za-z]{2})*)?[[:space:]]*$'
  # Bytes per second, optionally with a k/m size suffix.
  local rate_re='^[0-9]+[kKmM]?$'

  while [[ $# -gt 0 ]]; do
    opt=$1
    case "$opt" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --remove)
        REMOVE=true
        shift
        ;;
      --cookie-ttl | --tarpit-countries | --tarpit-rate | --challenge-rate | --challenge-burst)
        # Without this check `set -u` would abort with a bare
        # "$2: unbound variable" when the value is missing.
        [[ $# -ge 2 ]] || _jsc_cli_error "${opt} requires a value"
        case "$opt" in
          --cookie-ttl)
            _jsc_require_int "$opt" "$2" positive
            COOKIE_MAX_AGE=$2
            ;;
          --tarpit-countries)
            [[ "$2" =~ $country_re ]] \
              || _jsc_cli_error "${opt} expects space-separated two-letter country codes, got '${2}'"
            TARPIT_COUNTRIES=$2
            ;;
          --tarpit-rate)
            [[ "$2" =~ $rate_re ]] \
              || _jsc_cli_error "${opt} expects a byte rate such as 50 or 1k, got '${2}'"
            TARPIT_RATE=$2
            ;;
          --challenge-rate)
            _jsc_require_int "$opt" "$2" positive
            CHALLENGE_RATE=$2
            ;;
          --challenge-burst)
            _jsc_require_int "$opt" "$2"
            CHALLENGE_BURST=$2
            ;;
        esac
        shift 2
        ;;
      -h | --help)
        usage
        ;;
      *)
        printf '%b[ERROR] Unknown option: %s%b\n' "${RED:-}" "$opt" "${NC:-}" >&2
        # usage() ends with `exit 0`; the subshell confines that exit so the
        # failure status below is what the caller sees.
        (usage) >&2
        exit 1
        ;;
    esac
  done
}

_jsc_parse_args "$@"
# --- Root check ---
# A real run writes under /etc/nginx and reloads nginx, so it needs root.
# A dry run changes nothing and is therefore allowed for any user.
if [[ "$DRY_RUN" != "true" ]]; then
  if ((EUID != 0)); then
    echo -e "${RED}[ERROR] Must run as root (or use --dry-run)${NC}" >&2
    exit 1
  fi
fi
# =====================================================
# Generate or load cookie credentials
# =====================================================
#######################################
# Emit N random bytes as lowercase hex on stdout.
# Uses openssl when it is installed and working, otherwise /dev/urandom.
# Arguments: $1 - number of random bytes
# Outputs:   2*N hex digits, no trailing newline
# Returns:   1 when no source produced exactly 2*N hex digits
#######################################
_jsc_rand_hex() {
  local nbytes=$1 hex=''
  if command -v openssl >/dev/null 2>&1; then
    hex="$(openssl rand -hex "$nbytes" 2>/dev/null)" || hex=''
  fi
  if [[ -z "$hex" && -r /dev/urandom ]]; then
    hex="$(od -An -N"$nbytes" -tx1 /dev/urandom | tr -d ' \n')" || hex=''
  fi
  # Guard against a truncated or garbled read before the value is baked
  # into the nginx config and the challenge page.
  [[ "$hex" =~ ^[0-9a-f]+$ && ${#hex} -eq $((nbytes * 2)) ]] || return 1
  printf '%s' "$hex"
}

#######################################
# Create a fresh cookie name and token for this installation.
# Globals:   COOKIE_NAME  (written) "_" followed by 2 hex digits
#            COOKIE_VALUE (written) 32 hex digits
# Returns:   exits 1 when no random source is usable
#######################################
generate_credentials() {
  local name_hex token_hex
  if ! name_hex="$(_jsc_rand_hex 1)" || ! token_hex="$(_jsc_rand_hex 16)"; then
    echo -e "${RED:-}[ERROR] Cannot generate random credentials (need openssl or /dev/urandom)${NC:-}" >&2
    exit 1
  fi
  COOKIE_NAME="_${name_hex}"
  COOKIE_VALUE="${token_hex}"
}
#######################################
# Persist the current credentials to the state file (skipped on --dry-run).
# Globals:   DRY_RUN, STATE_FILE, COOKIE_NAME, COOKIE_VALUE,
#            CHALLENGE_PATH (read)
# Outputs:   writes STATE_FILE as shell-style KEY='value' lines
# Returns:   0 on success or dry run; a write failure aborts under `set -e`
#######################################
save_credentials() {
  if [[ "$DRY_RUN" == "true" ]]; then
    return 0
  fi
  # Create the file with owner-only permissions from the first byte on.
  # Writing first and chmod-ing afterwards would leave a brand-new file
  # readable under the ambient umask for a moment.
  (
    umask 077
    cat > "$STATE_FILE" <<ENVEOF
# JS challenge credentials — generated $(date -Iseconds)
# Re-run the script to rotate these values
COOKIE_NAME='${COOKIE_NAME}'
COOKIE_VALUE='${COOKIE_VALUE}'
CHALLENGE_PATH='${CHALLENGE_PATH}'
ENVEOF
  )
  # umask only affects newly created files; tighten a pre-existing one too.
  chmod 600 "$STATE_FILE"
}
# Fresh credentials are needed for every install or rotation run (dry runs
# included); only --remove skips generating them.
case "$REMOVE" in
  true) ;;
  *)
    generate_credentials
    info "Generated new credentials — cookie: ${COOKIE_NAME} token: ${COOKIE_VALUE:0:8}..."
    ;;
esac
# =====================================================
# Remove mode
# =====================================================
# With --remove the script deletes the map config, the challenge directory
# and the state file, validates and reloads nginx, then exits — nothing after
# this block runs. Combined with --dry-run it only lists what would happen.
if [[ "$REMOVE" == "true" ]]; then
  step "Removing JS challenge configuration"
  if [[ "$DRY_RUN" == "true" ]]; then
    echo " Would remove: ${CHALLENGE_MAP}"
    echo " Would remove: ${CHALLENGE_DIR}"
    echo " Would remove: ${STATE_FILE}"
    echo " Would run: nginx -t && systemctl reload nginx"
  else
    # Plain if-blocks instead of `test && rm && info` chains, so a failing
    # rm stops the script under `set -e` rather than being skipped silently.
    if [[ -f "$CHALLENGE_MAP" ]]; then
      rm -f -- "$CHALLENGE_MAP"
      info "Removed: ${CHALLENGE_MAP}"
    fi
    if [[ -d "$CHALLENGE_DIR" ]]; then
      # :? aborts instead of ever expanding to an empty path.
      rm -rf -- "${CHALLENGE_DIR:?}"
      info "Removed: ${CHALLENGE_DIR}"
    fi
    if [[ -f "$STATE_FILE" ]]; then
      rm -f -- "$STATE_FILE"
      info "Removed: ${STATE_FILE}"
    fi
    if nginx -t 2>&1; then
      systemctl reload nginx
      info "nginx reloaded"
    else
      echo -e "${RED}[ERROR] nginx config test failed after removal${NC}" >&2
      # The map file that defined the challenge variables is gone, so any
      # server block still using them is the first thing to check.
      echo " A likely cause: server blocks still reference the removed challenge variables." >&2
      echo " Remove the js-challenge location blocks from your server block configs" >&2
      echo " (look for 'js-challenge-managed'), then run: nginx -t && systemctl reload nginx" >&2
      exit 1
    fi
  fi
  echo ""
  echo -e "${BOLD}JS challenge removed.${NC}"
  echo ""
  echo " Note: You may also need to remove the js-challenge location blocks"
  echo " from your server block configs (look for 'js-challenge-managed')."
  exit 0
fi
# =====================================================
# Step 1: Create the challenge HTML page
# =====================================================
step "Creating challenge page at ${CHALLENGE_HTML}"

# The page is one single-quoted shell string; each '"${VAR}"' sequence closes
# the quote, splices a shell value in, and reopens the quote. The page's
# script sets the challenge cookie and a short-lived _bc_ref cookie, shows a
# notice if the challenge cookie did not stick, and otherwise returns the
# visitor to the path given in ?r=.
#
# The return target is attacker-controlled. Checking only that it starts with
# "/" is not enough: "//host" (and variants the URL parser normalises to it)
# is a scheme-relative URL and would send the visitor to another site. The
# target is therefore resolved against the current origin and used only when
# it stays on that origin.
CHALLENGE_CONTENT='<!DOCTYPE html>
<html>
<head><meta charset="utf-8"><title>Verifying</title></head>
<body>
<noscript><p>JavaScript is required to access this site.</p></noscript>
<p id="msg" style="display:none;font-family:sans-serif;text-align:center;margin-top:2em;">
Cookies must be enabled to access this site.</p>
<script>
(function(){
var p = new URLSearchParams(window.location.search);
var r = p.get("r") || "/";
if (r.charAt(0) !== "/") r = "/";
try {
var dest = new URL(r, location.origin);
r = (dest.origin === location.origin) ? dest.pathname + dest.search + dest.hash : "/";
} catch(e) { r = "/"; }
var cn = "'"${COOKIE_NAME}"'";
var cv = "'"${COOKIE_VALUE}"'";
var secure = (location.protocol === "https:") ? ";Secure" : "";
document.cookie = cn + "=" + cv + ";path=/;max-age='"${COOKIE_MAX_AGE}"';SameSite=Lax" + secure;
var origRef = p.get("ref") || "direct";
if (origRef !== "direct") {
try { var rh = new URL(origRef).hostname; if (rh === location.hostname) origRef = "direct"; } catch(e) {}
}
document.cookie = "_bc_ref=" + encodeURIComponent(origRef) + ";path=/;max-age=120;SameSite=Lax" + secure;
if (document.cookie.split(/;\s*/).every(function(c){ return c.indexOf(cn + "=") !== 0; })) {
document.getElementById("msg").style.display = "block";
return;
}
window.location.replace(r);
})();
</script>
</body>
</html>'

if [[ "$DRY_RUN" == "true" ]]; then
  echo " Would create: ${CHALLENGE_DIR}/"
  echo " Would create: ${CHALLENGE_HTML}"
else
  mkdir -p "$CHALLENGE_DIR"
  printf '%s\n' "$CHALLENGE_CONTENT" > "$CHALLENGE_HTML"
  info "Challenge page created: ${CHALLENGE_HTML}"
fi

# Save credentials
save_credentials
# =====================================================
# Step 2: Create nginx map config
# =====================================================
step "Creating JS challenge map at ${CHALLENGE_MAP}"

# nginx exposes a request cookie as $cookie_<name>. The generated cookie name
# itself starts with "_", so the variable has a double underscore
# (e.g. _7f → $cookie__7f).
NGINX_COOKIE_VAR="\$cookie_${COOKIE_NAME}"

# Check whether an active (non-commented) geoip2 directive already loads an
# mmdb anywhere in the nginx config. If so, $geoip2_country_code is expected
# to be defined there — don't declare it a second time.
GEOIP2_BLOCK=""
if ! grep -rqE '^[[:space:]]*geoip2[[:space:]].*\.mmdb' /etc/nginx/ \
  --include='*.conf' --exclude='js-challenge.conf' --exclude='*.bak.*' \
  2>/dev/null; then
  GEOIP2_BLOCK='
# ── GeoIP2: country lookup for tarpit decisions ──────────────────────
# Uses the City database (superset of Country). Adjust path if needed.
geoip2 /usr/share/GeoIP/GeoLite2-City.mmdb {
$geoip2_country_code country iso_code;
}
'
  step "No existing geoip2 country_code config found — adding to map config"
fi

#######################################
# Turn one server_name token into a line for the same-site referer map.
# Prints nothing for tokens that cannot be matched as a Referer host: the
# "_" catch-all, IP literals, regex names ("~...") and anything else with
# characters outside a host name. "*.domain" and ".domain" are supported;
# other wildcard forms are skipped.
# Arguments: $1 - server_name token (a trailing ";" is tolerated)
# Outputs:   zero or one map line on stdout, without trailing newline
#######################################
_jsc_referer_entry() {
  local name=${1%;} prefix=''
  local host_chars='^[A-Za-z0-9._*-]+$'
  local ip_like='^[0-9.]+$'
  local sub_re='[A-Za-z0-9.-]+\.'

  [[ "$name" =~ $host_chars ]] || return 0
  if [[ "$name" == "_" || "$name" == "server_name" || "$name" =~ $ip_like ]]; then
    return 0
  fi
  case "$name" in
    \*.*)
      prefix="$sub_re"
      name=${name#\*.}
      ;;
    .*)
      prefix="(${sub_re})?"
      name=${name#.}
      ;;
  esac
  if [[ -z "$name" || "$name" == *\** ]]; then
    return 0
  fi
  # The tail pins the end of the host, so "example.com.evil.tld" is not
  # mistaken for "example.com".
  printf ' ~^1:https?://%s(:[0-9]+)?(/|$) 1;' "${prefix}${name//./\\.}"
}

#######################################
# Collect server_name values from nginx configs to build the same-site
# referer map entries, one per distinct name.
# Globals:   REFERER_ENTRIES  (written) newline-terminated map lines
#            _jsc_domain_seen (written) names already emitted
# Arguments: config files to scan; missing or unreadable ones are skipped
#######################################
_jsc_build_referer_entries() {
  local conf line names token entry
  local -a tokens
  local directive_re='^[[:space:]]*server_name[[:space:]]+([^;]+)'

  REFERER_ENTRIES=""
  _jsc_domain_seen=()
  for conf in "$@"; do
    [[ -f "$conf" && -r "$conf" ]] || continue
    while IFS= read -r line || [[ -n "$line" ]]; do
      [[ "$line" =~ $directive_re ]] || continue
      # Drop a trailing comment on a directive that continues on the next line.
      names=${BASH_REMATCH[1]%%#*}
      # Split on whitespace without pathname expansion, so a name such as
      # "*.example.com" is never globbed against the working directory.
      read -r -a tokens <<<"$names"
      ((${#tokens[@]} > 0)) || continue
      for token in "${tokens[@]}"; do
        entry="$(_jsc_referer_entry "$token")"
        [[ -n "$entry" ]] || continue
        if [[ " ${_jsc_domain_seen[*]:-} " == *" ${token} "* ]]; then
          continue
        fi
        _jsc_domain_seen+=("$token")
        # $'\n' is a real newline; "\n" inside double quotes would write the
        # two characters backslash and n into the nginx config.
        REFERER_ENTRIES+="${entry}"$'\n'
      done
    done < "$conf"
  done
}

_jsc_build_referer_entries /etc/nginx/conf.d/*.conf /etc/nginx/sites-enabled/*
if [[ -z "$REFERER_ENTRIES" ]]; then
  warn "No server_name values found — same-site image bypass will not work"
  warn "Images behind the challenge may cause redirect loops for browsers"
fi
# Assemble the full text of the nginx map config in MAP_CONTENT.
#
# The value is one long single-quoted shell string, so every nginx variable
# ($binary_remote_addr, $uri, ...) stays literal. Wherever a shell value is
# needed the quote is closed, the value is spliced in as "${VAR}" or "$(...)",
# and the quote is reopened — that is what each quote-doublequote sequence
# below does.
#
# Values spliced in: COOKIE_NAME and the first 8 characters of COOKIE_VALUE
# (header comment only), the current date, CHALLENGE_RATE, CHALLENGE_BURST
# (comment only), NGINX_COOKIE_VAR, the full COOKIE_VALUE, CHALLENGE_PATH,
# REFERER_ENTRIES, GEOIP2_BLOCK and one map line per word of TARPIT_COUNTRIES.
#
# In the $needs_js_challenge map all fifteen listed combinations map to 0;
# only "0:0:0:0" (not an allowed bot, no valid cookie, not an exempt URI, not
# a same-site image) falls through to the default of 1.
#
# The trailing server block serves challenge.html on 127.0.0.1:18444 with
# no-cache headers.
MAP_CONTENT='# JS cookie challenge — allowed bots and cookie check
# Generated by add-nginx-js-challenge.sh — https://mylinux.work
# Cookie: '"${COOKIE_NAME}"' Token: '"${COOKIE_VALUE:0:8}"'...
# Generated: '"$(date -Iseconds)"'
# ── Rate limit: challenge endpoint ───────────────────────────────────
# Real users hit the challenge once and keep the cookie. Headless bot farms
# spawn fresh browsers per request, hitting the challenge every time.
# Rate: '"${CHALLENGE_RATE}"'r/m with burst of '"${CHALLENGE_BURST}"' — excess gets 444.
limit_req_zone $binary_remote_addr zone=jschallenge:10m rate='"${CHALLENGE_RATE}"'r/m;
# Bots that legitimately identify themselves and should bypass the JS check
map $http_user_agent $is_allowed_bot {
default 0;
# Search engines
~*Googlebot 1;
~*bingbot 1;
~*Slurp 1;
~*DuckDuckBot 1;
~*DuckAssistBot 1;
~*Baiduspider 1;
~*YandexBot 1;
~*YandexFavicons 1;
~*Applebot 1;
~*Qwantbot 1;
~*Qwantify 1;
~*Bravebot 1;
~*kagi-fetcher 1;
~*Kagibot 1;
~*Yahoo! 1;
~*Yeti 1;
# Social media / link previews
~*facebookexternalhit 1;
~*Facebot 1;
~*Twitterbot 1;
~*LinkedInBot 1;
~*Slackbot 1;
~*Slack-ImgProxy 1;
~*Discordbot 1;
~*TelegramBot 1;
~*WhatsApp 1;
~*redditbot 1;
~*ArenaUnfurlBot 1;
# Feed readers
~*Feedly 1;
~*Miniflux 1;
~*FreshRSS 1;
~*NewsBlur 1;
~*Tiny\ Tiny\ RSS 1;
~*Inoreader 1;
~*NetNewsWire 1;
# Monitoring / uptime
~*UptimeRobot 1;
~*Pingdom 1;
~*StatusCake 1;
~*Blackbox-Exporter 1;
# AI answer bots (user-facing, not training crawlers)
~*OAI-SearchBot 1;
~*ChatGPT-User 1;
~*Claude-Web 1;
~*Claude-User 1;
~*MistralAI-User 1;
# Archive / research
~*archive\.org_bot 1;
# Apple Safari prefetch
~*safarifetcherd 1;
# Link checkers / validators
~*W3C_Validator 1;
~*W3C-checklink 1;
~*LinkChecker 1;
~*link-check 1;
# Decentralized search
~*yacybot 1;
# Add your own allowed bots below
}
# Validate the challenge cookie — exact token match
map '"${NGINX_COOKIE_VAR}"' $js_cookie_valid {
default 0;
"'"${COOKIE_VALUE}"'" 1;
}
# Detect requests to the challenge page and download paths (prevent redirect loops)
map $uri $is_challenge_uri {
default 0;
"'"${CHALLENGE_PATH}"'" 1;
~^/downloads/ 1;
~*\.(css|js|woff2?)$ 1;
~*favicon 1;
~*apple-touch-icon 1;
}
# Detect image sub-resource requests with same-site referer (browser <img> loads)
# These bypass the challenge because: (a) images cannot execute JS challenges,
# and (b) the same-site referer proves the browser loaded a page from this domain.
# Direct image requests from scrapers (no referer or external referer) still get challenged.
map $uri $is_image_request {
default 0;
~*\.(png|jpe?g|gif|svg|webp|ico|avif)$ 1;
}
map "$is_image_request:$http_referer" $is_samesite_image {
default 0;
'"${REFERER_ENTRIES}"'}
# Combined check: need challenge if not allowed bot, no valid cookie, and not the challenge page
map "$is_allowed_bot:$js_cookie_valid:$is_challenge_uri:$is_samesite_image" $needs_js_challenge {
default 1;
"1:0:0:0" 0;
"1:0:0:1" 0;
"1:0:1:0" 0;
"1:0:1:1" 0;
"1:1:0:0" 0;
"1:1:0:1" 0;
"1:1:1:0" 0;
"1:1:1:1" 0;
"0:1:0:0" 0;
"0:1:0:1" 0;
"0:1:1:0" 0;
"0:1:1:1" 0;
"0:0:1:0" 0;
"0:0:1:1" 0;
"0:0:0:1" 0;
}
'"${GEOIP2_BLOCK}"'
# ── Tarpit: headless Chrome bots from suspect regions ─────────────────
# Visitors from tarpit countries with no external referrer (passed through
# the challenge redirect as the _bc_ref cookie) are served at a crawl.
# This drains headless Chrome resources (~200-500 MB RAM per instance)
# without giving the bot a clear "blocked" signal to adapt to.
#
# The _bc_ref cookie is set by the challenge page JS from the &ref= param.
# It contains the original HTTP Referer before the 302 redirect destroyed it.
# "direct" = no external referrer (typed URL or bot). Cookie expires in 120s.
# Check if visitor is from a tarpit country (requires geoip2 module)
map $geoip2_country_code $is_tarpit_country {
default 0;
'"$(for cc in $TARPIT_COUNTRIES; do echo " \"${cc}\" 1;"; done)"'
}
# Tarpit only if: tarpit country + no external referrer + passed JS challenge
map "$is_tarpit_country:$cookie__bc_ref" $tarpit_client {
default 0;
"1:direct" 1;
"1:" 1;
}
# Serve the challenge page
server {
listen 127.0.0.1:18444;
server_name _;
root /var/www/js-challenge;
location / {
add_header Cache-Control "no-store, no-cache, must-revalidate" always;
add_header Pragma "no-cache" always;
try_files /challenge.html =404;
}
}'
# Write the assembled map config. An existing file is first copied to a
# timestamped backup; a dry run only reports the target path.
if [[ "$DRY_RUN" != "true" ]]; then
  if [[ -f "$CHALLENGE_MAP" ]]; then
    cp "$CHALLENGE_MAP" "${CHALLENGE_MAP}.bak.${TIMESTAMP}"
    warn "Existing config backed up"
  fi
  printf '%s\n' "$MAP_CONTENT" > "$CHALLENGE_MAP"
  info "Map config created: ${CHALLENGE_MAP}"
else
  echo " Would create: ${CHALLENGE_MAP}"
fi
# =====================================================
# Step 3: Show injection instructions
# =====================================================
# Prints the snippet the administrator has to paste into each server block.
# The snippet is shown in cyan: the colour is switched on with the first
# marker line and reset with the last one. "\$" keeps nginx variables
# literal; ${...} values are filled in from this run's settings.
#
# NOTE(review): the printed redirect inserts $request_uri unencoded, while
# the challenge page reads r with URLSearchParams — a request whose query
# string contains "&" looks like it would lose everything after it. Confirm.
step "Server block configuration"
cat <<EOF

 Add the following inside each server block (after your bot-block rules):

EOF
printf '%b\n' "${CYAN} # js-challenge-managed-start"
cat <<EOF
 location = ${CHALLENGE_PATH} {
 limit_req zone=jschallenge burst=${CHALLENGE_BURST} nodelay;
 limit_req_status 444;
 proxy_pass http://127.0.0.1:18444/;
 }

 # JS cookie challenge — redirect non-JS visitors
 if (\$needs_js_challenge) {
 return 302 ${CHALLENGE_PATH}?r=\$request_uri&ref=\$http_referer;
 }

 # Tarpit headless Chrome bots from suspect GeoIP regions
 if (\$tarpit_client) {
 set \$limit_rate ${TARPIT_RATE};
 }
EOF
printf '%b\n' " # js-challenge-managed-end${NC}"
cat <<EOF

 Or re-run add-nginx-bot-block.sh to have it injected automatically
 (if supported in your version).

EOF
# =====================================================
# Step 4: Validate nginx config
# =====================================================
# Runs `nginx -t` against the freshly written config (skipped on --dry-run)
# and stops the script before the reload step when the test fails.
step "Testing nginx configuration"
if [[ "$DRY_RUN" == "true" ]]; then
  echo " Would run: nginx -t"
elif nginx -t 2>&1; then
  info "nginx config valid"
else
  echo -e "${RED}[ERROR] nginx config test failed${NC}" >&2
  # A backup is only written when a map config already existed before this
  # run, so only point at it when the file is really there.
  if [[ -f "${CHALLENGE_MAP}.bak.${TIMESTAMP}" ]]; then
    echo " Restore backup: ${CHALLENGE_MAP}.bak.${TIMESTAMP}" >&2
  else
    echo " No backup exists (no previous map config) — remove ${CHALLENGE_MAP} to recover" >&2
  fi
  exit 1
fi
# =====================================================
# Step 5: Reload nginx
# =====================================================
# Applies the validated config; a dry run only reports the command.
step "Reloading nginx"
case "$DRY_RUN" in
  true)
    echo " Would run: systemctl reload nginx"
    ;;
  *)
    systemctl reload nginx
    info "nginx reloaded"
    ;;
esac
# =====================================================
# Summary
# =====================================================
# Prints what was installed plus a few manual smoke tests.
#
# Rate-limit expectation: with `limit_req ... burst=N nodelay` nginx lets
# N+1 back-to-back requests through before rejecting, and a 444 response
# closes the connection without sending anything, which curl reports as
# http_code 000 rather than 444.
if [[ "$CHALLENGE_BURST" =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so a value such as "08" is not read as octal.
  _jsc_rate_expect="first $((10#$CHALLENGE_BURST + 1)) return 200, then 000 (nginx 444 closes the connection without a response)"
else
  _jsc_rate_expect="200 until the burst is used up, then 000 (nginx 444 closes the connection without a response)"
fi

echo ""
echo -e "${BOLD}Done.${NC}"
cat <<EOF

 Challenge map: ${CHALLENGE_MAP}
 Challenge page: ${CHALLENGE_HTML}
 State file: ${STATE_FILE}
 Cookie name: ${COOKIE_NAME}
 Cookie token: ${COOKIE_VALUE:0:8}... (32 hex chars)
 Cookie TTL: ${COOKIE_MAX_AGE}s
 Tarpit countries: ${TARPIT_COUNTRIES}
 Tarpit rate: ${TARPIT_RATE} bytes/sec
 Challenge rate: ${CHALLENGE_RATE}r/m (burst: ${CHALLENGE_BURST})

 To rotate credentials (invalidate bot-cached cookies):
 sudo $(basename "$0")

 To remove: sudo $(basename "$0") --remove

 Test (bot without cookie gets redirected to challenge):
 curl -o /dev/null -s -w '%{http_code}' https://yourdomain.com
 Expected: 302

 Test (browser completes challenge — 302 → 200):
 Open https://yourdomain.com in a browser
 Expected: brief redirect then page loads normally

 Test (old static bypass no longer works):
 curl -b '_bc=verified' -o /dev/null -s -w '%{http_code}' https://yourdomain.com
 Expected: 302 (not 200 — old cookie is invalid)

 Test (rate limit on challenge endpoint):
 for i in 1 2 3 4 5; do curl -o /dev/null -s -w "\$i: %{http_code}\n" https://yourdomain.com${CHALLENGE_PATH}; done
 Expected: ${_jsc_rate_expect}

 Test (allowed bot bypasses challenge):
 curl -A 'Googlebot' -o /dev/null -s -w '%{http_code}' https://yourdomain.com
 Expected: 200
EOF