#!/bin/bash ################################################################################ # Script Name: add-nginx-js-challenge.sh # Version: 3.1 # Description: Adds a lightweight JavaScript cookie challenge to nginx. # Bots that don't execute JavaScript are silently dropped. # Legitimate search engine crawlers are whitelisted by user agent. # Headless Chrome bots from suspect GeoIP regions with no external # referrer are tarpitted (served at 50 bytes/sec). # Works alongside bot-block.conf (run add-nginx-bot-block.sh first). # # Author: Phil Connor # Contact: contact@mylinux.work # Website: https://mylinux.work # License: MIT # # Prerequisites: # - nginx installed and running # - Root access # # Usage: # sudo ./add-nginx-js-challenge.sh # sudo ./add-nginx-js-challenge.sh --dry-run # sudo ./add-nginx-js-challenge.sh --remove # # How it works: # 1. Whitelisted bot UAs (Googlebot, Bingbot, etc.) bypass the check entirely # 2. All other visitors must have a cookie with a randomized name and token # 3. First-time visitors get a brief redirect to a challenge page that sets # the cookie via JS and bounces them back — takes < 100ms # 4. Bots that don't run JS never get the cookie and get 444'd # 5. Cookie name and token are randomized per installation — re-running the # script rotates them, immediately invalidating old pre-set cookies # # Changelog: # 3.1 — 2026-05-21: Challenge endpoint rate limiting. Headless Chrome bot farms # were passing the JS challenge on every request by spawning fresh browser # instances without persistent cookies. Added limit_req_zone on the # challenge endpoint: 3 requests allowed (burst), then 1/min sustained. # Excess requests get 444. Added --challenge-burst and --challenge-rate. # Fixed geoip2 variable name ($geoip2_country_code to match standard # geoip2.conf). Conditional geoip2 block — only added if no existing # mmdb is loaded elsewhere in nginx config. Challenge JS now treats # same-domain referrers as "direct" for tarpit purposes. 
# 3.0 — 2026-05-20: Referrer tracking through challenge redirect. Original # HTTP Referer is passed as &ref= param in the 302 redirect. Challenge # JS stores it in a _bc_ref cookie. Tarpit map: visitors from suspect # GeoIP countries (CN by default) with no external referrer are served # at 50 bytes/sec via limit_rate, draining headless Chrome resources. # Requires ngx_http_geoip2_module for GeoIP-based tarpitting. # Added --tarpit-countries option (default: CN). # Added --tarpit-rate option (default: 50 bytes/sec). # 2.0 — 2026-05-19: Randomized cookie name and token per installation. # Cookie name is now a random 2-character suffix (e.g. _v7, _xq). # Cookie value is now a 32-char hex token instead of static "verified". # Values persist in /etc/nginx/js-challenge.env for future reference. # Re-running rotates credentials and invalidates old bot bypass cookies. # Added no-cache headers on challenge page to prevent stale HTML after # rotation. Fixed challenge page Secure flag to be conditional on HTTPS. # Fixed challenge location — removed incorrect 'internal' directive. 
# 1.0 — 2026-05-11: Initial release # ################################################################################ set -euo pipefail # --- Configuration --- CONF_DIR="/etc/nginx/conf.d" CHALLENGE_MAP="${CONF_DIR}/js-challenge.conf" CHALLENGE_DIR="/var/www/js-challenge" CHALLENGE_HTML="${CHALLENGE_DIR}/challenge.html" STATE_FILE="/etc/nginx/js-challenge.env" CHALLENGE_PATH="/_bc" DRY_RUN=false REMOVE=false COOKIE_MAX_AGE=86400 # 24 hours TARPIT_COUNTRIES="${TARPIT_COUNTRIES:-CN}" # GeoIP country codes to tarpit (space-separated) TARPIT_RATE="${TARPIT_RATE:-50}" # bytes/sec for tarpitted responses CHALLENGE_RATE="${CHALLENGE_RATE:-1}" # sustained challenge requests per minute per IP CHALLENGE_BURST="${CHALLENGE_BURST:-3}" # initial burst of challenge requests allowed TIMESTAMP=$(date +%s) # --- Colors --- RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[0;33m' CYAN='\033[0;36m' BOLD='\033[1m' NC='\033[0m' info() { echo -e "${GREEN}[OK]${NC} $*"; } warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } step() { echo -e "${CYAN}[STEP]${NC} $*"; } usage() { cat <&2 exit 1 fi # ===================================================== # Generate or load cookie credentials # ===================================================== generate_credentials() { COOKIE_NAME="_$(openssl rand -hex 1)" COOKIE_VALUE="$(openssl rand -hex 16)" } save_credentials() { if [[ "$DRY_RUN" != "true" ]]; then cat > "$STATE_FILE" <&1; then systemctl reload nginx info "nginx reloaded" else echo -e "${RED}[ERROR] nginx config test failed after removal${NC}" >&2 exit 1 fi fi echo "" echo -e "${BOLD}JS challenge removed.${NC}" echo "" echo " Note: You may also need to remove the js-challenge location blocks" echo " from your server block configs (look for 'js-challenge-managed')." 
exit 0 fi # ===================================================== # Step 1: Create the challenge HTML page # ===================================================== step "Creating challenge page at ${CHALLENGE_HTML}" CHALLENGE_CONTENT=' Verifying ' if [[ "$DRY_RUN" == "true" ]]; then echo " Would create: ${CHALLENGE_DIR}/" echo " Would create: ${CHALLENGE_HTML}" else mkdir -p "$CHALLENGE_DIR" echo "$CHALLENGE_CONTENT" > "$CHALLENGE_HTML" info "Challenge page created: ${CHALLENGE_HTML}" fi # Save credentials save_credentials # ===================================================== # Step 2: Create nginx map config # ===================================================== step "Creating JS challenge map at ${CHALLENGE_MAP}" # Build the cookie variable name for nginx (e.g. _v7 → $cookie__v7) NGINX_COOKIE_VAR="\$cookie_${COOKIE_NAME}" # Check if a geoip2 block already loads an mmdb anywhere in nginx config. # If so, $geoip2_country_code should already be defined — don't duplicate. GEOIP2_BLOCK="" if ! grep -r 'geoip2.*\.mmdb' /etc/nginx/ \ --include='*.conf' --exclude='js-challenge.conf' --exclude='*.bak.*' \ -q 2>/dev/null; then GEOIP2_BLOCK=' # ── GeoIP2: country lookup for tarpit decisions ────────────────────── # Uses the City database (superset of Country). Adjust path if needed. 
geoip2 /usr/share/GeoIP/GeoLite2-City.mmdb { $geoip2_country_code country iso_code; } ' step "No existing geoip2 country_code config found — adding to map config" fi # Collect server_name values from nginx configs to build same-site referer map local REFERER_ENTRIES="" local _jsc_domain_seen=() for _conf in /etc/nginx/conf.d/*.conf /etc/nginx/sites-enabled/*; do [[ -f "$_conf" ]] || continue while read -r _sn; do for _d in $_sn; do [[ "$_d" == "server_name" || "$_d" == ";" || "$_d" == "_" || "$_d" =~ ^[0-9] ]] && continue _d="${_d%;}" [[ " ${_jsc_domain_seen[*]:-} " == *" $_d "* ]] && continue _jsc_domain_seen+=("$_d") local _d_escaped="${_d//./\\.}" REFERER_ENTRIES+=" ~^1:https?://${_d_escaped} 1;\n" done done < <(grep -oP '^\s*server_name\s+\K[^;]+;?' "$_conf" 2>/dev/null) done if [[ -z "$REFERER_ENTRIES" ]]; then warn "No server_name values found — same-site image bypass will not work" warn "Images behind the challenge may cause redirect loops for browsers" fi MAP_CONTENT='# JS cookie challenge — allowed bots and cookie check # Generated by add-nginx-js-challenge.sh — https://mylinux.work # Cookie: '"${COOKIE_NAME}"' Token: '"${COOKIE_VALUE:0:8}"'... # Generated: '"$(date -Iseconds)"' # ── Rate limit: challenge endpoint ─────────────────────────────────── # Real users hit the challenge once and keep the cookie. Headless bot farms # spawn fresh browsers per request, hitting the challenge every time. # Rate: '"${CHALLENGE_RATE}"'r/m with burst of '"${CHALLENGE_BURST}"' — excess gets 444. limit_req_zone $binary_remote_addr zone=jschallenge:10m rate='"${CHALLENGE_RATE}"'r/m; # Bots that legitimately identify themselves and should bypass the JS check map $http_user_agent $is_allowed_bot { default 0; # Search engines ~*Googlebot 1; ~*bingbot 1; ~*Slurp 1; ~*DuckDuckBot 1; ~*DuckAssistBot 1; ~*Baiduspider 1; ~*YandexBot 1; ~*YandexFavicons 1; ~*Applebot 1; ~*Qwantbot 1; ~*Qwantify 1; ~*Bravebot 1; ~*kagi-fetcher 1; ~*Kagibot 1; ~*Yahoo! 
1; ~*Yeti 1; # Social media / link previews ~*facebookexternalhit 1; ~*Facebot 1; ~*Twitterbot 1; ~*LinkedInBot 1; ~*Slackbot 1; ~*Slack-ImgProxy 1; ~*Discordbot 1; ~*TelegramBot 1; ~*WhatsApp 1; ~*redditbot 1; ~*ArenaUnfurlBot 1; # Feed readers ~*Feedly 1; ~*Miniflux 1; ~*FreshRSS 1; ~*NewsBlur 1; ~*Tiny\ Tiny\ RSS 1; ~*Inoreader 1; ~*NetNewsWire 1; # Monitoring / uptime ~*UptimeRobot 1; ~*Pingdom 1; ~*StatusCake 1; ~*Blackbox-Exporter 1; # AI answer bots (user-facing, not training crawlers) ~*OAI-SearchBot 1; ~*ChatGPT-User 1; ~*Claude-Web 1; ~*Claude-User 1; ~*MistralAI-User 1; # Archive / research ~*archive\.org_bot 1; # Apple Safari prefetch ~*safarifetcherd 1; # Link checkers / validators ~*W3C_Validator 1; ~*W3C-checklink 1; ~*LinkChecker 1; ~*link-check 1; # Decentralized search ~*yacybot 1; # Add your own allowed bots below } # Validate the challenge cookie — exact token match map '"${NGINX_COOKIE_VAR}"' $js_cookie_valid { default 0; "'"${COOKIE_VALUE}"'" 1; } # Detect requests to the challenge page and download paths (prevent redirect loops) map $uri $is_challenge_uri { default 0; "'"${CHALLENGE_PATH}"'" 1; ~^/downloads/ 1; ~*\.(css|js|woff2?)$ 1; ~*favicon 1; ~*apple-touch-icon 1; } # Detect image sub-resource requests with same-site referer (browser loads) # These bypass the challenge because: (a) images cannot execute JS challenges, # and (b) the same-site referer proves the browser loaded a page from this domain. # Direct image requests from scrapers (no referer or external referer) still get challenged. 
map $uri $is_image_request { default 0; ~*\.(png|jpe?g|gif|svg|webp|ico|avif)$ 1; } map "$is_image_request:$http_referer" $is_samesite_image { default 0; '"${REFERER_ENTRIES}"'} # Combined check: need challenge if not allowed bot, no valid cookie, and not the challenge page map "$is_allowed_bot:$js_cookie_valid:$is_challenge_uri:$is_samesite_image" $needs_js_challenge { default 1; "1:0:0:0" 0; "1:0:0:1" 0; "1:0:1:0" 0; "1:0:1:1" 0; "1:1:0:0" 0; "1:1:0:1" 0; "1:1:1:0" 0; "1:1:1:1" 0; "0:1:0:0" 0; "0:1:0:1" 0; "0:1:1:0" 0; "0:1:1:1" 0; "0:0:1:0" 0; "0:0:1:1" 0; "0:0:0:1" 0; } '"${GEOIP2_BLOCK}"' # ── Tarpit: headless Chrome bots from suspect regions ───────────────── # Visitors from tarpit countries with no external referrer (passed through # the challenge redirect as the _bc_ref cookie) are served at a crawl. # This drains headless Chrome resources (~200-500 MB RAM per instance) # without giving the bot a clear "blocked" signal to adapt to. # # The _bc_ref cookie is set by the challenge page JS from the &ref= param. # It contains the original HTTP Referer before the 302 redirect destroyed it. # "direct" = no external referrer (typed URL or bot). Cookie expires in 120s. 
# Check if visitor is from a tarpit country (requires geoip2 module) map $geoip2_country_code $is_tarpit_country { default 0; '"$(for cc in $TARPIT_COUNTRIES; do echo " \"${cc}\" 1;"; done)"' } # Tarpit only if: tarpit country + no external referrer + passed JS challenge map "$is_tarpit_country:$cookie__bc_ref" $tarpit_client { default 0; "1:direct" 1; "1:" 1; } # Serve the challenge page server { listen 127.0.0.1:18444; server_name _; root /var/www/js-challenge; location / { add_header Cache-Control "no-store, no-cache, must-revalidate" always; add_header Pragma "no-cache" always; try_files /challenge.html =404; } }' if [[ "$DRY_RUN" == "true" ]]; then echo " Would create: ${CHALLENGE_MAP}" else if [[ -f "$CHALLENGE_MAP" ]]; then cp "$CHALLENGE_MAP" "${CHALLENGE_MAP}.bak.${TIMESTAMP}" warn "Existing config backed up" fi echo "$MAP_CONTENT" > "$CHALLENGE_MAP" info "Map config created: ${CHALLENGE_MAP}" fi # ===================================================== # Step 3: Show injection instructions # ===================================================== step "Server block configuration" echo "" echo " Add the following inside each server block (after your bot-block rules):" echo "" echo -e "${CYAN} # js-challenge-managed-start" echo " location = ${CHALLENGE_PATH} {" echo " limit_req zone=jschallenge burst=${CHALLENGE_BURST} nodelay;" echo " limit_req_status 444;" echo " proxy_pass http://127.0.0.1:18444/;" echo " }" echo "" echo " # JS cookie challenge — redirect non-JS visitors" echo " if (\$needs_js_challenge) {" echo " return 302 ${CHALLENGE_PATH}?r=\$request_uri&ref=\$http_referer;" echo " }" echo "" echo " # Tarpit headless Chrome bots from suspect GeoIP regions" echo " if (\$tarpit_client) {" echo " set \$limit_rate ${TARPIT_RATE};" echo " }" echo -e " # js-challenge-managed-end${NC}" echo "" echo " Or re-run add-nginx-bot-block.sh to have it injected automatically" echo " (if supported in your version)." 
echo ""

# =====================================================
# Step 4: Validate nginx config
# =====================================================
step "Testing nginx configuration"

if [[ "$DRY_RUN" == "true" ]]; then
    echo " Would run: nginx -t"
elif nginx -t 2>&1; then
    info "nginx config valid"
else
    echo -e "${RED}[ERROR] nginx config test failed${NC}" >&2
    # A backup only exists when a previous map config was overwritten, so
    # point at it only if it is really there.
    if [[ -f "${CHALLENGE_MAP}.bak.${TIMESTAMP}" ]]; then
        echo " Restore backup: ${CHALLENGE_MAP}.bak.${TIMESTAMP}" >&2
    else
        echo " No previous config was backed up; remove ${CHALLENGE_MAP} to undo this change" >&2
    fi
    exit 1
fi

# =====================================================
# Step 5: Reload nginx
# =====================================================
step "Reloading nginx"

if [[ "$DRY_RUN" == "true" ]]; then
    echo " Would run: systemctl reload nginx"
else
    systemctl reload nginx
    info "nginx reloaded"
fi

# =====================================================
# Summary
# =====================================================

# With "burst=N nodelay" nginx serves N+1 back-to-back requests before it
# starts rejecting, so derive the numbers in the rate-limit hint from the
# configured burst instead of hard-coding them.
_jsc_hint_ok="burst + 1"
_jsc_hint_tries=5
if [[ "$CHALLENGE_BURST" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value such as "08" is not parsed as octal.
    _jsc_hint_ok=$(( 10#$CHALLENGE_BURST + 1 ))
    _jsc_hint_tries=$(( 10#$CHALLENGE_BURST + 2 ))
fi

echo ""
echo -e "${BOLD}Done.${NC}"
echo ""
echo " Challenge map: ${CHALLENGE_MAP}"
echo " Challenge page: ${CHALLENGE_HTML}"
echo " State file: ${STATE_FILE}"
echo " Cookie name: ${COOKIE_NAME}"
echo " Cookie token: ${COOKIE_VALUE:0:8}... (32 hex chars)"
echo " Cookie TTL: ${COOKIE_MAX_AGE}s"
echo " Tarpit countries: ${TARPIT_COUNTRIES}"
echo " Tarpit rate: ${TARPIT_RATE} bytes/sec"
echo " Challenge rate: ${CHALLENGE_RATE}r/m (burst: ${CHALLENGE_BURST})"
echo ""
echo " To rotate credentials (invalidate bot-cached cookies):"
echo " sudo $(basename "$0")"
echo ""
echo " To remove: sudo $(basename "$0") --remove"
echo ""
echo " Test (bot without cookie gets redirected to challenge):"
echo " curl -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
echo " Expected: 302"
echo ""
echo " Test (browser completes challenge — 302 → 200):"
echo " Open https://yourdomain.com in a browser"
echo " Expected: brief redirect then page loads normally"
echo ""
echo " Test (old static bypass no longer works):"
echo " curl -b '_bc=verified' -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
echo " Expected: 302 (not 200 — old cookie is invalid)"
echo ""
echo " Test (rate limit on challenge endpoint):"
echo " for i in \$(seq 1 ${_jsc_hint_tries}); do curl -o /dev/null -s -w \"\$i: %{http_code}\n\" https://yourdomain.com${CHALLENGE_PATH}; done"
# nginx answers 444 by closing the connection without sending a response, so
# curl has no status line to report and prints 000.
echo " Expected: first ${_jsc_hint_ok} return 200, then 000 (rate limited: nginx 444 closes the connection without a response)"
echo ""
echo " Test (allowed bot bypasses challenge):"
echo " curl -A 'Googlebot' -o /dev/null -s -w '%{http_code}' https://yourdomain.com"
echo " Expected: 200"