#!/bin/bash
################################################################################
# Script Name: install-bot-monitor.sh
# Version: 1.1
# Description: Install the bot-monitor script, known agents whitelist, and
#              daily cron job. Detects unknown user agents in web server logs
#              and sends alerts via ntfy and/or Prometheus textfile metrics.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Usage:
#   sudo ./install-bot-monitor.sh --ntfy-url https://ntfy.example.com/bot-alerts
#   sudo ./install-bot-monitor.sh --textfile
#   sudo ./install-bot-monitor.sh --ntfy-url https://ntfy.example.com/bot-alerts --textfile
#   sudo ./install-bot-monitor.sh --textfile --domain example.com
#   sudo ./install-bot-monitor.sh --dry-run
#
################################################################################

set -euo pipefail

# --- Defaults (overridable from the command line) ---
NTFY_URL=""
TEXTFILE_DIR="/var/lib/node_exporter"
TEXTFILE_ENABLED=false
LOG_DIR="auto"
LOG_PATTERN="access.log"
DOMAIN=""
MIN_REQUESTS=5
CRON_HOUR=6
DRY_RUN=false

# --- Fixed install locations ---
readonly AGENTS_FILE="/etc/bot-monitor/known-agents.txt"
readonly SCRIPT_FILE="/usr/local/bin/bot-monitor.sh"

# Staging file for the generated monitor script; removed by cleanup() on exit.
TMP_SCRIPT=""

# --- Colors ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

info() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
step() { echo -e "${CYAN}[STEP]${NC} $*"; }

# Print "Error: <message>" in red on stderr and abort with status 1.
fail() {
  echo -e "${RED}Error: $*${NC}" >&2
  exit 1
}

# Remove the staging file, if one was created. Installed as the EXIT trap.
cleanup() {
  if [[ -n "$TMP_SCRIPT" ]]; then
    rm -f -- "$TMP_SCRIPT"
  fi
}

# NOTE(review): the original usage text, argument parser and first validation
# check were missing from the reviewed copy of this file. usage(), parse_args()
# and the root check in validate_options() are reconstructed from the header
# examples, the default variables above and the "--log-dir" hint in the
# auto-detect error message. Confirm option names against the released script.
usage() {
  cat <<EOF
Usage: sudo $(basename "$0") [options]

At least one of --ntfy-url or --textfile is required.

Options:
  --ntfy-url URL        Send alerts to this ntfy topic URL
  --textfile            Write Prometheus textfile metrics
  --textfile-dir DIR    Textfile collector directory (default: ${TEXTFILE_DIR})
  --log-dir DIR         Web server log directory (default: auto-detect)
  --log-pattern GLOB    Log file name or glob inside the log directory (default: ${LOG_PATTERN})
  --domain DOMAIN       Only scan DOMAIN.log
  --min-requests N      Ignore agents with fewer than N requests (default: ${MIN_REQUESTS})
  --cron-hour H         Hour of day (0-23) for the daily run (default: ${CRON_HOUR})
  --dry-run             Show what would be done without changing anything
  -h, --help            Show this help
EOF
}

# Abort unless the option in $1 is followed by a value in $2.
require_value() {
  if (( $# < 2 )); then
    fail "Option $1 requires a value"
  fi
}

# Parse command-line options into the global settings above.
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --ntfy-url)
        require_value "$@"; NTFY_URL=$2; shift 2 ;;
      --textfile)
        TEXTFILE_ENABLED=true; shift ;;
      --textfile-dir)
        require_value "$@"; TEXTFILE_DIR=$2; shift 2 ;;
      --log-dir)
        require_value "$@"; LOG_DIR=$2; shift 2 ;;
      --log-pattern)
        require_value "$@"; LOG_PATTERN=$2; shift 2 ;;
      --domain)
        require_value "$@"; DOMAIN=$2; shift 2 ;;
      --min-requests)
        require_value "$@"; MIN_REQUESTS=$2; shift 2 ;;
      --cron-hour)
        require_value "$@"; CRON_HOUR=$2; shift 2 ;;
      --dry-run)
        DRY_RUN=true; shift ;;
      -h|--help)
        usage
        exit 0 ;;
      *)
        usage >&2
        fail "Unknown option: $1" ;;
    esac
  done
}

# Reject option combinations and values that cannot produce a working install.
validate_options() {
  # Everything below writes to /etc, /usr/local/bin and root's crontab.
  # NOTE(review): reconstructed check — see the note above usage().
  if [[ "$DRY_RUN" == "false" && "${EUID}" -ne 0 ]]; then
    fail "This script must be run as root (use sudo)"
  fi

  if [[ -z "$NTFY_URL" && "$TEXTFILE_ENABLED" == "false" && "$DRY_RUN" == "false" ]]; then
    echo -e "${RED}Error: At least one of --ntfy-url or --textfile is required${NC}" >&2
    echo " Example: sudo $(basename "$0") --ntfy-url https://ntfy.example.com/bot-alerts" >&2
    echo " Example: sudo $(basename "$0") --textfile" >&2
    exit 1
  fi

  if [[ "$TEXTFILE_ENABLED" == "true" && ! -d "$TEXTFILE_DIR" && "$DRY_RUN" == "false" ]]; then
    echo -e "${RED}Error: Textfile directory not found: ${TEXTFILE_DIR}${NC}" >&2
    echo " Create it: sudo mkdir -p ${TEXTFILE_DIR}" >&2
    exit 1
  fi

  # These values end up inside a script and a crontab that run as root, so
  # refuse anything that is not obviously well-formed.
  if [[ -n "$NTFY_URL" && ! "$NTFY_URL" =~ ^https?:// ]]; then
    fail "--ntfy-url must start with http:// or https://"
  fi
  if [[ -n "$DOMAIN" && ! "$DOMAIN" =~ ^[A-Za-z0-9._-]+$ ]]; then
    fail "Invalid domain name: ${DOMAIN}"
  fi
  if [[ ! "$MIN_REQUESTS" =~ ^[0-9]+$ ]]; then
    fail "--min-requests must be a non-negative integer"
  fi
  if [[ ! "$CRON_HOUR" =~ ^(0?[0-9]|1[0-9]|2[0-3])$ ]]; then
    fail "--cron-hour must be an integer between 0 and 23"
  fi
}

# Resolve LOG_DIR / LOG_PATTERN.
# With an explicit --log-dir only its existence is checked; otherwise the
# known control-panel and distribution log locations are probed in order.
# Globals: LOG_DIR, LOG_PATTERN (written); DOMAIN (read)
detect_log_dir() {
  if [[ "$LOG_DIR" != "auto" ]]; then
    if [[ ! -d "$LOG_DIR" ]]; then
      fail "Log directory not found: ${LOG_DIR}"
    fi
    if [[ -n "$DOMAIN" ]]; then
      LOG_PATTERN="${DOMAIN}.log"
      info "Using specified log directory: $LOG_DIR/$LOG_PATTERN"
    else
      info "Using specified log directory: $LOG_DIR"
    fi
    return 0
  fi

  step "Auto-detecting web server log directory..."

  # HestiaCP / VestaCP — per-domain logs.
  # Check apache first: it has full access logs with user agents
  # (nginx domain logs are proxy logs in nginx+apache mode).
  local server domains_dir
  for server in apache nginx; do
    case "$server" in
      apache) domains_dir="/var/log/apache2/domains" ;;
      nginx)  domains_dir="/var/log/nginx/domains" ;;
    esac
    [[ -d "$domains_dir" ]] || continue

    LOG_DIR="$domains_dir"
    if [[ -n "$DOMAIN" ]]; then
      LOG_PATTERN="${DOMAIN}.log"
      if [[ ! -f "${LOG_DIR}/${LOG_PATTERN}" ]]; then
        fail "Domain log not found: ${LOG_DIR}/${LOG_PATTERN}"
      fi
      info "Detected HestiaCP/VestaCP ${server}: $LOG_DIR/$LOG_PATTERN"
    else
      LOG_PATTERN="*.log"
      info "Detected HestiaCP/VestaCP ${server}: $LOG_DIR (all domains)"
    fi
    return 0
  done

  # Single-log layouts: standard nginx, Apache on Debian/Ubuntu, Apache on
  # RHEL/Rocky. Each entry is label:directory:file.
  local entry label dir file
  for entry in \
    "nginx:/var/log/nginx:access.log" \
    "apache2:/var/log/apache2:access.log" \
    "httpd:/var/log/httpd:access_log"; do
    IFS=: read -r label dir file <<<"$entry"
    if [[ -f "${dir}/${file}" ]]; then
      LOG_DIR="$dir"
      LOG_PATTERN="$file"
      info "Detected ${label}: $LOG_DIR/$LOG_PATTERN"
      return 0
    fi
  done

  fail "Could not auto-detect log directory. Use --log-dir to specify."
}

# =====================================================
# Step 1: Create directories
# =====================================================
create_directories() {
  step "Creating directories"
  if [[ "$DRY_RUN" == "true" ]]; then
    echo " Would create: /etc/bot-monitor/"
    echo " Would create: /var/lib/bot-monitor/"
  else
    mkdir -p /etc/bot-monitor /var/lib/bot-monitor
    info "Created /etc/bot-monitor and /var/lib/bot-monitor"
  fi
}

# =====================================================
# Step 2: Install known agents whitelist
# =====================================================

# Emit the default whitelist on stdout. The monitor reads it as one
# case-insensitive extended-regex pattern per line.
whitelist_content() {
  cat <<'AGENTSEOF'
# Known user agents — one grep pattern per line
# Agents matching these patterns are excluded from unknown bot alerts.
# Add your own patterns as needed.
#
# https://mylinux.work

# --- Search engines ---
Googlebot
Bingbot
Applebot
DuckDuckBot
YandexBot
Baiduspider
Sogou
Qwantify
Qwantbot

# --- Social media / link previews ---
facebookexternalhit
Facebot
Twitterbot
LinkedInBot
Pinterestbot
Slackbot
Discordbot
TelegramBot
WhatsApp
SkypeUriPreview
BingPreview

# --- Browsers ---
Chrome
Firefox
Safari
Edge
Opera
Vivaldi
Brave

# --- Monitoring / uptime ---
Uptime-Kuma
UptimeRobot
Pingdom
StatusCake
Better Uptime
Datadog
Site24x7
Cloudflare-Healthchecks
Fastly-Healthcheck
Blackbox-Exporter
ufw-threat-feeds

# --- Feed readers ---
Feedly
Feedbin
NewsBlur
Tiny Tiny RSS
FreshRSS
Miniflux

# --- Tools ---
curl
Wget
HTTPie
Lynx
w3m
link-check

# --- AI user-facing search (cite sources) ---
ChatGPT-User
Claude-User
DuckAssistBot

# --- OS-level networking ---
WebKit.Networking
NetworkingExtension

# --- AI scrapers (already blocked) ---
ABEvalBot
GPTBot
ClaudeBot
anthropic-ai
CCBot
Bytespider
TikTokSpider
cohere-ai
PerplexityBot
Diffbot
MistralBot
YandexGPTBot
meta-externalagent
Meta-ExternalFetcher
meta-webindexer
PetalBot
Amazonbot
Amzn-SearchBot
AI2Bot
Ai2Bot-Dolma
Timpibot
img2dataset
YouBot
HanaleiBot
Applebot-Extended
Google-Extended

# --- SEO crawlers (already blocked) ---
MJ12bot
SemrushBot
AhrefsBot
DotBot
DataForSeoBot
SERanking

# --- Scraping frameworks (already blocked) ---
Scrapy
python-requests
Go-http-client
Java/
libwww-perl
trafilatura

# --- Vulnerability scanners (already blocked) ---
Nikto
sqlmap
Nmap
masscan
ZmEu
Morpheus
AGENTSEOF
}

# Write the whitelist, keeping a timestamped backup of any existing one.
install_whitelist() {
  step "Installing known agents whitelist"
  if [[ "$DRY_RUN" == "true" ]]; then
    echo " Would create: ${AGENTS_FILE}"
    return 0
  fi

  if [[ -f "$AGENTS_FILE" ]]; then
    cp -- "$AGENTS_FILE" "${AGENTS_FILE}.bak.$(date +%s)"
    warn "Existing whitelist backed up"
  fi
  whitelist_content > "$AGENTS_FILE"

  local pattern_count
  pattern_count=$(grep -cve '^[[:space:]]*#' -e '^[[:space:]]*$' "$AGENTS_FILE" || true)
  info "Installed: ${AGENTS_FILE} (${pattern_count} patterns)"
}

# =====================================================
# Step 3: Install bot-monitor script
# =====================================================

# Generate the monitor script into the file named by $1.
# Settings are written with printf %q so that URLs or paths containing
# characters such as & | \ " $ cannot corrupt or inject into the root-run
# script (the previous sed-placeholder approach could).
write_monitor_script() {
  local dest=$1
  {
    cat <<'HEADEOF'
#!/bin/bash
# /usr/local/bin/bot-monitor.sh
# Detects unknown user agents in yesterday's web server logs.
# Sends alerts via ntfy and/or exports Prometheus metrics.
#
# Author: Phil Connor
# License: MIT

set -euo pipefail

# --- Configuration (written by install-bot-monitor.sh) ---
HEADEOF
    printf 'LOG_DIR=%q\n' "$LOG_DIR"
    printf 'LOG_PATTERN=%q\n' "$LOG_PATTERN"
    printf 'NTFY_URL=%q\n' "$NTFY_URL"
    printf 'TEXTFILE_DIR=%q\n' "$TEXTFILE_DIR"
    printf 'TEXTFILE_ENABLED=%q\n' "$TEXTFILE_ENABLED"
    printf 'MIN_REQUESTS=%q\n' "$MIN_REQUESTS"
    cat <<'SCRIPTEOF'
KNOWN_AGENTS="/etc/bot-monitor/known-agents.txt"
STATE_DIR="/var/lib/bot-monitor"
# hostname -f fails when no FQDN resolves; fall back to the short name
# instead of aborting the whole run under set -e.
HOST_NAME=$(hostname -f 2>/dev/null || hostname)

# --- Setup ---
mkdir -p "$STATE_DIR"

if [[ ! -r "$KNOWN_AGENTS" ]]; then
  echo "bot-monitor: whitelist not readable: $KNOWN_AGENTS" >&2
  exit 1
fi

# Build one alternation regex from the whitelist: trim whitespace (including
# CR from DOS line endings), drop comments and blank lines, join with '|'.
EXCLUDE_PATTERN=$(
  sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e '/^#/d' -e '/^$/d' \
    "$KNOWN_AGENTS" | paste -sd'|' -
)

# Pass through only agents that match no whitelist pattern.
drop_known_agents() {
  if [[ -z "$EXCLUDE_PATTERN" ]]; then
    # An empty regex matches every line and would hide all agents.
    cat
  else
    # Exit status 1 only means "nothing left"; anything higher is a real error.
    grep -aviE -- "$EXCLUDE_PATTERN" || [[ $? -eq 1 ]]
  fi
}

# --- Extract unknown agents from yesterday's logs ---
# Access logs always use English month names, whatever the system locale is.
YESTERDAY=$(LC_ALL=C date -d yesterday +%d/%b/%Y)
UNKNOWN_FILE="$STATE_DIR/unknown-$(date -d yesterday +%Y-%m-%d).txt"

shopt -s nullglob
# shellcheck disable=SC2206 # LOG_PATTERN may be a glob and must expand here
LOG_FILES=( "$LOG_DIR"/$LOG_PATTERN )
shopt -u nullglob

if (( ${#LOG_FILES[@]} > 0 )); then
  # "[DD/Mon/YYYY:" anchors the match to the timestamp field; -a keeps grep
  # from treating logs with stray binary bytes as binary files.
  { grep -ahF -- "[${YESTERDAY}:" "${LOG_FILES[@]}" || true; } \
    | awk -F'"' '$6 != "" && $6 != "-" { print $6 }' \
    | drop_known_agents \
    | sort | uniq -c | sort -rn \
    | awk -v min="$MIN_REQUESTS" '$1 >= min' \
    > "$UNKNOWN_FILE"
else
  echo "bot-monitor: no log files match ${LOG_DIR}/${LOG_PATTERN}" >&2
  : > "$UNKNOWN_FILE"
fi

AGENT_COUNT=$(wc -l < "$UNKNOWN_FILE")
TOTAL_REQUESTS=0
if (( AGENT_COUNT > 0 )); then
  TOTAL_REQUESTS=$(awk '{sum += $1} END {print sum+0}' "$UNKNOWN_FILE")
fi

# --- ntfy alert ---
if (( AGENT_COUNT > 0 )) && [[ -n "$NTFY_URL" ]]; then
  SUMMARY=$(head -n 10 "$UNKNOWN_FILE" | while read -r count agent; do
    printf " %6d %s\n" "$count" "$agent"
  done)

  # A failed notification must not prevent the metrics below from being written.
  if ! curl -fsSL --max-time 30 \
    -H "Title: Unknown bots detected on $HOST_NAME" \
    -H "Priority: 3" \
    -H "Tags: spider,warning" \
    -d "Found $AGENT_COUNT unknown user agents yesterday:

$SUMMARY

Full list: $UNKNOWN_FILE" \
    "$NTFY_URL" > /dev/null 2>&1; then
    echo "bot-monitor: failed to send ntfy alert to $NTFY_URL" >&2
  fi
fi

# --- Prometheus textfile metrics ---
if [[ "$TEXTFILE_ENABLED" == "true" ]]; then
  OUTPUT_FILE="${TEXTFILE_DIR}/bot_monitor.prom"
  # Write to a temp file in the same directory and rename it into place so a
  # reader never sees a half-written file.
  PROM_TMP=$(mktemp "${TEXTFILE_DIR}/.bot_monitor.XXXXXX")
  trap 'rm -f -- "$PROM_TMP"' EXIT

  {
    echo "# HELP bot_monitor_unknown_agents_total Number of unique unknown user agents detected yesterday."
    echo "# TYPE bot_monitor_unknown_agents_total gauge"
    echo "bot_monitor_unknown_agents_total ${AGENT_COUNT}"
    echo ''
    echo "# HELP bot_monitor_unknown_requests_total Total requests from unknown user agents yesterday."
    echo "# TYPE bot_monitor_unknown_requests_total gauge"
    echo "bot_monitor_unknown_requests_total ${TOTAL_REQUESTS}"
    echo ''
    echo "# HELP bot_monitor_last_scan_timestamp_seconds Unix timestamp of last bot monitor scan."
    echo "# TYPE bot_monitor_last_scan_timestamp_seconds gauge"
    echo "bot_monitor_last_scan_timestamp_seconds $(date +%s)"
    echo ''
    if (( AGENT_COUNT > 0 )); then
      echo "# HELP bot_monitor_agent_requests Requests per unknown user agent yesterday."
      echo "# TYPE bot_monitor_agent_requests gauge"
      head -n 20 "$UNKNOWN_FILE" | while read -r count agent; do
        # Truncate first, then escape, so the cut can never land inside an
        # escape sequence and leave a dangling backslash in the label value.
        safe_agent=${agent:0:128}
        safe_agent=${safe_agent//\\/\\\\}
        safe_agent=${safe_agent//\"/\\\"}
        printf 'bot_monitor_agent_requests{agent="%s"} %s\n' "$safe_agent" "$count"
      done
    fi
  } > "$PROM_TMP"

  chmod 644 "$PROM_TMP"
  mv -f "$PROM_TMP" "$OUTPUT_FILE"
  trap - EXIT
fi

# --- Cleanup state files older than 30 days ---
find "$STATE_DIR" -name "unknown-*.txt" -mtime +30 -delete 2>/dev/null || true
SCRIPTEOF
  } > "$dest"
}

# Stage the generated monitor in a private temp file and install it, keeping
# a timestamped backup of any existing script. Nothing is written on dry-run.
install_monitor_script() {
  step "Installing bot-monitor script"
  if [[ "$DRY_RUN" == "true" ]]; then
    echo " Would install: ${SCRIPT_FILE}"
    echo " Log dir: ${LOG_DIR}"
    echo " Log pattern: ${LOG_PATTERN}"
    if [[ -n "$NTFY_URL" ]]; then
      echo " ntfy URL: ${NTFY_URL}"
    fi
    if [[ "$TEXTFILE_ENABLED" == "true" ]]; then
      echo " Textfile dir: ${TEXTFILE_DIR}"
    fi
    echo " Min requests: ${MIN_REQUESTS}"
    return 0
  fi

  # mktemp instead of a fixed /tmp name: this runs as root, and a predictable
  # path could be pre-created as a symlink by another local user.
  TMP_SCRIPT=$(mktemp)
  write_monitor_script "$TMP_SCRIPT"

  if [[ -f "$SCRIPT_FILE" ]]; then
    cp -- "$SCRIPT_FILE" "${SCRIPT_FILE}.bak.$(date +%s)"
    warn "Existing script backed up"
  fi
  install -m 0755 -- "$TMP_SCRIPT" "$SCRIPT_FILE"
  info "Installed: ${SCRIPT_FILE}"
}

# =====================================================
# Step 4: Install cron job in root's crontab
# =====================================================
install_cron() {
  step "Installing cron job"
  local cron_line="0 ${CRON_HOUR} * * * ${SCRIPT_FILE}"

  if [[ "$DRY_RUN" == "true" ]]; then
    echo " Would add to root crontab: ${cron_line}"
    echo " Schedule: daily at ${CRON_HOUR}:00"
    return 0
  fi

  # "crontab -l" exits non-zero when root has no crontab yet; that must be
  # treated as an empty crontab rather than aborting the install.
  local current
  current=$(crontab -l 2>/dev/null || true)

  if grep -qF -- "$SCRIPT_FILE" <<<"$current"; then
    warn "Cron entry already exists in root crontab — skipping"
    return 0
  fi

  {
    if [[ -n "$current" ]]; then
      printf '%s\n' "$current"
    fi
    printf '%s\n' "$cron_line"
  } | crontab -
  info "Cron added to root crontab: daily at ${CRON_HOUR}:00"
}

# =====================================================
# Summary
# =====================================================
print_summary() {
  echo ""
  echo -e "${BOLD}Done.${NC}"
  echo ""
  echo " Script: ${SCRIPT_FILE}"
  echo " Whitelist: ${AGENTS_FILE}"
  echo " State dir: /var/lib/bot-monitor/"
  echo " Cron: daily at ${CRON_HOUR}:00"
  echo " Log source: ${LOG_DIR}/${LOG_PATTERN}"
  if [[ -n "$NTFY_URL" ]]; then
    echo " Alerts: ${NTFY_URL}"
  fi
  if [[ "$TEXTFILE_ENABLED" == "true" ]]; then
    echo " Metrics: ${TEXTFILE_DIR}/bot_monitor.prom"
  fi
  echo ""
  echo " Test manually:"
  echo " sudo ${SCRIPT_FILE}"
  echo ""
  echo " Check results:"
  echo " ls -la /var/lib/bot-monitor/"
  echo " cat /var/lib/bot-monitor/unknown-\$(date -d yesterday +%Y-%m-%d).txt"
  if [[ "$TEXTFILE_ENABLED" == "true" ]]; then
    echo ""
    echo " Check Prometheus metrics:"
    echo " cat ${TEXTFILE_DIR}/bot_monitor.prom"
    echo ""
    echo " PromQL examples:"
    echo " bot_monitor_unknown_agents_total # unique unknown agents"
    echo " bot_monitor_unknown_requests_total # total requests from unknowns"
    echo " bot_monitor_agent_requests # per-agent request counts"
  fi
  echo ""
  echo " Edit whitelist to reduce noise:"
  echo " nano ${AGENTS_FILE}"
}

main() {
  trap cleanup EXIT
  parse_args "$@"
  validate_options
  detect_log_dir
  create_directories
  install_whitelist
  install_monitor_script
  install_cron
  print_summary
}

main "$@"