#!/bin/bash
################################################################################
# Script Name: ip-intel-exporter.sh
# Version: 1.1
# Description: Prometheus exporter for IP intelligence metrics from nginx logs.
#              Parses access logs (GeoIP-enriched or standard), classifies
#              traffic by country, ASN, cloud provider, and threat type.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - nginx access log (enriched or combined format)
#   - mmdblookup (mmdb-bin) for standard log format GeoIP lookups
#   - netcat (nc) for HTTP mode
#
# Usage:
#   ./ip-intel-exporter.sh                            # stdout
#   ./ip-intel-exporter.sh --textfile                 # node_exporter textfile
#   ./ip-intel-exporter.sh --http                     # HTTP server on port 9199
#   ./ip-intel-exporter.sh --log /path/to/access.log
#
################################################################################

set -euo pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

NGINX_LOG="/var/log/nginx/access.log"
TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9199
LOG_FORMAT="auto"
# shellcheck disable=SC2034
TOP_N=10
# shellcheck disable=SC2034
MMDB_CITY="/usr/share/GeoIP/GeoLite2-City.mmdb"
# shellcheck disable=SC2034
MMDB_ASN="/usr/share/GeoIP/GeoLite2-ASN.mmdb"

# ============================================================================
# COLORS
# ============================================================================

# Every helper below writes to stderr, so the terminal check must look at
# fd 2: testing fd 1 would leak escape codes into a redirected stderr.
# The variables hold real ESC bytes so they can be printed with '%s'.
if [[ -t 2 ]]; then
  RED=$'\033[0;31m'
  GREEN=$'\033[0;32m'
  YELLOW=$'\033[0;33m'
  RESET=$'\033[0m'
else
  RED=""
  GREEN=""
  YELLOW=""
  RESET=""
fi

# Diagnostic helpers. All arguments are joined with spaces and written to
# stderr verbatim (backslashes in the message are NOT interpreted).
log()  { printf '%s[OK]%s %s\n' "$GREEN" "$RESET" "$*" >&2; }
warn() { printf '%s[WARN]%s %s\n' "$YELLOW" "$RESET" "$*" >&2; }
err()  { printf '%s[ERROR]%s %s\n' "$RED" "$RESET" "$*" >&2; }

# Sanitize a string for use as a Prometheus label value.
# Converts \xNN escape sequences to real UTF-8, then escapes backslashes,
# double quotes and newlines so the value is safe inside label="...".
# Arguments: $1 - raw value (e.g. an ASN organisation name taken from the log)
# Outputs:   escaped value on stdout, without a trailing newline
prom_label() {
  local val nl=$'\n'
  # %b expands the \xNN sequences found in the logged value.
  val=$(printf '%b' "$1")
  val="${val//\\/\\\\}"
  val="${val//\"/\\\"}"
  # A decoded \x0A would otherwise split the sample across two lines.
  val="${val//$nl/\\n}"
  printf '%s' "$val"
}

# ============================================================================
# USAGE
# ============================================================================

# NOTE(review): the source text of show_usage, parse_args and the first part
# of preflight was lost (everything between "cat <" and the mmdblookup check).
# The three functions below are a minimal reconstruction based on the Usage
# section of the file header and the configuration variables. Confirm option
# names, the textfile name and the format auto-detection against the original.

#######################################
# Print command-line help.
# Globals:   TEXTFILE_DIR, HTTP_PORT, NGINX_LOG (read)
# Outputs:   help text on stdout
#######################################
show_usage() {
  cat <<EOF
Usage: ${0##*/} [OPTIONS]

Options:
  --textfile     Write metrics to ${TEXTFILE_DIR}/ip_intel.prom (node_exporter textfile)
  --http         Serve metrics over HTTP on port ${HTTP_PORT}
  --log FILE     nginx access log to parse (default: ${NGINX_LOG})
  -h, --help     Show this help
EOF
}

# ============================================================================
# ARGUMENT PARSING
# ============================================================================

#######################################
# Apply command-line options to the configuration globals.
# Globals:   OUTPUT_FILE, HTTP_MODE, NGINX_LOG (written), TEXTFILE_DIR (read)
# Arguments: the script's positional parameters
# Returns:   exits 0 after --help, exits 1 on an unknown or incomplete option
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --textfile)
        # NOTE(review): file name is a guess; it must end in .prom for the
        # node_exporter textfile collector to pick it up.
        OUTPUT_FILE="${TEXTFILE_DIR}/ip_intel.prom"
        ;;
      --http)
        HTTP_MODE=true
        ;;
      --log)
        if (( $# < 2 )); then
          err "--log requires a file path"
          exit 1
        fi
        NGINX_LOG="$2"
        shift
        ;;
      -h|--help)
        show_usage
        exit 0
        ;;
      *)
        err "Unknown option: $1"
        show_usage >&2
        exit 1
        ;;
    esac
    shift
  done
}

# ============================================================================
# PREFLIGHT
# ============================================================================

#######################################
# Validate the environment before any metrics are collected.
# Globals:   NGINX_LOG (read), LOG_FORMAT (read, resolved from "auto")
# Returns:   exits 1 when the log file cannot be read
#######################################
preflight() {
  if [[ ! -r "$NGINX_LOG" ]]; then
    err "Cannot read nginx log: ${NGINX_LOG}"
    exit 1
  fi

  if [[ "$LOG_FORMAT" == "auto" ]]; then
    # Same trailer the awk parser looks for in enriched logs: <CC|-> "<ASN org>"
    if tail -n 1 -- "$NGINX_LOG" \
      | grep -Eq '([A-Z]{2}|-) "[^"]*"[[:space:]]*$'; then
      LOG_FORMAT="enriched"
    else
      LOG_FORMAT="standard"
    fi
  fi

  if [[ "$LOG_FORMAT" == "standard" ]]; then
    if ! command -v mmdblookup &>/dev/null; then
      warn "mmdblookup not found — country/ASN metrics unavailable for standard log format"
      warn "Install: apt install mmdb-bin"
    fi
  fi
}

# ============================================================================
# METRICS COLLECTION
# ============================================================================

# Current time in nanoseconds. Falls back to whole seconds when date(1) does
# not understand %N (it then prints a non-numeric string).
_now_ns() {
  local ns
  ns=$(date +%s%N)
  [[ "$ns" =~ ^[0-9]+$ ]] || ns="$(date +%s)000000000"
  printf '%s' "$ns"
}

# Print the HELP/TYPE preamble of one gauge family.
# Arguments: $1 - metric name, $2 - help text
_family_header() {
  printf '# HELP %s %s\n# TYPE %s gauge\n' "$1" "$2" "$1"
}

#######################################
# Parse the nginx log in a single awk pass and print all metrics in the
# Prometheus text exposition format.
# Globals:   NGINX_LOG, LOG_FORMAT, TOP_N (read)
# Outputs:   metrics on stdout; a warning on stderr if the log pass fails
# Returns:   0 — a failed log pass is reported through ip_intel_up 0 so that
#            scrapes keep working and the failure is visible to alerting
#######################################
collect_metrics() {
  local start_ns end_ns elapsed_ns duration now
  start_ns=$(_now_ns)
  now=$(date +%s)

  local top_n=${TOP_N:-10}
  [[ "$top_n" =~ ^[0-9]+$ ]] || top_n=10

  # The program relies on gawk extensions (three-argument match(),
  # PROCINFO["sorted_in"]); prefer gawk explicitly when it is installed.
  local awk_bin=awk
  if command -v gawk >/dev/null 2>&1; then
    awk_bin=gawk
  fi

  # --- Parse log with awk (single pass for performance) ---
  local up=1 awk_output=""
  if ! awk_output=$("$awk_bin" -v format="${LOG_FORMAT:-auto}" -v top_n="$top_n" '
    {
      ip = $1
      line = $0
      # Reset per record: a value left over from the previous line would
      # be counted again for requests without a recognised method.
      method = ""
      status = ""

      # Status code: the 3-digit field right after the HTTP/x.y token.
      # Start at field 2 so that $(i-1) is never $0 (the whole line).
      for (i = 2; i <= NF; i++) {
        if ($i ~ /^[0-9]{3}$/ && $(i-1) ~ /HTTP\//) {
          status = $i
          break
        }
      }
      if (status == "") {
        # Fallback: first standalone 3-digit number after the request
        for (i = 7; i <= NF; i++) {
          if ($i ~ /^[0-9]{3}$/) {
            status = $i
            break
          }
        }
      }
      is_err = (status != "" && status + 0 >= 400)

      # Request method: the field that opens the quoted request line
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^"(GET|POST|HEAD|PUT|DELETE|PATCH|OPTIONS|CONNECT)$/) {
          method = substr($i, 2)
          break
        }
      }

      # Country and ASN (enriched format: line ends with CC "ASN Org")
      country = ""
      asn_org = ""
      if (format == "enriched") {
        if (match(line, /([A-Z]{2}|-) "([^"]*)"[[:space:]]*$/, m)) {
          country = m[1]
          asn_org = m[2]
        }
      }

      ip_total[ip]++
      if (is_err) {
        ip_blocked[ip]++
        blocked_total++
      }

      if (country != "" && country != "-") {
        country_req[country]++
        if (is_err) country_blocked[country]++
      }

      if (asn_org != "" && asn_org != "-") {
        asn_req[asn_org]++
        if (is_err) asn_blocked[asn_org]++

        # Classify cloud provider from the ASN organisation name
        lower_asn = tolower(asn_org)
        provider = "other"
        if (lower_asn ~ /amazon|aws/) provider = "aws"
        else if (lower_asn ~ /cloudflare/) provider = "cloudflare"
        else if (lower_asn ~ /google/) provider = "gcp"
        else if (lower_asn ~ /microsoft|azure/) provider = "azure"
        else if (lower_asn ~ /digitalocean/) provider = "digitalocean"
        else if (lower_asn ~ /hetzner/) provider = "hetzner"

        provider_req[provider]++
        if (is_err) provider_blocked[provider]++
      }

      # POST probes (POST returning 4xx/5xx)
      if (method == "POST" && is_err) post_probes++

      # Scanner detection (matches anywhere in the line)
      if (tolower(line) ~ /(nikto|sqlmap|nmap|masscan|zgrab|zmeu|morpheus)/) scanners++

      # Empty user agent
      if (line ~ /" "-"$/ || line ~ /" ""$/ || line ~ /" ""-"/) empty_ua++

      total_requests++
    }
    END {
      # Scalars as NAME=value, keyed series as TAG|key|count
      print "TOTAL_REQUESTS=" total_requests+0
      print "BLOCKED_TOTAL=" blocked_total+0
      print "UNIQUE_IPS=" length(ip_total)
      print "UNIQUE_BLOCKED_IPS=" length(ip_blocked)
      print "POST_PROBES=" post_probes+0
      print "SCANNERS=" scanners+0
      print "EMPTY_UA=" empty_ua+0

      for (c in country_req) print "COUNTRY_REQ|" c "|" country_req[c]
      for (c in country_blocked) print "COUNTRY_BLK|" c "|" country_blocked[c]

      for (a in asn_req) print "ASN_REQ|" a "|" asn_req[a]
      for (a in asn_blocked) print "ASN_BLK|" a "|" asn_blocked[a]

      for (p in provider_req) print "PROVIDER_REQ|" p "|" provider_req[p]
      for (p in provider_blocked) print "PROVIDER_BLK|" p "|" provider_blocked[p]

      # Top blocked IPs, highest blocked count first (ordering needs gawk)
      PROCINFO["sorted_in"] = "@val_num_desc"
      n = 0
      for (ip in ip_blocked) {
        if (n++ >= top_n) break
        print "TOP_BLOCKED|" ip "|" ip_blocked[ip]
      }
    }
  ' < "$NGINX_LOG"); then
    warn "Log parsing failed (${awk_bin} on ${NGINX_LOG}); reporting ip_intel_up 0"
    up=0
    awk_output=""
  fi

  # --- Parse awk output ---
  local total_requests=0 unique_ips=0 unique_blocked=0
  local blocked_total=0
  local post_probes=0 scanners=0 empty_ua=0
  declare -A country_req=() country_blk=() asn_req=() asn_blk=()
  declare -A provider_req=() provider_blk=()
  declare -a top_ips=() top_counts=()
  local line tag rest key val

  while IFS= read -r line; do
    case "$line" in
      TOTAL_REQUESTS=*) total_requests="${line#*=}" ;;
      BLOCKED_TOTAL=*) blocked_total="${line#*=}" ;;
      UNIQUE_IPS=*) unique_ips="${line#*=}" ;;
      UNIQUE_BLOCKED_IPS=*) unique_blocked="${line#*=}" ;;
      POST_PROBES=*) post_probes="${line#*=}" ;;
      SCANNERS=*) scanners="${line#*=}" ;;
      EMPTY_UA=*) empty_ua="${line#*=}" ;;
      *\|*\|*)
        # Take the count from the right so a "|" inside the key cannot end
        # up in the arithmetic below; reject anything that is not a number.
        tag=${line%%|*}
        rest=${line#*|}
        key=${rest%|*}
        val=${rest##*|}
        [[ -n "$key" && "$val" =~ ^[0-9]+$ ]] || continue
        case "$tag" in
          COUNTRY_REQ) country_req["$key"]=$val ;;
          COUNTRY_BLK) country_blk["$key"]=$val ;;
          ASN_REQ)
            # Distinct raw names can collapse to one label after escaping,
            # hence accumulate instead of assign.
            key=$(prom_label "$key")
            [[ -n "$key" ]] || continue
            asn_req["$key"]=$(( ${asn_req["$key"]:-0} + val ))
            ;;
          ASN_BLK)
            key=$(prom_label "$key")
            [[ -n "$key" ]] || continue
            asn_blk["$key"]=$(( ${asn_blk["$key"]:-0} + val ))
            ;;
          PROVIDER_REQ) provider_req["$key"]=$val ;;
          PROVIDER_BLK) provider_blk["$key"]=$val ;;
          TOP_BLOCKED)
            # $1 of a malformed log line can hold anything; escape it too.
            top_ips+=("$(prom_label "$key")")
            top_counts+=("$val")
            ;;
        esac
        ;;
    esac
  done <<< "$awk_output"

  # --- Build Prometheus metrics ---
  cat <<EOF
# HELP ip_intel_up Exporter status (1=up)
# TYPE ip_intel_up gauge
ip_intel_up ${up}

# HELP ip_intel_requests_total Total requests in log
# TYPE ip_intel_requests_total gauge
ip_intel_requests_total ${total_requests:-0}

# HELP ip_intel_blocked_total Total blocked requests (4xx/5xx)
# TYPE ip_intel_blocked_total gauge
ip_intel_blocked_total ${blocked_total:-0}

# HELP ip_intel_unique_ips_total Unique IPs seen
# TYPE ip_intel_unique_ips_total gauge
ip_intel_unique_ips_total ${unique_ips:-0}

# HELP ip_intel_unique_blocked_ips_total Unique IPs returning 4xx/5xx
# TYPE ip_intel_unique_blocked_ips_total gauge
ip_intel_unique_blocked_ips_total ${unique_blocked:-0}

# HELP ip_intel_post_probe_total POST requests returning 4xx/5xx
# TYPE ip_intel_post_probe_total gauge
ip_intel_post_probe_total ${post_probes:-0}

# HELP ip_intel_scanner_total Requests from known scanner user agents
# TYPE ip_intel_scanner_total gauge
ip_intel_scanner_total ${scanners:-0}

# HELP ip_intel_empty_ua_total Requests with empty user agent
# TYPE ip_intel_empty_ua_total gauge
ip_intel_empty_ua_total ${empty_ua:-0}

EOF

  local k i

  # Country metrics
  if (( ${#country_req[@]} > 0 )); then
    _family_header ip_intel_requests_by_country "Requests by country code"
    for k in "${!country_req[@]}"; do
      printf 'ip_intel_requests_by_country{country="%s"} %s\n' "$k" "${country_req[$k]}"
    done
    printf '\n'
  fi
  if (( ${#country_blk[@]} > 0 )); then
    _family_header ip_intel_blocked_by_country "Blocked requests by country code"
    for k in "${!country_blk[@]}"; do
      printf 'ip_intel_blocked_by_country{country="%s"} %s\n' "$k" "${country_blk[$k]}"
    done
    printf '\n'
  fi

  # ASN metrics (keys are already escaped by prom_label)
  if (( ${#asn_req[@]} > 0 )); then
    _family_header ip_intel_requests_by_asn "Requests by ASN organization"
    for k in "${!asn_req[@]}"; do
      printf 'ip_intel_requests_by_asn{asn_org="%s"} %s\n' "$k" "${asn_req[$k]}"
    done
    printf '\n'
  fi
  if (( ${#asn_blk[@]} > 0 )); then
    _family_header ip_intel_blocked_by_asn "Blocked requests by ASN organization"
    for k in "${!asn_blk[@]}"; do
      printf 'ip_intel_blocked_by_asn{asn_org="%s"} %s\n' "$k" "${asn_blk[$k]}"
    done
    printf '\n'
  fi

  # Provider metrics
  if (( ${#provider_req[@]} > 0 )); then
    _family_header ip_intel_requests_by_provider "Requests by cloud provider"
    for k in "${!provider_req[@]}"; do
      printf 'ip_intel_requests_by_provider{provider="%s"} %s\n' "$k" "${provider_req[$k]}"
    done
    printf '\n'
  fi
  if (( ${#provider_blk[@]} > 0 )); then
    _family_header ip_intel_blocked_by_provider "Blocked requests by cloud provider"
    for k in "${!provider_blk[@]}"; do
      printf 'ip_intel_blocked_by_provider{provider="%s"} %s\n' "$k" "${provider_blk[$k]}"
    done
    printf '\n'
  fi

  # Top blocked IPs (kept in the order awk emitted them)
  if (( ${#top_ips[@]} > 0 )); then
    _family_header ip_intel_top_blocked_ip_requests "Top blocked IPs by blocked request count"
    for i in "${!top_ips[@]}"; do
      printf 'ip_intel_top_blocked_ip_requests{ip="%s"} %s\n' "${top_ips[$i]}" "${top_counts[$i]}"
    done
    printf '\n'
  fi

  # Exporter metadata. Integer arithmetic keeps bc out of the dependency
  # list and always prints a leading zero (0.123 rather than .123).
  end_ns=$(_now_ns)
  elapsed_ns=$(( end_ns - start_ns ))
  if (( elapsed_ns < 0 )); then
    elapsed_ns=0
  fi
  printf -v duration '%d.%03d' \
    $(( elapsed_ns / 1000000000 )) $(( (elapsed_ns / 1000000) % 1000 ))

  cat <<EOF
# HELP ip_intel_exporter_duration_seconds Script execution time
# TYPE ip_intel_exporter_duration_seconds gauge
ip_intel_exporter_duration_seconds ${duration}

# HELP ip_intel_exporter_last_run_timestamp Last successful run (unix timestamp)
# TYPE ip_intel_exporter_last_run_timestamp gauge
ip_intel_exporter_last_run_timestamp ${now}
EOF
}

# ============================================================================
# OUTPUT HANDLING
# ============================================================================

#######################################
# Collect metrics once and write them to OUTPUT_FILE (via a temp file and
# rename, so a reader never sees a partial file) or to stdout.
# Globals:   OUTPUT_FILE (read)
# Returns:   exits 1 when the output file cannot be written
#######################################
output_metrics() {
  local metrics
  metrics=$(collect_metrics)

  if [[ -n "$OUTPUT_FILE" ]]; then
    local tmp_file="${OUTPUT_FILE}.tmp"
    if ! printf '%s\n' "$metrics" > "$tmp_file" \
      || ! mv -- "$tmp_file" "$OUTPUT_FILE"; then
      rm -f -- "$tmp_file"
      err "Failed to write ${OUTPUT_FILE}"
      exit 1
    fi
    log "Metrics written to ${OUTPUT_FILE}"
  else
    printf '%s\n' "$metrics"
  fi
}

#######################################
# Serve the metrics over HTTP with netcat, one connection per loop pass.
# The payload is generated before nc starts listening, so a scrape gets the
# snapshot taken when the previous connection finished.
# Globals:   HTTP_PORT (read)
# Returns:   never returns normally; exits 1 when no netcat is installed
#######################################
serve_http() {
  local nc_cmd
  if command -v ncat &>/dev/null; then
    nc_cmd="ncat"
  elif command -v nc &>/dev/null; then
    nc_cmd="nc"
  else
    err "netcat (nc/ncat) not found — required for HTTP mode"
    exit 1
  fi

  log "Starting HTTP server on port ${HTTP_PORT}"
  log "Metrics endpoint: http://localhost:${HTTP_PORT}/metrics"

  local metrics body content_length
  # The body goes through %s only: label values contain backslash escapes
  # that must reach the scraper untouched.
  local response_fmt='HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s'

  while true; do
    metrics=$(collect_metrics)
    body="${metrics}"$'\n'
    # Content-Length is a byte count; ${#body} would count characters and
    # truncate responses that contain multi-byte UTF-8 label values.
    content_length=$(printf '%s' "$body" | wc -c)
    content_length=$(( content_length ))

    # Traditional netcat wants "-l -p PORT -q 1"; other variants take
    # "-l PORT". Try both, and back off when neither could listen so a
    # busy port does not turn into a hot loop re-parsing the log.
    # shellcheck disable=SC2059 — format string is a fixed literal above
    if ! printf "$response_fmt" "$content_length" "$body" \
        | "$nc_cmd" -l -p "$HTTP_PORT" -q 1 2>/dev/null \
      && ! printf "$response_fmt" "$content_length" "$body" \
        | "$nc_cmd" -l "$HTTP_PORT" 2>/dev/null; then
      sleep 1
    fi
  done
}

# ============================================================================
# MAIN
# ============================================================================

parse_args "$@"
preflight

if [[ "$HTTP_MODE" == "true" ]]; then
  serve_http
else
  output_metrics
fi