#!/bin/bash ################################################################################ # Script Name: web-traffic-exporter.sh # Version: 1.8 # Description: Prometheus exporter for web server access log traffic metrics. # Parses Nginx/Apache access logs and exports request counts, # status codes, bandwidth, unique visitors, top paths, referrers, # bot detection, and protocol distribution. # # Author: Phil Connor # Contact: contact@mylinux.work # Website: https://mylinux.work # License: MIT # # Prerequisites: # - Standard Unix tools (awk, grep, tail) # - netcat (nc) for HTTP mode # - Read access to web server access logs # # Usage: # # Output to stdout # ./web-traffic-exporter.sh # # # HTTP server mode # ./web-traffic-exporter.sh --http -p 9199 # # # Textfile collector mode # ./web-traffic-exporter.sh --textfile # # Metrics Exported: # Core Status: # - web_traffic_up - Exporter status (1=up, 0=down) # - web_traffic_exporter_info{version} - Exporter version # # Request Totals: # - web_traffic_requests - Total requests in parsed window # - web_traffic_requests_by_status{status} - Per HTTP status code # - web_traffic_requests_by_class{class} - Per status class (2xx etc) # - web_traffic_requests_by_method{method} - Per HTTP method # # Bandwidth: # - web_traffic_response_bytes - Total bytes sent # - web_traffic_response_bytes_by_class{class} - Bytes per class # # Unique Visitors: # - web_traffic_unique_ips - Unique source IPs # - web_traffic_unique_user_agents - Unique user agents # # Top Paths (top 10): # - web_traffic_top_path_requests{path,rank} - Hits per path # # Top Referrers (top 10): # - web_traffic_top_referrer_requests{referrer,rank} - Per referrer # # Bot Detection: # - web_traffic_bot_requests - Bot requests # - web_traffic_human_requests - Non-bot requests # - web_traffic_bot_ratio - Bot / total ratio # # Downloads: # - web_traffic_downloads - Total file downloads # - web_traffic_downloads_bytes - Bytes from downloads # - 
web_traffic_top_download_requests{file,rank} - Top downloads # # Status Breakdown: # - web_traffic_status_2xx # - web_traffic_status_3xx # - web_traffic_status_4xx # - web_traffic_status_5xx # # Request Rate: # - web_traffic_requests_per_minute - Estimated from log timestamps # # Protocol: # - web_traffic_requests_by_protocol{protocol} - HTTP version # # Hourly Patterns: # - web_traffic_requests_by_hour{hour} - Requests per hour of day # # Derived Metrics: # - web_traffic_average_response_bytes - Average response size # - web_traffic_error_ratio - Ratio of 4xx+5xx to total requests # # 404 Errors: # - web_traffic_404_path_requests{path,rank} - Top 404 paths # - web_traffic_top_404_referrer_requests{referrer,rank} - Top 404 referrers # # Top Paths by Bandwidth: # - web_traffic_top_path_response_bytes{path,rank} - Top paths by bytes # # Content Type: # - web_traffic_page_views - Page view requests # - web_traffic_asset_requests - Asset requests (css/js/images) # # Top Clients: # - web_traffic_top_client_requests{ip,rank} - Top 10 client IPs # # Response Sizes: # - web_traffic_response_size_bucket{size} - Requests by size range # # Top Bots: # - web_traffic_top_bot_requests{bot,rank} - Top 10 bot names # # Server Status: # - web_traffic_server_running{server} - 1 if process found # - web_traffic_server_type{server} - Server info metric # # Time Windows (daily/weekly/monthly): # - web_traffic_window_requests{window} - Requests in window # - web_traffic_window_bytes{window} - Bytes in window # - web_traffic_window_unique_ips{window} - Unique IPs in window # - web_traffic_window_bot_requests{window} - Bot requests in window # - web_traffic_window_requests_by_class{window,class} - Per class in window # - web_traffic_window_page_views{window} - Page views in window # - web_traffic_window_asset_requests{window} - Asset requests in window # - web_traffic_window_downloads{window} - Downloads in window # - web_traffic_window_downloads_bytes{window} - Download bytes in window # 
- web_traffic_window_human_requests{window} - Non-bot requests in window # - web_traffic_window_unique_user_agents{window} - Unique UAs in window # # Exporter: # - web_traffic_exporter_duration_seconds - Script execution time # - web_traffic_exporter_last_run_timestamp - Last run timestamp # - web_traffic_exporter_lines_parsed - Lines parsed count # # Configuration: # Default HTTP port: 9199 # Textfile directory: /var/lib/node_exporter # ACCESS_LOG: /var/log/nginx/access.log (or WEB_TRAFFIC_ACCESS_LOG env) # TAIL_LINES: 0 (all lines; or WEB_TRAFFIC_TAIL_LINES env) # MAX_ROTATED: 7 (or WEB_TRAFFIC_MAX_ROTATED env) # HTTP_TAIL_LINES: 5000 (or WEB_TRAFFIC_HTTP_TAIL_LINES env) # TRACK_UNIQUE_UA: 1 (or WEB_TRAFFIC_TRACK_UNIQUE_UA env; 0=disable) # REFERRER_MODE: host (or WEB_TRAFFIC_REFERRER_MODE env; host|full|off) # SITE_DOMAIN: (or WEB_TRAFFIC_SITE_DOMAIN env; e.g. mylinux.work) # When set, downloads only count if referrer matches this domain. # When unset, downloads require any non-empty referrer (filters direct bot hits). 
#
################################################################################

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9199

# Operational safety / performance knobs
# - Limit rotated files read when present (avoid huge scrape cost)
MAX_ROTATED="${WEB_TRAFFIC_MAX_ROTATED:-7}"
# - In HTTP mode, default to tailing N lines unless user explicitly set tail-lines
HTTP_TAIL_LINES_DEFAULT="${WEB_TRAFFIC_HTTP_TAIL_LINES:-5000}"
# - Reduce memory/cardinality cost:
#   1 = track unique user agents, 0 = disable
TRACK_UNIQUE_UA="${WEB_TRAFFIC_TRACK_UNIQUE_UA:-1}"
# Referrer mode: host | full | off
REFERRER_MODE="${WEB_TRAFFIC_REFERRER_MODE:-host}"

ACCESS_LOG="${WEB_TRAFFIC_ACCESS_LOG:-/var/log/nginx/access.log}"
LOG_DIR="${WEB_TRAFFIC_LOG_DIR:-}"
LOG_FORMAT="${WEB_TRAFFIC_LOG_FORMAT:-combined}"
TAIL_LINES="${WEB_TRAFFIC_TAIL_LINES:-0}"
SERVER_TYPE="${WEB_TRAFFIC_SERVER_TYPE:-auto}"
SITE_DOMAIN="${WEB_TRAFFIC_SITE_DOMAIN:-}"
DOWNLOAD_PATH="${WEB_TRAFFIC_DOWNLOAD_PATH:-/downloads/}"

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Escape a string for use as a Prometheus label value.
# Backslash, double quote and newline are escaped (in that order, so the
# backslashes introduced by the later substitutions are not doubled again).
# See: https://prometheus.io/docs/instrumenting/exposition_formats/
# Args:    $1 - raw label value
# Outputs: escaped value followed by a newline on stdout
prom_escape() {
    local s="$1"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    printf '%s\n' "$s"
}

# ----------------------------------------------------------------------------
# NOTE(review): show_usage() and parse_args() are corrupted in the text under
# review -- everything between "cat <" and "&2; exit 1" (the usage
# here-document and the option-parsing loop) is missing. The surviving text is
# kept verbatim below instead of being guessed at; restore both functions from
# version control before running this file.
#
#   show_usage() { cat <&2; exit 1 ;; esac done }
# ----------------------------------------------------------------------------

# ============================================================================
# SERVER DETECTION
# ============================================================================

# Print the web server type: the SERVER_TYPE override when it is not "auto",
# otherwise "nginx" / "apache" depending on which process pgrep finds
# (nginx is checked first), or "unknown" when neither is running.
# Globals: SERVER_TYPE (read)
# Outputs: one word on stdout
detect_server_type() {
    if [ "$SERVER_TYPE" != "auto" ]; then
        echo "$SERVER_TYPE"
        return
    fi

    if pgrep -x nginx >/dev/null 2>&1; then
        echo "nginx"
    elif pgrep -x apache2 >/dev/null 2>&1; then
        echo "apache"
    elif pgrep -x httpd >/dev/null 2>&1; then
        echo "apache"
    else
        echo "unknown"
    fi
}

# ============================================================================
# LOG STREAMING (current + rotated logs)
# ============================================================================

# Stream log content from rotated logs (oldest first) then the current log.
# Handles <log>.N (plain) and <log>.N.gz (compressed) files; when both exist
# for the same N the compressed one is used. Missing generations are skipped.
# Args:
#   $1 - current log file path
#   $2 - max rotated generations to include (default 31). A value that is
#        not a plain non-negative integer is reported on stderr and treated
#        as 0, i.e. only the current log is streamed.
# Outputs: concatenated log lines on stdout
# Returns: exit status of reading the current log
stream_log_data() {
    local log_file="$1"
    local max_rotated="${2:-31}"
    local i

    case "$max_rotated" in
        '' | *[!0-9]*)
            printf 'stream_log_data: ignoring invalid rotation limit: %s\n' \
                "$max_rotated" >&2
            max_rotated=0
            ;;
    esac
    # Force base 10 so zero-padded values such as "08" are not read as octal.
    max_rotated=$((10#$max_rotated))

    # Highest generation number is the oldest file, so count downwards.
    for ((i = max_rotated; i >= 1; i--)); do
        if [ -f "${log_file}.${i}.gz" ]; then
            # gzip -dc instead of zcat: BSD/macOS zcat only accepts .Z files.
            gzip -dc -- "${log_file}.${i}.gz" 2>/dev/null
        elif [ -f "${log_file}.${i}" ]; then
            cat -- "${log_file}.${i}" 2>/dev/null
        fi
    done

    # Current log last, so the stream is in chronological order.
    cat -- "$log_file" 2>/dev/null
}

# ============================================================================
# LOG PARSING (single-pass awk)
# ============================================================================

# Parse access log lines and output all metrics data in a structured format.
# This uses a single awk pass for performance.
# Output format: KEY value pairs, one per line
#
# Args:
#   $1 - access log path (must be a readable regular file, else returns non-zero)
#   $2 - number of trailing lines to read; 0 or non-numeric reads the whole file
#   $3 - log format; referrer / user-agent / bot logic only runs for "combined"
# Globals (read): MAX_ROTATED, TRACK_UNIQUE_UA, REFERRER_MODE, SITE_DOMAIN,
#                 DOWNLOAD_PATH
# Calls:   stream_log_data when <log>.1 or <log>.1.gz exists
# Outputs: records on stdout, e.g. "TOTAL_REQUESTS n", "STATUS code n",
#          "TOP_PATH rank count path", "WIN_BYTES window n"
parse_access_log() {
    local log_file="$1"
    local num_lines="$2"
    local format="$3"

    [ -f "$log_file" ] || return
    [ -r "$log_file" ] || return

    local now_epoch
    now_epoch=$(date +%s)
    local cutoff_daily=$((now_epoch - 86400))
    local cutoff_weekly=$((now_epoch - 604800))
    local cutoff_monthly=$((now_epoch - 2592000))

    # Check if rotated logs exist for this file
    local has_rotated=false
    if [ -f "${log_file}.1" ] || [ -f "${log_file}.1.gz" ]; then
        has_rotated=true
    fi

    # Stream log data into awk: use rotated logs for full monthly history,
    # or read the entire current log so daily/weekly/monthly windows differ.
    # NOTE(review): when rotated logs exist, $num_lines is not applied, so the
    # tail-lines limit has no effect in that case -- confirm this is intended.
    if [ "$has_rotated" = true ]; then
        stream_log_data "$log_file" "$MAX_ROTATED"
    elif [ "$num_lines" -gt 0 ] 2>/dev/null; then
        tail -n "$num_lines" "$log_file" 2>/dev/null
    else
        cat "$log_file" 2>/dev/null
    fi | awk -v fmt="$format" \
        -v track_ua="$TRACK_UNIQUE_UA" \
        -v ref_mode="$REFERRER_MODE" \
        -v site_domain="$SITE_DOMAIN" \
        -v download_path="$DOWNLOAD_PATH" \
        -v cutoff_daily="$cutoff_daily" \
        -v cutoff_weekly="$cutoff_weekly" \
        -v cutoff_monthly="$cutoff_monthly" '
    BEGIN {
        total = 0
        total_bytes = 0
        bot_count = 0
        lines_parsed = 0
        first_ts = ""
        last_ts = ""

        # Month cumulative day offsets (non-leap), 1-based month indexing
        split("0,31,59,90,120,151,181,212,243,273,304,334", mdays, ",")

        # Month name to number lookup
        split("Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec", mn, ",")
        for (i = 1; i <= 12; i++) month_num[mn[i]] = i

        # Window counters
        win_requests["daily"] = 0; win_requests["weekly"] = 0; win_requests["monthly"] = 0
        win_bytes["daily"] = 0; win_bytes["weekly"] = 0; win_bytes["monthly"] = 0
        win_bots["daily"] = 0; win_bots["weekly"] = 0; win_bots["monthly"] = 0
    }

    # Number of keys in array a.
    function count_elems(a,    k, n) {
        n = 0
        for (k in a) n++
        return n
    }

    # Render a (possibly very large) integer-valued number as plain digits.
    # Implicit number-to-string conversion can switch to %.6g notation on some
    # awk builds once a value exceeds 2^31, which would corrupt byte counters.
    function big_int(x) {
        return sprintf("%.0f", x + 0)
    }

    # Convert "17/Mar/2026:10:00:00 +0000" to a UTC epoch, or 0 when the month
    # name is not recognised. Uses pure arithmetic (no mktime dependency).
    function parse_log_epoch(ts,    parts, dparts, tparts, day, mon, year, hh, mm, ss, tz, sign, tzh, tzm, epoch) {
        split(ts, parts, " ")
        split(parts[1], dparts, "/")
        day = dparts[1]+0
        mon = month_num[dparts[2]]
        # dparts[3] is year:HH:MM:SS
        split(dparts[3], tparts, ":")
        year = tparts[1]+0; hh = tparts[2]+0; mm = tparts[3]+0; ss = tparts[4]+0

        if (mon < 1) return 0

        # Days from whole years since 1970, including leap days of prior years
        epoch = (year - 1970) * 365 + int((year - 1969) / 4) - int((year - 1901) / 100) + int((year - 1601) / 400)
        # Days from months
        epoch += mdays[mon] + day - 1
        # Leap day adjustment for current year
        if (mon > 2 && (year % 4 == 0 && (year % 100 != 0 || year % 400 == 0))) epoch++
        # Convert to seconds and add time
        epoch = epoch * 86400 + hh * 3600 + mm * 60 + ss

        # Apply timezone offset: local time at +HHMM is ahead of UTC, so subtract
        tz = parts[2]
        if (tz != "") {
            sign = (substr(tz, 1, 1) == "-") ? 1 : -1
            tzh = substr(tz, 2, 2) + 0
            tzm = substr(tz, 4, 2) + 0
            epoch += sign * (tzh * 3600 + tzm * 60)
        }
        return epoch
    }

    {
        lines_parsed++

        # Parse combined/common log format using field splitting
        # 1.2.3.4 - - [17/Mar/2026:10:00:00 +0000] "GET /path HTTP/1.1" 200 1234 "ref" "ua"
        ip = $1

        # Extract timestamp between [ and ]
        timestamp = ""
        if (match($0, /\[([^\]]+)\]/)) {
            timestamp = substr($0, RSTART+1, RLENGTH-2)
            # The first bracket pair can come from the client-supplied auth
            # user field, and FIRST_TS/LAST_TS are later passed through a
            # shell eval by the caller. Keep only characters that real
            # timestamps use so no shell metacharacters can get through.
            gsub(/[^0-9A-Za-z:+. \/-]/, "", timestamp)
        }

        # Extract the request line between first pair of quotes
        request_line = ""
        p1 = index($0, "\"")
        if (p1 > 0) {
            rest = substr($0, p1+1)
            p2 = index(rest, "\"")
            if (p2 > 0) {
                request_line = substr(rest, 1, p2-1)
            }
        }

        if (request_line == "") next

        # Split request line: METHOD PATH PROTOCOL
        n_req = split(request_line, req_parts, " ")
        if (n_req < 2) next
        method = req_parts[1]
        if (method != "GET" && method != "HEAD" && method != "POST" &&
            method != "PUT" && method != "DELETE" && method != "PATCH" &&
            method != "OPTIONS" && method != "CONNECT" && method != "TRACE") next
        path = req_parts[2]
        protocol = (n_req >= 3) ? req_parts[3] : ""

        # After the closing quote of request line, find status and bytes
        after_req = substr($0, p1 + 1 + p2)
        gsub(/^ +/, "", after_req)
        n_after = split(after_req, after_parts, " ")
        if (n_after < 2) next
        status = after_parts[1]
        bytes = after_parts[2]

        if (status !~ /^[0-9]+$/) next

        if (first_ts == "") first_ts = timestamp
        last_ts = timestamp

        if (bytes == "-") bytes = 0
        total++
        total_bytes += bytes

        # Status codes
        status_count[status]++

        # Status classes
        class_code = substr(status, 1, 1) "xx"
        class_count[class_code]++
        class_bytes[class_code] += bytes

        # Methods
        method_count[method]++

        # Unique IPs
        ips[ip] = 1

        # Paths (clean query strings for grouping)
        split(path, pathparts, "?")
        clean_path = pathparts[1]
        path_count[clean_path]++
        path_bytes[clean_path] += bytes

        # Download tracking: classification only; counting is deferred until
        # after bot detection further down.
        is_download = 0
        is_downloadable = 0
        is_path_download = 0
        if (method == "GET" && substr(status, 1, 1) == "2") {
            if (download_path != "" && index(clean_path, download_path) == 1) {
                is_downloadable = 1
                is_path_download = 1
            } else if (clean_path ~ /\.(sh|ps1|py|pl|rb|json|yml|yaml|xml|csv|conf|cfg|prom|txt)$/ ||
                       clean_path ~ /\.(zip|tar|gz|tgz|bz2|xz|7z|rar)$/ ||
                       clean_path ~ /\.(pdf|doc|docx|xls|xlsx|ppt|pptx|odt|ods)$/ ||
                       clean_path ~ /\.(deb|rpm|msi|exe|dmg|pkg|appimage|AppImage)$/ ||
                       clean_path ~ /\.(iso|img|bin|run)$/) {
                is_downloadable = 1
            }
        }

        # Protocol
        if (protocol != "") {
            gsub(/^ +| +$/, "", protocol)
            if (protocol != "") proto_count[protocol]++
        }

        is_bot = 0

        # Referrer and User-Agent (combined format only)
        if (fmt == "combined") {
            ref = ""
            ua = ""

            # Split the whole line by double-quote to extract quoted fields
            n = split($0, qparts, "\"")
            # qparts[2] = request line, qparts[4] = referrer, qparts[6] = user-agent
            if (n >= 6) {
                ref = qparts[4]
                ua = qparts[6]
            } else if (n >= 4) {
                ref = qparts[4]
            }

            # Referrer counting: reduce cardinality by default (host-only).
            if (ref_mode != "off" && ref != "" && ref != "-") {
                ref_key = ref
                if (ref_mode == "host") {
                    # Extract host from http(s)://host/... (portable, no gawk capture groups)
                    ref_key = ref
                    sub(/^https?:\/\//, "", ref_key)
                    sub(/\/.*/, "", ref_key)
                }
                referrer_count[ref_key]++
                if (status == "404" && ref_key != "") {
                    error_404_referrer[ref_key]++
                }
            }

            # Unique UA tracking can be expensive; allow disabling.
            if (track_ua == 1 && ua != "" && ua != "-") {
                user_agents[ua] = 1
            }

            # Bot detection
            # NOTE: Google uses multiple crawler UA tokens beyond plain "Googlebot"
            # See: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers
            # The slash inside the bracket expression is escaped because an
            # unescaped one ends the regex literal on some awk implementations.
            if (ua ~ /(Googlebot([-\/][A-Za-z0-9._]+)?|Google-Extended|AdsBot-Google|Mediapartners-Google|StoreBot-Google)/ ||
                ua ~ /(meta-webindexer|ChatGPT-User|OAI-SearchBot|Amzn-SearchBot|PerplexityBot)/ ||
                ua ~ /([Bb]ot|[Cc]rawl|[Ss]pider|[Ss]lurp|bingbot|BingPreview|YandexBot|Baiduspider|DuckDuckBot|facebookexternalhit|Twitterbot|LinkedInBot|Applebot|MJ12bot|AhrefsBot|SemrushBot|DotBot|PetalBot)/) {
                bot_count++
                is_bot = 1
            }
        }

        # AI retrieval bots (ChatGPT-User, PerplexityBot) fetch content on
        # behalf of a real user, so their downloads count as legitimate.
        is_ai_retrieval = 0
        if (is_bot && ua ~ /(ChatGPT-User|PerplexityBot)/) {
            is_ai_retrieval = 1
        }

        # Count downloads only for non-bot requests that look like real
        # users (plus AI retrieval bots). Signals indicating a real download:
        #   1. Path-based: any non-bot hit under the download path
        #   2. AI retrieval bot (ChatGPT-User, PerplexityBot)
        #   3. Download-tool UA (wget, curl, aria2, ...)
        #   4. Referrer present (matching site_domain when that is set)
        # Extension-based downloads outside the download path still require
        # signal 2-4 to avoid counting embedded/linked assets.
        if (is_downloadable && (!is_bot || is_ai_retrieval)) {
            is_real_download = 0

            # Path-based downloads: trust all non-bot requests
            if (is_path_download) {
                is_real_download = 1
            }

            # AI retrieval bots are always real downloads
            if (!is_real_download && is_ai_retrieval) {
                is_real_download = 1
            }

            # Check for download-tool user agents
            if (!is_real_download &&
                (ua ~ /^(Wget|curl|aria2|libcurl|Go-http-client|python-requests|HTTPie)/ ||
                 ua ~ /^(ufw-threat-feeds|ufw-blocklist|iptables-threat-feeds|iptables-blocklist)/)) {
                is_real_download = 1
            }

            # Check for site referrer (browser click)
            if (!is_real_download && ref != "" && ref != "-") {
                if (site_domain != "") {
                    if (index(ref, site_domain) > 0) is_real_download = 1
                } else {
                    is_real_download = 1
                }
            }

            if (is_real_download) {
                download_total++
                download_bytes += bytes
                download_count[clean_path]++
                is_download = 1
            }
        }

        # Time-windowed stats + min/max epoch
        epoch = parse_log_epoch(timestamp)

        # Hourly traffic patterns (last 24 hours only)
        if (epoch > 0 && epoch >= cutoff_daily) {
            split(timestamp, ts_parts, ":")
            hour = ts_parts[2]+0
            hour_count[hour]++
        }

        # Track min/max epoch so the observed span is stable even if log
        # lines arrive out of order
        if (epoch > 0) {
            if (min_epoch == "" || epoch < min_epoch) min_epoch = epoch
            if (max_epoch == "" || epoch > max_epoch) max_epoch = epoch
        }

        # 404 error paths
        if (status == "404") {
            error_404_count[clean_path]++
        }

        # Page views vs assets
        is_page = 0
        is_asset = 0
        if (clean_path ~ /\.(css|js|png|jpg|jpeg|gif|svg|woff|woff2|ttf|ico|webp)$/) {
            asset_requests++
            is_asset = 1
        } else if (clean_path ~ /\/$/ || clean_path ~ /\.html?$/ || clean_path !~ /\.[a-zA-Z0-9]+$/) {
            page_views++
            is_page = 1
        }

        # Top client IPs (ips[ip] only records presence; this keeps a count)
        ip_count[ip]++

        # Response size distribution
        if (bytes+0 <= 1024) {
            size_bucket["tiny"]++
        } else if (bytes+0 <= 10240) {
            size_bucket["small"]++
        } else if (bytes+0 <= 102400) {
            size_bucket["medium"]++
        } else if (bytes+0 <= 1048576) {
            size_bucket["large"]++
        } else {
            size_bucket["huge"]++
        }

        # Bot name extraction (when bot detected)
        if (is_bot && ua != "") {
            bot_name = ""
            # Prefer specific/official bot tokens first (better grouping)
            if (match(ua, /(Googlebot-[A-Za-z0-9._-]+|Googlebot\/[0-9.]+|Googlebot|Google-Extended|AdsBot-Google|Mediapartners-Google|StoreBot-Google)/)) {
                bot_name = substr(ua, RSTART, RLENGTH)
            } else if (match(ua, /(bingbot|BingPreview)/)) {
                bot_name = substr(ua, RSTART, RLENGTH)
            } else if (match(ua, /(YandexBot|Baiduspider|DuckDuckBot|Slurp)/)) {
                bot_name = substr(ua, RSTART, RLENGTH)
            } else if (match(ua, /(facebookexternalhit|Twitterbot|LinkedInBot|Applebot)/)) {
                bot_name = substr(ua, RSTART, RLENGTH)
            } else if (match(ua, /(AhrefsBot|SemrushBot|MJ12bot|DotBot|PetalBot)/)) {
                bot_name = substr(ua, RSTART, RLENGTH)
            } else if (match(ua, /(meta-webindexer|ChatGPT-User|OAI-SearchBot|Amzn-SearchBot|PerplexityBot)/)) {
                bot_name = substr(ua, RSTART, RLENGTH)
            } else if (match(ua, /([Bb]ot[a-zA-Z]*|[Cc]rawler|[Ss]pider)/)) {
                bot_name = substr(ua, RSTART, RLENGTH)
            }
            if (bot_name != "") {
                bot_name_count[bot_name]++
            }
        }

        if (epoch > 0) {
            if (epoch >= cutoff_daily) {
                win_requests["daily"]++
                win_bytes["daily"] += bytes
                win_ips_daily[ip] = 1
                if (is_bot) win_bots["daily"]++
                if (!is_bot) win_human_daily++
                win_class_daily[class_code]++
                if (is_page) win_page_views_daily++
                if (is_asset) win_asset_requests_daily++
                if (is_download) win_downloads_daily++
                win_downloads_bytes_daily += (is_download ? bytes : 0)
                if (track_ua == 1 && ua != "" && ua != "-") win_uas_daily[ua] = 1
            }
            if (epoch >= cutoff_weekly) {
                win_requests["weekly"]++
                win_bytes["weekly"] += bytes
                win_ips_weekly[ip] = 1
                if (is_bot) win_bots["weekly"]++
            }
            if (epoch >= cutoff_monthly) {
                win_requests["monthly"]++
                win_bytes["monthly"] += bytes
                win_ips_monthly[ip] = 1
                if (is_bot) win_bots["monthly"]++
            }
        }
    }

    END {
        print "LINES_PARSED " lines_parsed
        print "TOTAL_REQUESTS " total
        print "TOTAL_BYTES " big_int(total_bytes)
        print "BOT_REQUESTS " bot_count
        print "HUMAN_REQUESTS " (total - bot_count)
        print "UNIQUE_IPS " count_elems(ips)
        print "UNIQUE_UAS " count_elems(user_agents)
        print "FIRST_TS " first_ts
        print "LAST_TS " last_ts
        print "MIN_EPOCH " (min_epoch=="" ? 0 : min_epoch)
        print "MAX_EPOCH " (max_epoch=="" ? 0 : max_epoch)

        # Status codes
        for (s in status_count) {
            print "STATUS " s " " status_count[s]
        }

        # Status classes
        for (c in class_count) {
            print "CLASS " c " " class_count[c]
        }

        # Class bytes
        for (c in class_bytes) {
            print "CLASS_BYTES " c " " big_int(class_bytes[c])
        }

        # Methods
        for (m in method_count) {
            print "METHOD " m " " method_count[m]
        }

        # Protocols
        for (p in proto_count) {
            print "PROTOCOL " p " " proto_count[p]
        }

        # Top paths: repeated selection of the current maximum (top 10).
        # Each winner is deleted from its array so the next pass finds the
        # runner-up; the arrays are not needed again afterwards.
        for (i = 1; i <= 10; i++) {
            max_count = 0
            max_path = ""
            for (p in path_count) {
                if (path_count[p] > max_count) {
                    max_count = path_count[p]
                    max_path = p
                }
            }
            if (max_path != "") {
                print "TOP_PATH " i " " max_count " " max_path
                delete path_count[max_path]
            }
        }

        # Top paths by bytes
        for (i = 1; i <= 10; i++) {
            max_bytes = 0
            max_path = ""
            for (p in path_bytes) {
                if (path_bytes[p] > max_bytes) {
                    max_bytes = path_bytes[p]
                    max_path = p
                }
            }
            if (max_path != "") {
                print "TOP_PATH_BYTES " i " " big_int(max_bytes) " " max_path
                delete path_bytes[max_path]
            }
        }

        # Top referrers (top 10)
        for (i = 1; i <= 10; i++) {
            max_count = 0
            max_ref = ""
            for (r in referrer_count) {
                if (referrer_count[r] > max_count) {
                    max_count = referrer_count[r]
                    max_ref = r
                }
            }
            if (max_ref != "") {
                print "TOP_REF " i " " max_count " " max_ref
                delete referrer_count[max_ref]
            }
        }

        # Downloads
        print "DOWNLOAD_TOTAL " download_total+0
        print "DOWNLOAD_BYTES " big_int(download_bytes)
        for (i = 1; i <= 10; i++) {
            max_count = 0
            max_dl = ""
            for (d in download_count) {
                if (download_count[d] > max_count) {
                    max_count = download_count[d]
                    max_dl = d
                }
            }
            if (max_dl != "") {
                print "TOP_DOWNLOAD " i " " max_count " " max_dl
                delete download_count[max_dl]
            }
        }

        # Hourly distribution (always 24 rows, zero-filled)
        for (h = 0; h <= 23; h++) {
            printf "HOUR %02d %d\n", h, hour_count[h]+0
        }

        # Top 404 paths
        for (i = 1; i <= 10; i++) {
            max_count = 0
            max_path = ""
            for (p in error_404_count) {
                if (error_404_count[p] > max_count) {
                    max_count = error_404_count[p]
                    max_path = p
                }
            }
            if (max_path != "") {
                print "TOP_404 " i " " max_count " " max_path
                delete error_404_count[max_path]
            }
        }

        # Top 404 referrers
        for (i = 1; i <= 10; i++) {
            max_count = 0
            max_ref = ""
            for (r in error_404_referrer) {
                if (error_404_referrer[r] > max_count) {
                    max_count = error_404_referrer[r]
                    max_ref = r
                }
            }
            if (max_ref != "") {
                print "TOP_404_REF " i " " max_count " " max_ref
                delete error_404_referrer[max_ref]
            }
        }

        # Page views vs assets
        print "PAGE_VIEWS " page_views+0
        print "ASSET_REQUESTS " asset_requests+0

        # Top client IPs
        for (i = 1; i <= 10; i++) {
            max_count = 0
            max_ip = ""
            for (p in ip_count) {
                if (ip_count[p] > max_count) {
                    max_count = ip_count[p]
                    max_ip = p
                }
            }
            if (max_ip != "") {
                print "TOP_IP " i " " max_count " " max_ip
                delete ip_count[max_ip]
            }
        }

        # Response size distribution (fixed order, zero-filled)
        sizes[1] = "tiny"; sizes[2] = "small"; sizes[3] = "medium"; sizes[4] = "large"; sizes[5] = "huge"
        for (s = 1; s <= 5; s++) {
            print "SIZE_BUCKET " sizes[s] " " size_bucket[sizes[s]]+0
        }

        # Top bot names
        for (i = 1; i <= 10; i++) {
            max_count = 0
            max_bot = ""
            for (b in bot_name_count) {
                if (bot_name_count[b] > max_count) {
                    max_count = bot_name_count[b]
                    max_bot = b
                }
            }
            if (max_bot != "") {
                print "TOP_BOT " i " " max_count " " max_bot
                delete bot_name_count[max_bot]
            }
        }

        # Time-windowed summaries
        windows[1] = "daily"; windows[2] = "weekly"; windows[3] = "monthly"
        for (w = 1; w <= 3; w++) {
            wname = windows[w]
            print "WIN_REQUESTS " wname " " win_requests[wname]
            print "WIN_BYTES " wname " " big_int(win_bytes[wname])
            print "WIN_BOTS " wname " " win_bots[wname]
        }
        print "WIN_UNIQUE_IPS daily " count_elems(win_ips_daily)
        print "WIN_UNIQUE_IPS weekly " count_elems(win_ips_weekly)
        print "WIN_UNIQUE_IPS monthly " count_elems(win_ips_monthly)

        # Daily window extended metrics
        print "WIN_STATUS_CLASS daily 2xx " win_class_daily["2xx"]+0
        print "WIN_STATUS_CLASS daily 3xx " win_class_daily["3xx"]+0
        print "WIN_STATUS_CLASS daily 4xx " win_class_daily["4xx"]+0
        print "WIN_STATUS_CLASS daily 5xx " win_class_daily["5xx"]+0
        print "WIN_PAGE_VIEWS daily " win_page_views_daily+0
        print "WIN_ASSET_REQUESTS daily " win_asset_requests_daily+0
        print "WIN_DOWNLOADS daily " win_downloads_daily+0
        print "WIN_DOWNLOADS_BYTES daily " big_int(win_downloads_bytes_daily)
        print "WIN_HUMAN_REQUESTS daily " win_human_daily+0
        print "WIN_UNIQUE_UAS daily " count_elems(win_uas_daily)
    }
    '
}

# ============================================================================
# METRICS GENERATION
# ============================================================================

# Emit HELP/TYPE lines only once per metric name (avoids duplicates in --log-dir mode)
# Names already emitted are remembered in _emitted_help as "|name|" tokens.
# Args:    $1 - metric name, $2 - help text, $3 - metric type
# Globals: _emitted_help (read, written)
# Outputs: "# HELP" and "# TYPE" lines on stdout the first time a name is seen
_emitted_help=""
emit_help_type() {
    local metric="$1" help_text="$2" mtype="$3"
    case "$_emitted_help" in
        *"|${metric}|"*) return ;;
    esac
    _emitted_help="${_emitted_help}|${metric}|"
    echo "# HELP $metric $help_text"
    echo "# TYPE $metric $mtype"
}

# ----------------------------------------------------------------------------
# NOTE(review): generate_metrics() starts here, but its opening section is
# corrupted in the text under review (here-document bodies are missing after
# "cat <") and the function continues beyond the reviewed range. The surviving
# text of its first lines is kept verbatim below instead of being guessed at;
# restore the function head from version control.
#
#   generate_metrics() { _emitted_help="" local script_start script_start=$(date +%s) # ======================================================================== # Exporter Status # ======================================================================== cat </dev/null 2>&1; then nginx_running=1 fi if pgrep -x apache2 >/dev/null 2>&1 || pgrep -x httpd >/dev/null 2>&1;
# ----------------------------------------------------------------------------
then apache_running=1 fi cat </dev/null; then TAIL_LINES="$HTTP_TAIL_LINES_DEFAULT" fi fi # Build list of log files to process: (file:domain) pairs local log_files=() local log_domains=() if [ -n "$LOG_DIR" ] && [ -d "$LOG_DIR" ]; then for log_file in "$LOG_DIR"/*.log; do [ -f "$log_file" ] || continue # skip error logs, byte logs, and other non-access logs case "$log_file" in *.error.log|*.bytes.log|*.ssl.log) continue ;; esac log_files+=("$log_file") local domain_name domain_name=$(basename "$log_file" .log) log_domains+=("$domain_name") done else if [ -f "$ACCESS_LOG" ]; then log_files+=("$ACCESS_LOG") log_domains+=("") fi fi if [ ${#log_files[@]} -eq 0 ]; then cat < "$parsed_file" if [ ! -s "$parsed_file" ]; then rm -f "$parsed_file" continue fi # Extract all scalar values in a single awk pass local total_requests total_bytes bot_requests human_requests local unique_ips unique_uas lines_parsed first_ts last_ts local dl_total dl_bytes page_views asset_reqs min_epoch max_epoch eval "$(awk ' /^LINES_PARSED / { printf "lines_parsed=%s\n", $2 } /^TOTAL_REQUESTS / { printf "total_requests=%s\n", $2 } /^TOTAL_BYTES / { printf "total_bytes=%s\n", $2 } /^BOT_REQUESTS / { printf "bot_requests=%s\n", $2 } /^HUMAN_REQUESTS / { printf "human_requests=%s\n", $2 } /^UNIQUE_IPS / { printf "unique_ips=%s\n", $2 } /^UNIQUE_UAS / { printf "unique_uas=%s\n", $2 } /^FIRST_TS / { printf "first_ts=\"%s %s\"\n", $2, $3 } /^LAST_TS / { printf "last_ts=\"%s %s\"\n", $2, $3 } /^DOWNLOAD_TOTAL / { printf "dl_total=%s\n", $2 } /^DOWNLOAD_BYTES / { printf "dl_bytes=%s\n", $2 } /^PAGE_VIEWS / { printf "page_views=%s\n", $2 } /^ASSET_REQUESTS / { printf "asset_reqs=%s\n", $2 } /^MIN_EPOCH / { printf "min_epoch=%s\n", $2 } /^MAX_EPOCH / { printf "max_epoch=%s\n", $2 } ' "$parsed_file")" # ================================================================ # Request Totals # ================================================================ # Build label wrapper: "label" or empty for single log 
local lwrap="" lwrap_comma="{" if [ -n "$dlabel" ]; then lwrap="{${dlabel}}" lwrap_comma="{${dlabel_comma}" fi emit_help_type web_traffic_requests "Total requests in parsed window" gauge echo "web_traffic_requests${lwrap} ${total_requests:-0}" echo "" emit_help_type web_traffic_response_bytes "Total response bytes in parsed window" gauge echo "web_traffic_response_bytes${lwrap} ${total_bytes:-0}" echo "" # ================================================================ # Status Codes # ================================================================ local status_lines status_lines=$(grep "^STATUS " "$parsed_file") if [ -n "$status_lines" ]; then emit_help_type web_traffic_requests_by_status "Requests per HTTP status code" gauge echo "$status_lines" | while read -r _ status count; do esc_status=$(prom_escape "$status") echo "web_traffic_requests_by_status${lwrap_comma}status=\"$esc_status\"} $count" done echo "" fi # ================================================================ # Status Classes # ================================================================ local class_lines class_lines=$(grep "^CLASS [0-9]" "$parsed_file") if [ -n "$class_lines" ]; then emit_help_type web_traffic_requests_by_class "Requests per status class" gauge echo "$class_lines" | while read -r _ class count; do esc_class=$(prom_escape "$class") echo "web_traffic_requests_by_class${lwrap_comma}class=\"$esc_class\"} $count" done echo "" local s2xx s3xx s4xx s5xx s2xx=$(echo "$class_lines" | awk '/^CLASS 2xx / {print $3}') s3xx=$(echo "$class_lines" | awk '/^CLASS 3xx / {print $3}') s4xx=$(echo "$class_lines" | awk '/^CLASS 4xx / {print $3}') s5xx=$(echo "$class_lines" | awk '/^CLASS 5xx / {print $3}') emit_help_type web_traffic_status_2xx "Total 2xx responses" gauge echo "web_traffic_status_2xx${lwrap} ${s2xx:-0}" echo "" emit_help_type web_traffic_status_3xx "Total 3xx responses" gauge echo "web_traffic_status_3xx${lwrap} ${s3xx:-0}" echo "" emit_help_type web_traffic_status_4xx "Total 
4xx responses" gauge echo "web_traffic_status_4xx${lwrap} ${s4xx:-0}" echo "" emit_help_type web_traffic_status_5xx "Total 5xx responses" gauge echo "web_traffic_status_5xx${lwrap} ${s5xx:-0}" echo "" fi # ================================================================ # Class Bytes # ================================================================ local class_bytes_lines class_bytes_lines=$(grep "^CLASS_BYTES " "$parsed_file") if [ -n "$class_bytes_lines" ]; then emit_help_type web_traffic_response_bytes_by_class "Response bytes per status class" gauge echo "$class_bytes_lines" | while read -r _ class bytes; do esc_class=$(prom_escape "$class") echo "web_traffic_response_bytes_by_class${lwrap_comma}class=\"$esc_class\"} $bytes" done echo "" fi # ================================================================ # Methods # ================================================================ local method_lines method_lines=$(grep "^METHOD " "$parsed_file") if [ -n "$method_lines" ]; then emit_help_type web_traffic_requests_by_method "Requests per HTTP method" gauge echo "$method_lines" | while read -r _ method count; do esc_method=$(prom_escape "$method") echo "web_traffic_requests_by_method${lwrap_comma}method=\"$esc_method\"} $count" done echo "" fi # ================================================================ # Unique Visitors # ================================================================ emit_help_type web_traffic_unique_ips "Unique source IPs in parsed window" gauge echo "web_traffic_unique_ips${lwrap} ${unique_ips:-0}" echo "" emit_help_type web_traffic_unique_user_agents "Unique user agents in parsed window" gauge echo "web_traffic_unique_user_agents${lwrap} ${unique_uas:-0}" echo "" # ================================================================ # Bot Detection (only meaningful with combined format) # ================================================================ if [ "$LOG_FORMAT" = "combined" ]; then local bot_ratio="0" if [ 
"${total_requests:-0}" -gt 0 ] 2>/dev/null; then bot_ratio=$(awk "BEGIN {printf \"%.4f\", ${bot_requests:-0} / ${total_requests:-1}}") fi emit_help_type web_traffic_bot_requests "Total bot requests detected" gauge echo "web_traffic_bot_requests${lwrap} ${bot_requests:-0}" echo "" emit_help_type web_traffic_human_requests "Total non-bot requests" gauge echo "web_traffic_human_requests${lwrap} ${human_requests:-0}" echo "" emit_help_type web_traffic_bot_ratio "Ratio of bot requests to total requests" gauge echo "web_traffic_bot_ratio${lwrap} $bot_ratio" echo "" fi # ================================================================ # Derived Metrics # ================================================================ local avg_response_bytes="0" if [ "${total_requests:-0}" -gt 0 ] 2>/dev/null; then avg_response_bytes=$(awk "BEGIN {printf \"%.0f\", ${total_bytes:-0} / ${total_requests:-1}}") fi emit_help_type web_traffic_average_response_bytes "Average response size in bytes" gauge echo "web_traffic_average_response_bytes${lwrap} $avg_response_bytes" echo "" local error_ratio="0" if [ "${total_requests:-0}" -gt 0 ] 2>/dev/null; then local s4xx_val s5xx_val s4xx_val=$(grep "^CLASS 4xx " "$parsed_file" | awk '{print $3}') s5xx_val=$(grep "^CLASS 5xx " "$parsed_file" | awk '{print $3}') error_ratio=$(awk "BEGIN {printf \"%.4f\", (${s4xx_val:-0} + ${s5xx_val:-0}) / ${total_requests:-1}}") fi emit_help_type web_traffic_error_ratio "Ratio of 4xx+5xx errors to total requests" gauge echo "web_traffic_error_ratio${lwrap} $error_ratio" echo "" # ================================================================ # Top Paths # ================================================================ local top_path_lines top_path_lines=$(grep "^TOP_PATH " "$parsed_file") if [ -n "$top_path_lines" ]; then emit_help_type web_traffic_top_path_requests "Top requested paths by hit count" gauge echo "$top_path_lines" | while read -r _ rank count path; do esc_path=$(prom_escape "$path") echo 
"web_traffic_top_path_requests${lwrap_comma}path=\"$esc_path\",rank=\"$rank\"} $count" done echo "" fi # ================================================================ # Top Paths by Bandwidth # ================================================================ local top_path_bytes_lines top_path_bytes_lines=$(grep "^TOP_PATH_BYTES " "$parsed_file") if [ -n "$top_path_bytes_lines" ]; then emit_help_type web_traffic_top_path_response_bytes "Top paths by response bytes" gauge echo "$top_path_bytes_lines" | while read -r _ rank bytes path; do esc_path=$(prom_escape "$path") echo "web_traffic_top_path_response_bytes${lwrap_comma}path=\"$esc_path\",rank=\"$rank\"} $bytes" done echo "" fi # ================================================================ # Top Referrers # ================================================================ local top_ref_lines top_ref_lines=$(grep "^TOP_REF " "$parsed_file") if [ -n "$top_ref_lines" ]; then emit_help_type web_traffic_top_referrer_requests "Top referrers by hit count" gauge echo "$top_ref_lines" | while read -r _ rank count referrer; do esc_ref=$(prom_escape "$referrer") echo "web_traffic_top_referrer_requests${lwrap_comma}referrer=\"$esc_ref\",rank=\"$rank\"} $count" done echo "" fi # ================================================================ # Downloads # ================================================================ emit_help_type web_traffic_downloads "Total file downloads" gauge echo "web_traffic_downloads${lwrap} ${dl_total:-0}" echo "" emit_help_type web_traffic_downloads_bytes "Total bytes from file downloads" gauge echo "web_traffic_downloads_bytes${lwrap} ${dl_bytes:-0}" echo "" local top_dl_lines top_dl_lines=$(grep "^TOP_DOWNLOAD " "$parsed_file") if [ -n "$top_dl_lines" ]; then emit_help_type web_traffic_top_download_requests "Top downloaded files by hit count" gauge echo "$top_dl_lines" | while read -r _ rank count filepath; do esc_file=$(prom_escape "$filepath") echo 
"web_traffic_top_download_requests${lwrap_comma}file=\"$esc_file\",rank=\"$rank\"} $count" done echo "" fi # ================================================================ # Hourly Traffic Patterns # ================================================================ local hour_lines hour_lines=$(grep "^HOUR " "$parsed_file") if [ -n "$hour_lines" ]; then emit_help_type web_traffic_requests_by_hour "Requests per hour of day" gauge echo "$hour_lines" | while read -r _ hour count; do esc_hour=$(prom_escape "$hour") echo "web_traffic_requests_by_hour${lwrap_comma}hour=\"$esc_hour\"} $count" done echo "" fi # ================================================================ # 404 Error Paths # ================================================================ local top_404_lines top_404_lines=$(grep "^TOP_404 " "$parsed_file") if [ -n "$top_404_lines" ]; then emit_help_type web_traffic_404_path_requests "Top paths returning 404" gauge echo "$top_404_lines" | while read -r _ rank count path; do esc_path=$(prom_escape "$path") echo "web_traffic_404_path_requests${lwrap_comma}path=\"$esc_path\",rank=\"$rank\"} $count" done echo "" fi # ================================================================ # Top 404 Referrers # ================================================================ local top_404_ref_lines top_404_ref_lines=$(grep "^TOP_404_REF " "$parsed_file") if [ -n "$top_404_ref_lines" ]; then emit_help_type web_traffic_top_404_referrer_requests "Top referrers sending traffic to 404 pages" gauge echo "$top_404_ref_lines" | while read -r _ rank count referrer; do esc_ref=$(prom_escape "$referrer") echo "web_traffic_top_404_referrer_requests${lwrap_comma}referrer=\"$esc_ref\",rank=\"$rank\"} $count" done echo "" fi # ================================================================ # Page Views vs Assets # ================================================================ emit_help_type web_traffic_page_views "Total page view requests" gauge echo 
"web_traffic_page_views${lwrap} ${page_views:-0}" echo "" emit_help_type web_traffic_asset_requests "Total asset requests" gauge echo "web_traffic_asset_requests${lwrap} ${asset_reqs:-0}" echo "" # ================================================================ # Top Client IPs # ================================================================ local top_ip_lines top_ip_lines=$(grep "^TOP_IP " "$parsed_file") if [ -n "$top_ip_lines" ]; then emit_help_type web_traffic_top_client_requests "Top client IPs by request count" gauge echo "$top_ip_lines" | while read -r _ rank count ip; do esc_ip=$(prom_escape "$ip") echo "web_traffic_top_client_requests${lwrap_comma}ip=\"$esc_ip\",rank=\"$rank\"} $count" done echo "" fi # ================================================================ # Response Size Distribution # ================================================================ local size_lines size_lines=$(grep "^SIZE_BUCKET " "$parsed_file") if [ -n "$size_lines" ]; then emit_help_type web_traffic_response_size_bucket "Requests per response size range" gauge echo "$size_lines" | while read -r _ size count; do esc_size=$(prom_escape "$size") echo "web_traffic_response_size_bucket${lwrap_comma}size=\"$esc_size\"} $count" done echo "" fi # ================================================================ # Top Bot Names # ================================================================ local top_bot_lines top_bot_lines=$(grep "^TOP_BOT " "$parsed_file") if [ -n "$top_bot_lines" ]; then emit_help_type web_traffic_top_bot_requests "Top bots by request count" gauge echo "$top_bot_lines" | while read -r _ rank count bot; do esc_bot=$(prom_escape "$bot") echo "web_traffic_top_bot_requests${lwrap_comma}bot=\"$esc_bot\",rank=\"$rank\"} $count" done echo "" fi # ================================================================ # Protocol Distribution # ================================================================ local proto_lines proto_lines=$(grep "^PROTOCOL " "$parsed_file") 
if [ -n "$proto_lines" ]; then emit_help_type web_traffic_requests_by_protocol "Requests per HTTP protocol version" gauge echo "$proto_lines" | while read -r _ proto count; do esc_proto=$(prom_escape "$proto") echo "web_traffic_requests_by_protocol${lwrap_comma}protocol=\"$esc_proto\"} $count" done echo "" fi # ================================================================ # Request Rate Estimation # ================================================================ local rpm=0 if [ "${min_epoch:-0}" -gt 0 ] 2>/dev/null && [ "${max_epoch:-0}" -gt 0 ] 2>/dev/null; then local duration=$((max_epoch - min_epoch)) if [ "$duration" -gt 0 ]; then rpm=$(awk "BEGIN {printf \"%.2f\", (${total_requests:-0} / $duration) * 60}") fi fi emit_help_type web_traffic_requests_per_minute "Estimated requests per minute from log window" gauge echo "web_traffic_requests_per_minute${lwrap} $rpm" echo "" # ================================================================ # Lines Parsed # ================================================================ emit_help_type web_traffic_exporter_lines_parsed "Number of log lines parsed" gauge echo "web_traffic_exporter_lines_parsed${lwrap} ${lines_parsed:-0}" echo "" # ================================================================ # Time-Windowed Stats (daily/weekly/monthly) # ================================================================ local win_lines win_lines=$(grep "^WIN_" "$parsed_file") if [ -n "$win_lines" ]; then local wlp="{" [ -n "$dlabel_comma" ] && wlp="{${dlabel_comma}" emit_help_type web_traffic_window_requests "Total requests in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_REQUESTS / {print "web_traffic_window_requests" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_bytes "Total response bytes in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_BYTES / {print "web_traffic_window_bytes" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_unique_ips 
"Unique source IPs in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_UNIQUE_IPS / {print "web_traffic_window_unique_ips" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_bot_requests "Bot requests in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_BOTS / {print "web_traffic_window_bot_requests" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_requests_by_class "Requests per status class in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_STATUS_CLASS / {print "web_traffic_window_requests_by_class" dl "window=\"" $2 "\",class=\"" $3 "\"} " $4}' echo "" emit_help_type web_traffic_window_page_views "Page view requests in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_PAGE_VIEWS / {print "web_traffic_window_page_views" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_asset_requests "Asset requests in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_ASSET_REQUESTS / {print "web_traffic_window_asset_requests" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_downloads "File downloads in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_DOWNLOADS / {print "web_traffic_window_downloads" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_downloads_bytes "Download bytes in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_DOWNLOADS_BYTES / {print "web_traffic_window_downloads_bytes" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_human_requests "Non-bot requests in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_HUMAN_REQUESTS / {print "web_traffic_window_human_requests" dl "window=\"" $2 "\"} " $3}' echo "" emit_help_type web_traffic_window_unique_user_agents "Unique user agents in time window" gauge echo "$win_lines" | awk -v dl="$wlp" '/^WIN_UNIQUE_UAS / {print "web_traffic_window_unique_user_agents" dl "window=\"" $2 "\"} " 
$3}' echo "" fi rm -f "$parsed_file" done > "$metrics_buf" # Group all samples under their HELP/TYPE headers so multi-domain # output is valid Prometheus exposition format awk ' /^# HELP / { metric=$3; if (!(metric in help)) order[n++]=metric; help[metric]=$0; next } /^# TYPE / { type[$3]=$0; next } /^$/ { next } /^[a-zA-Z_]/ { match($0,/^[a-zA-Z_:][a-zA-Z0-9_:]*/); m=substr($0,RSTART,RLENGTH); samples[m]=samples[m] $0 "\n"; next } END { for(i=0;i&2 if ! command -v nc >/dev/null 2>&1; then echo "ERROR: netcat (nc) required for HTTP mode" >&2 exit 1 fi trap 'echo "Shutting down web traffic exporter..." >&2; exit 0' INT TERM while true; do { read -r request local body if [[ "$request" =~ ^GET\ /metrics ]]; then body=$(generate_metrics) printf "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s" "${#body}" "$body" else body=$(cat <<'HTMLEOF' Web Traffic Exporter v1.8

Web Traffic Exporter v1.8

Metrics

Sections (auto-detected)

  • Request totals by status, class, and method
  • Bandwidth totals and by status class
  • Unique visitors (IPs and user agents)
  • Top 10 requested paths
  • Top 10 external referrers
  • Bot vs human traffic detection
  • HTTP protocol version distribution
  • Request rate estimation
  • Web server process detection
HTMLEOF
)
            printf "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s" "${#body}" "$body"
        fi
        # The response built above is piped into a one-shot netcat listener.
        # Builds whose `nc -h` output mentions GNU/traditional are invoked as
        # `-l -p PORT -q 1`; every other build is invoked as `-l PORT`.
    } | if nc -h 2>&1 | grep -q 'GNU\|traditional'; then
        nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
    else
        nc -l "$HTTP_PORT" 2>/dev/null
    fi
    done
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

#######################################
# Entry point: parse the command line, then run in exactly one mode.
#   - HTTP mode     : HTTP_MODE is "true"      -> run_http_server
#   - textfile mode : OUTPUT_FILE is non-empty -> write metrics atomically
#   - stdout mode   : otherwise                -> generate_metrics to stdout
# Globals:
#   HTTP_MODE   (read)
#   OUTPUT_FILE (read)
# Arguments:
#   "$@" - command-line arguments, forwarded unchanged to parse_args
# Outputs:
#   Textfile mode writes progress and error messages to stderr.
# Returns:
#   Textfile mode exits 1 when the output directory or temp file cannot be
#   created, when generate_metrics fails, when fewer than 5 lines were
#   produced, or when the final rename fails. Otherwise the status of the
#   last command run.
#######################################
main() {
    parse_args "$@"

    if [ "$HTTP_MODE" = true ]; then
        run_http_server
        return
    fi

    if [ -z "$OUTPUT_FILE" ]; then
        generate_metrics
        return
    fi

    # ---- textfile mode ------------------------------------------------------
    local output_dir temp_file file_lines

    output_dir="$(dirname "$OUTPUT_FILE")"
    if ! mkdir -p "$output_dir"; then
        echo "ERROR: Cannot create output directory $output_dir" >&2
        exit 1
    fi

    # The temp file lives next to the target so the final mv is a rename
    # within one directory rather than a copy.
    if ! temp_file=$(mktemp "${output_dir}/.web_traffic_metrics.XXXXXX"); then
        echo "ERROR: Cannot create temporary file in $output_dir" >&2
        exit 1
    fi

    if ! generate_metrics > "$temp_file" 2>/dev/null; then
        rm -f "$temp_file"
        echo "ERROR: Failed to generate metrics" >&2
        exit 1
    fi

    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
    # wc may pad the count with blanks (e.g. BSD wc); strip them so the
    # number prints cleanly in the messages below.
    file_lines=${file_lines//[[:space:]]/}

    # Refuse to replace the previous file with a suspiciously short one.
    if [ "$file_lines" -lt 5 ]; then
        rm -f "$temp_file"
        echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
        exit 1
    fi

    chmod 644 "$temp_file"

    # Only report success when the rename actually happened; on failure remove
    # the temp file so it does not accumulate in the output directory.
    if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
        rm -f "$temp_file"
        echo "ERROR: Failed to move metrics into $OUTPUT_FILE" >&2
        exit 1
    fi

    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
}

main "$@"