#!/bin/bash
#############################################################
####       Nginx Metrics Exporter for Prometheus         ####
####   Comprehensive nginx monitoring via stub_status,   ####
####      logs, SSL, process, and config metrics         ####
####                                                     ####
####   Author:  Phil Connor                              ####
####   Contact: contact@mylinux.work                     ####
####   License: MIT                                      ####
####   Version: 1.1                                      ####
####                                                     ####
####   Usage: ./nginx-metrics-exporter.sh [OPTIONS]      ####
#############################################################
#
# Metrics collected:
#   - stub_status: connections, accepts, handled, requests, reading, writing, waiting
#   - Process: worker count, memory usage, CPU usage, open files
#   - Access logs: requests by status code, bytes transferred, methods, top URIs
#   - SSL: certificate expiry days for configured domains
#   - Config: worker_processes, worker_connections, keepalive_timeout
#   - Upstream: configured upstreams and their server counts
#
# Requirements:
#   - nginx with stub_status module enabled
#   - socat (for HTTP server)
#   - curl (for stub_status fetching)
#

# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages. This must sit on its own line; when it shares a line with the
# shebang comment it is never executed.
set -euo pipefail

#########################
###   Configuration   ###
#########################

# Every setting below can be overridden through the environment variable named
# inside the ${...:-default} expansion; the CLI flags override both.
LISTEN_PORT="${NGINX_EXPORTER_PORT:-9113}"
STUB_STATUS_URL="${NGINX_STUB_URL:-http://127.0.0.1/nginx_status}"
ACCESS_LOG="${NGINX_ACCESS_LOG:-/var/log/nginx/access.log}"
ERROR_LOG="${NGINX_ERROR_LOG:-/var/log/nginx/error.log}"
NGINX_CONF="${NGINX_CONF:-/etc/nginx/nginx.conf}"
SITES_DIR="${NGINX_SITES_DIR:-/etc/nginx/sites-enabled}"
CONF_D_DIR="${NGINX_CONF_D:-/etc/nginx/conf.d}"
SCRAPE_INTERVAL="${SCRAPE_INTERVAL:-15}"
SSL_CHECK_DOMAINS="${SSL_CHECK_DOMAINS:-}" # Comma-separated list of domains to check SSL

# Log parsing settings
LOG_TAIL_LINES="${LOG_TAIL_LINES:-10000}"        # Number of lines to parse from access log
LOG_PARSE_INTERVAL="${LOG_PARSE_INTERVAL:-60}"   # How often to parse logs (seconds)

# State files for log metrics (cache of the last access-log parse).
# The default lives in /tmp; point NGINX_STATE_DIR at a private directory on
# multi-user hosts.
STATE_DIR="${NGINX_STATE_DIR:-/tmp/nginx-metrics}"
LAST_LOG_PARSE=0

# Output mode
TEXTFILE_DIR="${NGINX_TEXTFILE_DIR:-/var/lib/node_exporter}"
OUTPUT_FILE=""
HTTP_MODE=false
# NOTE(review): this file reached review with its newlines collapsed and every
# "<...>" span stripped, which removed all here-document bodies together with
# the code adjacent to them. Sections tagged "reconstructed" below were rebuilt
# from the surviving code around each gap; confirm their metric names and
# wording against the upstream script before release.

#########################
###     Logging       ###
#########################

#######################################
# Write a timestamped diagnostic line to stderr (stdout carries metrics only).
# Arguments: the message words.
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}

#########################
###      Helpers      ###
#########################

# Escape $1 for use inside a Prometheus label value (backslash, quote, newline).
prom_escape_label() {
  local value=$1
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  value=${value//$'\n'/\\n}
  printf '%s' "$value"
}

# Print $1 when it is a non-negative integer, otherwise the fallback $2 (default 0).
int_or_default() {
  if [[ "${1:-}" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$1"
  else
    printf '%s\n' "${2:-0}"
  fi
}

# Print the text of nginx.conf plus every file below the sites and conf.d
# directories. find -L is used because sites-enabled normally holds symlinks,
# which a plain recursive grep does not follow.
nginx_config_text() {
  local src
  for src in "$NGINX_CONF" "$SITES_DIR" "$CONF_D_DIR"; do
    [[ -e "$src" ]] || continue
    # "awk 1" re-emits each line with a trailing newline so files never merge.
    find -L "$src" -type f -exec awk 1 {} + 2>/dev/null || true
  done
}

# Print the first value of directive $1 in nginx.conf with its ';' removed.
# Commented-out directives are skipped because their first field is not the name.
conf_directive_value() {
  awk -v d="$1" '$1 == d { v = $2; sub(/;.*$/, "", v); print v; exit }' \
    "$NGINX_CONF" 2>/dev/null || true
}

# Print 1 when nginx.conf contains "directive value;" ($1, $2), otherwise 0.
conf_flag() {
  awk -v d="$1" -v want="$2" '
    $1 == d { v = $2; sub(/;.*$/, "", v); if (v == want) { hit = 1; exit } }
    END { print hit + 0 }
  ' "$NGINX_CONF" 2>/dev/null || true
}

#########################
###  Parse Arguments  ###
#########################

# Print the option summary.
# NOTE(review): reconstructed - the original help text was lost; this one is
# derived from the options parse_args accepts.
usage() {
  cat <<EOF
Usage: ${0##*/} [OPTIONS]

Output (default: print the metrics once to stdout):
  --textfile            Write to $TEXTFILE_DIR/nginx.prom
  --http                Serve the metrics over HTTP
  --output, -o FILE     Write the metrics to FILE

Settings:
  --port PORT           HTTP listen port (default: $LISTEN_PORT)
  --stub-url URL        stub_status URL (default: $STUB_STATUS_URL)
  --access-log FILE     Access log (default: $ACCESS_LOG)
  --error-log FILE      Error log (default: $ERROR_LOG)
  --nginx-conf FILE     Main nginx config (default: $NGINX_CONF)
  --ssl-domains LIST    Comma-separated domains to check over the network
  --help                Show this help
EOF
}

#######################################
# Apply command-line options to the global settings.
# Globals:   OUTPUT_FILE, HTTP_MODE, LISTEN_PORT, STUB_STATUS_URL, ACCESS_LOG,
#            ERROR_LOG, NGINX_CONF, SSL_CHECK_DOMAINS (written)
# Arguments: the script's command line.
# Exits:     0 after --help, 1 on a bad option.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/nginx.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      --output | -o | --port | --stub-url | --access-log | --error-log | --nginx-conf | --ssl-domains)
        # Without this check a missing value trips "set -u" on $2 with an
        # unhelpful "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          log "ERROR: option $1 requires a value"
          exit 1
        fi
        case "$1" in
          --output | -o) OUTPUT_FILE="$2" ;;
          --port) LISTEN_PORT="$2" ;;
          --stub-url) STUB_STATUS_URL="$2" ;;
          --access-log) ACCESS_LOG="$2" ;;
          --error-log) ERROR_LOG="$2" ;;
          --nginx-conf) NGINX_CONF="$2" ;;
          --ssl-domains) SSL_CHECK_DOMAINS="$2" ;;
        esac
        shift 2
        ;;
      --help)
        usage
        exit 0
        ;;
      *)
        # NOTE(review): reconstructed - the original default arm was lost.
        log "ERROR: Unknown option: $1"
        usage >&2
        exit 1
        ;;
    esac
  done
}

#########################
###       Setup       ###
#########################

# Print the short name of the first supported package manager found, or an
# empty line when none is available.
# NOTE(review): the first probe was lost; apt-get is assumed because
# install_package runs apt-get for "apt".
detect_package_manager() {
  if command -v apt-get &>/dev/null; then
    echo "apt"
  elif command -v dnf &>/dev/null; then
    echo "dnf"
  elif command -v yum &>/dev/null; then
    echo "yum"
  elif command -v zypper &>/dev/null; then
    echo "zypper"
  elif command -v pacman &>/dev/null; then
    echo "pacman"
  elif command -v apk &>/dev/null; then
    echo "apk"
  else
    echo ""
  fi
}

#######################################
# Install package $1 with the detected package manager.
# Returns: the package manager's status, or 1 when no manager is known.
#######################################
install_package() {
  local pkg="$1"
  local pkgmgr
  pkgmgr=$(detect_package_manager)

  log "Installing $pkg..."
  case "$pkgmgr" in
    apt) apt-get update -qq && apt-get install -y -qq "$pkg" ;;
    dnf) dnf install -y -q "$pkg" ;;
    yum) yum install -y -q "$pkg" ;;
    zypper) zypper install -y -q "$pkg" ;;
    pacman) pacman -S --noconfirm "$pkg" ;;
    apk) apk add --quiet "$pkg" ;;
    *)
      log "ERROR: Unknown package manager. Please install $pkg manually."
      return 1
      ;;
  esac
}

# Make sure command $1 exists, installing it when running as root.
# Exits 1 when it is missing and cannot be installed.
ensure_command() {
  local cmd="$1"
  if command -v "$cmd" &>/dev/null; then
    return 0
  fi

  log "$cmd not found, attempting to install..."
  if [[ $EUID -ne 0 ]]; then
    log "ERROR: $cmd is required. Run as root to auto-install, or install manually:"
    log "  Debian/Ubuntu: apt install $cmd"
    log "  RHEL/CentOS:   yum install $cmd"
    log "  Fedora:        dnf install $cmd"
    log "  Alpine:        apk add $cmd"
    exit 1
  fi
  if ! install_package "$cmd"; then
    log "ERROR: Failed to install $cmd"
    exit 1
  fi
  log "$cmd installed successfully"
}

#######################################
# Prepare the state directory and verify the external tools.
# Globals: STATE_DIR, HTTP_MODE (read)
# Exits:   1 when a required tool is missing and cannot be installed.
#######################################
setup() {
  mkdir -p "$STATE_DIR"

  # socat only serves the HTTP endpoint; textfile/stdout runs do not need it.
  if [[ "$HTTP_MODE" == true ]]; then
    ensure_command socat
  fi
  ensure_command curl

  # Check if nginx is running
  if ! pgrep -x nginx &>/dev/null && ! pidof nginx &>/dev/null; then
    log "WARNING: nginx process not found - process metrics will show nginx_process_running=0"
  fi

  # The probe only prints guidance. Its non-zero status must not reach
  # "set -e", otherwise a mere warning would abort the exporter.
  check_stub_status || true
}

#######################################
# Probe the stub_status URL and explain what is wrong when it is unusable.
# Globals: STUB_STATUS_URL (read)
# Returns: 0 when stub_status output was received, 1 otherwise.
#######################################
check_stub_status() {
  log "Checking stub_status at $STUB_STATUS_URL..."

  # One request yields both body and status; the code is appended on its own
  # last line. "|| true" keeps a refused connection from tripping "set -e"
  # (curl still reports it as code 000).
  local raw http_code content
  raw=$(curl -s --max-time 5 -w '\n%{http_code}' "$STUB_STATUS_URL" 2>/dev/null) || true
  http_code=${raw##*$'\n'}
  content=${raw%$'\n'*}
  if [[ ! "$http_code" =~ ^[0-9]{3}$ ]]; then
    http_code="000"
  fi

  case "$http_code" in
    200)
      if [[ "$content" == *"Active connections:"* ]]; then
        log "✓ stub_status is working correctly"
        return 0
      fi
      log "WARNING: $STUB_STATUS_URL returned 200 but doesn't look like stub_status output"
      log "  Expected 'Active connections:' in response"
      ;;
    000)
      log "WARNING: Cannot connect to $STUB_STATUS_URL (connection refused/timeout)"
      log "  stub_status metrics will show nginx_up=0"
      ;;
    403)
      log "WARNING: Access denied to $STUB_STATUS_URL (HTTP 403)"
      log "  Check 'allow' directives in stub_status location block"
      ;;
    404)
      log "WARNING: stub_status endpoint not found at $STUB_STATUS_URL (HTTP 404)"
      log "  stub_status may not be configured"
      ;;
    *)
      log "WARNING: Unexpected response from $STUB_STATUS_URL (HTTP $http_code)"
      ;;
  esac

  show_stub_status_help
  return 1
}

# Log a sample nginx configuration that enables stub_status.
show_stub_status_help() {
  local line
  while IFS= read -r line; do
    log "$line"
  done <<'EOF'

To enable stub_status, add this to your nginx config:

  server {
      listen 127.0.0.1:80;
      server_name localhost;

      location /nginx_status {
          stub_status on;
          access_log off;
          allow 127.0.0.1;
          deny all;
      }
  }

Then reload nginx: nginx -t && systemctl reload nginx

Or specify a different URL with: --stub-url <url>

EOF
}

###########################
### Stub Status Metrics ###
###########################

#######################################
# Emit nginx_up plus the connection/request counters from stub_status.
# Globals: STUB_STATUS_URL (read)
# Outputs: Prometheus text on stdout.
#######################################
collect_stub_status() {
  local stub_output

  echo "# HELP nginx_up Whether nginx stub_status is reachable"
  echo "# TYPE nginx_up gauge"

  # -f turns HTTP errors into a failure so an error page is not parsed as
  # statistics; the content check rejects any other non-stub_status body.
  if ! stub_output=$(curl -sf --max-time 5 "$STUB_STATUS_URL" 2>/dev/null) \
    || [[ "$stub_output" != *"Active connections:"* ]]; then
    echo "nginx_up 0"
    return
  fi
  echo "nginx_up 1"

  # Parse stub_status output
  # Format:
  #   Active connections: 43
  #   server accepts handled requests
  #    7368 7368 10993
  #   Reading: 0 Writing: 5 Waiting: 38
  # Values are passed through as text so large counters keep every digit.
  local active_connections accepts handled requests reading writing waiting
  read -r active_connections accepts handled requests reading writing waiting < <(
    awk '
      function dflt(v) { return (v == "" ? "0" : v) }
      /^Active connections:/ { active = $3 }
      /^[[:space:]]*[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]*$/ {
        acc = $1; han = $2; req = $3
      }
      /Reading:/ {
        for (i = 1; i < NF; i++) {
          if ($i == "Reading:") rd = $(i + 1)
          else if ($i == "Writing:") wr = $(i + 1)
          else if ($i == "Waiting:") wt = $(i + 1)
        }
      }
      END { print dflt(active), dflt(acc), dflt(han), dflt(req), dflt(rd), dflt(wr), dflt(wt) }
    ' <<<"$stub_output"
  )

  # NOTE(review): reconstructed - the original metric block was lost; the
  # names follow the common nginx exporter convention.
  cat <<EOF

# HELP nginx_connections_active Active client connections
# TYPE nginx_connections_active gauge
nginx_connections_active $active_connections

# HELP nginx_connections_accepted Accepted client connections
# TYPE nginx_connections_accepted counter
nginx_connections_accepted $accepts

# HELP nginx_connections_handled Handled client connections
# TYPE nginx_connections_handled counter
nginx_connections_handled $handled

# HELP nginx_http_requests_total Total client requests
# TYPE nginx_http_requests_total counter
nginx_http_requests_total $requests

# HELP nginx_connections_reading Connections where nginx is reading the request header
# TYPE nginx_connections_reading gauge
nginx_connections_reading $reading

# HELP nginx_connections_writing Connections where nginx is writing the response
# TYPE nginx_connections_writing gauge
nginx_connections_writing $writing

# HELP nginx_connections_waiting Idle client connections
# TYPE nginx_connections_waiting gauge
nginx_connections_waiting $waiting
EOF
}

#########################
###  Process Metrics  ###
#########################

#######################################
# Emit process-level metrics (workers, memory, CPU, fds, threads, start time)
# gathered from /proc for every nginx process.
# Outputs: Prometheus text on stdout.
#######################################
collect_process_metrics() {
  local nginx_master_pid nginx_pids worker_count pid
  local total_memory=0 total_cpu=0 total_fds=0 total_threads=0
  local clk_tck rss fds threads stat_line cpu_seconds
  local -a fields=()

  echo "# HELP nginx_process_running Whether nginx process is running"
  echo "# TYPE nginx_process_running gauge"

  # NOTE(review): the first half of this lookup was lost; "pgrep -o" (oldest
  # match, i.e. the master) is assumed.
  nginx_master_pid=$(pgrep -o -x nginx 2>/dev/null \
    || pidof nginx 2>/dev/null | awk '{print $1}') || true
  if [[ -z "$nginx_master_pid" ]]; then
    echo "nginx_process_running 0"
    return
  fi
  echo "nginx_process_running 1"

  # Get all nginx PIDs
  nginx_pids=$(pgrep -x nginx 2>/dev/null || pidof nginx 2>/dev/null) || true

  # Count workers (total processes minus master)
  worker_count=$(wc -w <<<"$nginx_pids")
  worker_count=$((worker_count > 0 ? worker_count - 1 : 0))

  echo "# HELP nginx_workers_count Number of nginx worker processes"
  echo "# TYPE nginx_workers_count gauge"
  echo "nginx_workers_count $worker_count"

  # Kernel clock ticks per second; 100 is only the fallback.
  clk_tck=$(getconf CLK_TCK 2>/dev/null) || true
  clk_tck=$(int_or_default "$clk_tck" 100)
  if [[ "$clk_tck" -eq 0 ]]; then
    clk_tck=100
  fi

  # Word-splitting of the PID list is intentional.
  for pid in $nginx_pids; do
    [[ -d "/proc/$pid" ]] || continue

    # Memory (VmRSS is reported in kB; convert to bytes).
    rss=$(awk '/^VmRSS:/ {print $2; exit}' "/proc/$pid/status" 2>/dev/null) || true
    total_memory=$((total_memory + $(int_or_default "$rss" 0) * 1024))

    # CPU time: utime + stime are fields 14 and 15 of /proc/<pid>/stat.
    # Everything up to the ") " closing the command name is dropped first, so
    # a name containing spaces cannot shift the columns (field N -> index N-3).
    if stat_line=$(cat "/proc/$pid/stat" 2>/dev/null); then
      read -r -a fields <<<"${stat_line##*) }"
      total_cpu=$((total_cpu + $(int_or_default "${fields[11]:-}" 0) + $(int_or_default "${fields[12]:-}" 0)))
    fi

    # Open file descriptors; counts as 0 when the directory is unreadable.
    fds=$(find "/proc/$pid/fd" -mindepth 1 -maxdepth 1 2>/dev/null | wc -l) || true
    total_fds=$((total_fds + $(int_or_default "${fds//[[:space:]]/}" 0)))

    # Threads: the value of the "Threads:" line, not the number of such lines.
    threads=$(awk '/^Threads:/ {print $2; exit}' "/proc/$pid/status" 2>/dev/null) || true
    total_threads=$((total_threads + $(int_or_default "$threads" 1)))
  done

  cpu_seconds=$(awk -v ticks="$total_cpu" -v hz="$clk_tck" 'BEGIN { printf "%.2f", ticks / hz }')

  # NOTE(review): reconstructed - the original metric block was lost.
  cat <<EOF

# HELP nginx_process_memory_bytes Resident memory of all nginx processes
# TYPE nginx_process_memory_bytes gauge
nginx_process_memory_bytes $total_memory

# HELP nginx_process_cpu_seconds_total CPU time used by all nginx processes
# TYPE nginx_process_cpu_seconds_total counter
nginx_process_cpu_seconds_total $cpu_seconds

# HELP nginx_process_open_fds Open file descriptors of all nginx processes
# TYPE nginx_process_open_fds gauge
nginx_process_open_fds $total_fds

# HELP nginx_process_threads Threads of all nginx processes
# TYPE nginx_process_threads gauge
nginx_process_threads $total_threads

# HELP nginx_process_start_time_seconds Start time of the nginx master process (unix time)
# TYPE nginx_process_start_time_seconds gauge
EOF

  # Start time = boot time (btime in /proc/stat) + starttime (field 22, in
  # clock ticks since boot). Unlike "now - uptime" this does not drift.
  local process_start=0 starttime btime
  if stat_line=$(cat "/proc/$nginx_master_pid/stat" 2>/dev/null); then
    read -r -a fields <<<"${stat_line##*) }"
    starttime=$(int_or_default "${fields[19]:-}" 0)
    btime=$(awk '$1 == "btime" {print $2; exit}' /proc/stat 2>/dev/null) || true
    btime=$(int_or_default "$btime" 0)
    if [[ "$btime" -gt 0 ]]; then
      process_start=$((btime + starttime / clk_tck))
    fi
  fi
  echo "nginx_process_start_time_seconds $process_start"

  # Get max open files limit (soft limit column); "unlimited" is reported as 0.
  if [[ -f "/proc/$nginx_master_pid/limits" ]]; then
    local max_fds
    max_fds=$(awk '/^Max open files/ {print $4; exit}' "/proc/$nginx_master_pid/limits" 2>/dev/null) || true
    max_fds=$(int_or_default "$max_fds" 0)
    echo ""
    echo "# HELP nginx_process_max_fds Maximum number of open file descriptors"
    echo "# TYPE nginx_process_max_fds gauge"
    echo "nginx_process_max_fds $max_fds"
  fi
}

#########################
###  Config Metrics   ###
#########################

#######################################
# Emit metrics derived from static settings in nginx.conf.
# Globals: NGINX_CONF, SITES_DIR, CONF_D_DIR (read)
# Outputs: Prometheus text on stdout.
#######################################
collect_config_metrics() {
  if [[ ! -f "$NGINX_CONF" ]]; then
    echo "# nginx.conf not found at $NGINX_CONF"
    return
  fi

  local worker_processes worker_connections keepalive_timeout
  local multi_accept use_epoll sendfile tcp_nopush tcp_nodelay gzip_enabled

  # worker_processes can be 'auto' (or absent): use the CPU count instead.
  worker_processes=$(conf_directive_value worker_processes)
  if [[ -z "$worker_processes" || "$worker_processes" == "auto" ]]; then
    worker_processes=$(nproc 2>/dev/null) || true
  fi
  worker_processes=$(int_or_default "$worker_processes" 1)

  worker_connections=$(int_or_default "$(conf_directive_value worker_connections)" 0)

  # keepalive_timeout may carry an "s" suffix; other units are reported as 0.
  keepalive_timeout=$(conf_directive_value keepalive_timeout)
  keepalive_timeout=$(int_or_default "${keepalive_timeout%s}" 0)

  multi_accept=$(int_or_default "$(conf_flag multi_accept on)" 0)
  use_epoll=$(int_or_default "$(conf_flag use epoll)" 0)
  sendfile=$(int_or_default "$(conf_flag sendfile on)" 0)
  tcp_nopush=$(int_or_default "$(conf_flag tcp_nopush on)" 0)
  tcp_nodelay=$(int_or_default "$(conf_flag tcp_nodelay on)" 0)
  gzip_enabled=$(int_or_default "$(conf_flag gzip on)" 0)

  # NOTE(review): reconstructed - the original metric block was lost.
  cat <<EOF
# HELP nginx_config_worker_processes Configured worker_processes
# TYPE nginx_config_worker_processes gauge
nginx_config_worker_processes $worker_processes

# HELP nginx_config_worker_connections Configured worker_connections
# TYPE nginx_config_worker_connections gauge
nginx_config_worker_connections $worker_connections

# HELP nginx_config_keepalive_timeout_seconds Configured keepalive_timeout
# TYPE nginx_config_keepalive_timeout_seconds gauge
nginx_config_keepalive_timeout_seconds $keepalive_timeout

# HELP nginx_config_setting_enabled Whether a boolean setting is enabled in nginx.conf
# TYPE nginx_config_setting_enabled gauge
nginx_config_setting_enabled{setting="multi_accept"} $multi_accept
nginx_config_setting_enabled{setting="use_epoll"} $use_epoll
nginx_config_setting_enabled{setting="sendfile"} $sendfile
nginx_config_setting_enabled{setting="tcp_nopush"} $tcp_nopush
nginx_config_setting_enabled{setting="tcp_nodelay"} $tcp_nodelay
nginx_config_setting_enabled{setting="gzip"} $gzip_enabled
EOF

  # Count virtual hosts.
  # NOTE(review): the sites-enabled branch was lost; counting its files
  # (following symlinks) is assumed.
  local vhost_count=0
  if [[ -d "$SITES_DIR" ]]; then
    vhost_count=$(find -L "$SITES_DIR" -type f 2>/dev/null | wc -l) || true
  elif [[ -d "$CONF_D_DIR" ]]; then
    vhost_count=$(find "$CONF_D_DIR" -name "*.conf" -type f 2>/dev/null | wc -l) || true
  fi
  vhost_count=$(int_or_default "${vhost_count//[[:space:]]/}" 0)

  echo ""
  echo "# HELP nginx_config_vhosts_total Number of configured virtual hosts"
  echo "# TYPE nginx_config_vhosts_total gauge"
  echo "nginx_config_vhosts_total $vhost_count"

  # Calculate max possible connections (10# guards against leading zeros).
  local max_connections=$((10#$worker_processes * 10#$worker_connections))
  echo ""
  echo "# HELP nginx_config_max_connections Maximum theoretical connections (workers * connections)"
  echo "# TYPE nginx_config_max_connections gauge"
  echo "nginx_config_max_connections $max_connections"
}

##########################
### Access Log Metrics ###
##########################

# Render the access-log metrics for the log text given in $1.
# Assumes the combined log format:
#   $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent ...
# Each pipeline ends in awk or sort, so "no match" cannot fail under pipefail.
render_access_log_metrics() {
  local log_data=$1
  local count status method uri rank=1
  local count_1xx=0 count_2xx=0 count_3xx=0 count_4xx=0 count_5xx=0

  echo "# HELP nginx_http_requests_by_status_total HTTP requests by status code (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_requests_by_status_total gauge"
  while read -r count status; do
    [[ -n "$count" && -n "$status" ]] || continue
    echo "nginx_http_requests_by_status_total{status=\"$status\"} $count"
    # Aggregate by category
    case "${status:0:1}" in
      1) count_1xx=$((count_1xx + count)) ;;
      2) count_2xx=$((count_2xx + count)) ;;
      3) count_3xx=$((count_3xx + count)) ;;
      4) count_4xx=$((count_4xx + count)) ;;
      5) count_5xx=$((count_5xx + count)) ;;
    esac
  done < <(
    awk '$9 ~ /^[0-9][0-9][0-9]$/ { c[$9]++ } END { for (s in c) print c[s], s }' <<<"$log_data" \
      | sort -rn
  )

  cat <<EOF

# HELP nginx_http_requests_by_status_class_total HTTP requests by status class
# TYPE nginx_http_requests_by_status_class_total gauge
nginx_http_requests_by_status_class_total{class="1xx"} $count_1xx
nginx_http_requests_by_status_class_total{class="2xx"} $count_2xx
nginx_http_requests_by_status_class_total{class="3xx"} $count_3xx
nginx_http_requests_by_status_class_total{class="4xx"} $count_4xx
nginx_http_requests_by_status_class_total{class="5xx"} $count_5xx

# HELP nginx_http_response_bytes_total Total bytes sent in responses (from last $LOG_TAIL_LINES log lines)
# TYPE nginx_http_response_bytes_total gauge
EOF
  # %.0f keeps large sums in plain digits; a bare "print" may switch to
  # exponent notation in some awk implementations.
  awk '{ sum += $10 } END { printf "nginx_http_response_bytes_total %.0f\n", sum }' <<<"$log_data"

  echo ""
  echo "# HELP nginx_http_requests_by_method_total HTTP requests by method (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_requests_by_method_total gauge"
  while read -r count method; do
    [[ -n "$count" && -n "$method" ]] || continue
    echo "nginx_http_requests_by_method_total{method=\"$method\"} $count"
  done < <(
    awk -F'"' '
      { split($2, req, " "); m = req[1] }
      m ~ /^(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)$/ { c[m]++ }
      END { for (m in c) print c[m], m }
    ' <<<"$log_data" | sort -k2
  )

  echo ""
  echo "# HELP nginx_http_unique_clients Unique client IPs (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_unique_clients gauge"
  awk '!seen[$1]++ { n++ } END { printf "nginx_http_unique_clients %d\n", n }' <<<"$log_data"

  # Top URIs (for potential abuse detection). "awk NR <= 5" reads all of its
  # input, so sort never receives SIGPIPE the way it can with "head -5".
  echo ""
  echo "# HELP nginx_http_top_uri_requests_total Top requested URIs (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_top_uri_requests_total gauge"
  while read -r count uri; do
    [[ -n "$count" && -n "$uri" ]] || continue
    # Truncate, then escape for the label value.
    uri=$(prom_escape_label "${uri:0:100}")
    echo "nginx_http_top_uri_requests_total{uri=\"$uri\",rank=\"$rank\"} $count"
    rank=$((rank + 1))
  done < <(
    awk -F'"' '
      { split($2, req, " "); u = req[2] }
      u != "" && u != "-" { c[u]++ }
      END { for (u in c) print c[u], u }
    ' <<<"$log_data" | sort -rn | awk 'NR <= 5'
  )

  echo ""
  echo "# HELP nginx_http_requests_in_sample Total requests in sample window"
  echo "# TYPE nginx_http_requests_in_sample gauge"
  awk 'END { printf "nginx_http_requests_in_sample %d\n", NR }' <<<"$log_data"
}

#######################################
# Emit metrics parsed from the tail of the access log. The result is cached
# in STATE_DIR and re-parsed at most every LOG_PARSE_INTERVAL seconds.
# Globals: ACCESS_LOG, STATE_DIR, LOG_TAIL_LINES, LOG_PARSE_INTERVAL (read),
#          LAST_LOG_PARSE (written)
# Outputs: Prometheus text on stdout.
#######################################
collect_access_log_metrics() {
  if [[ ! -f "$ACCESS_LOG" || ! -r "$ACCESS_LOG" ]]; then
    echo "# Access log not readable at $ACCESS_LOG"
    return
  fi

  local now cache="$STATE_DIR/log_metrics" stamp="$STATE_DIR/last_parse"
  now=$(date +%s)

  # Only parse logs every LOG_PARSE_INTERVAL seconds
  if [[ -f "$stamp" ]]; then
    LAST_LOG_PARSE=$(int_or_default "$(cat "$stamp" 2>/dev/null || true)" 0)
  fi
  if ((now - LAST_LOG_PARSE < LOG_PARSE_INTERVAL)) && [[ -f "$cache" ]]; then
    cat "$cache"
    return
  fi

  local log_data
  log_data=$(tail -n "$LOG_TAIL_LINES" "$ACCESS_LOG" 2>/dev/null) || true
  if [[ -z "$log_data" ]]; then
    echo "# No log data available"
    return
  fi

  local metrics_output
  metrics_output=$(render_access_log_metrics "$log_data")

  # Save metrics for caching. The stamp is written only after a successful
  # parse, and the cache is replaced by rename so a concurrent scrape never
  # reads a half-written file.
  mkdir -p "$STATE_DIR"
  printf '%s\n' "$metrics_output" >"$cache.$$"
  mv -f "$cache.$$" "$cache"
  echo "$now" >"$stamp"

  printf '%s\n' "$metrics_output"
}

#########################
### Error Log Metrics ###
#########################

#######################################
# Emit per-level message counts from the last 1000 error-log lines, plus the
# size and age of the log file.
# Globals: ERROR_LOG (read)
# Outputs: Prometheus text on stdout.
#######################################
collect_error_log_metrics() {
  if [[ ! -f "$ERROR_LOG" || ! -r "$ERROR_LOG" ]]; then
    echo "# Error log not readable at $ERROR_LOG"
    return
  fi

  # Count errors by level from last 1000 lines. An empty log simply yields
  # zero counts, so the series do not vanish after a rotation.
  local log_data level count
  log_data=$(tail -n 1000 "$ERROR_LOG" 2>/dev/null) || true

  # NOTE(review): reconstructed - the original metric names were lost.
  echo "# HELP nginx_error_log_entries Error log entries by level (from last 1000 log lines)"
  echo "# TYPE nginx_error_log_entries gauge"
  for level in emerg alert crit error warn notice info; do
    # grep -c prints 0 yet exits 1 when nothing matches; "|| true" keeps that
    # from being treated as a failure.
    count=$(grep -c -F "[$level]" <<<"$log_data" 2>/dev/null) || true
    echo "nginx_error_log_entries{level=\"$level\"} $(int_or_default "$count" 0)"
  done

  local log_size log_mtime now log_age
  log_size=$(stat -c %s "$ERROR_LOG" 2>/dev/null) || true
  log_mtime=$(stat -c %Y "$ERROR_LOG" 2>/dev/null) || true
  log_size=$(int_or_default "$log_size" 0)
  log_mtime=$(int_or_default "$log_mtime" 0)
  now=$(date +%s)
  log_age=$((now - log_mtime))

  # NOTE(review): reconstructed - the original metric names were lost.
  cat <<EOF

# HELP nginx_error_log_size_bytes Size of the error log file
# TYPE nginx_error_log_size_bytes gauge
nginx_error_log_size_bytes $log_size

# HELP nginx_error_log_last_modified_age_seconds Seconds since the error log was last written
# TYPE nginx_error_log_last_modified_age_seconds gauge
nginx_error_log_last_modified_age_seconds $log_age
EOF
}

#########################
###    SSL Metrics    ###
#########################

# Print the notAfter date of the certificate served by domain $1 on port 443.
# The handshake is bounded by timeout(1) when available so an unreachable
# host cannot stall a scrape.
ssl_remote_enddate() {
  local domain=$1
  local -a cmd=(openssl s_client -servername "$domain" -connect "$domain:443")
  if command -v timeout &>/dev/null; then
    cmd=(timeout 10 "${cmd[@]}")
  fi
  "${cmd[@]}" </dev/null 2>/dev/null \
    | openssl x509 -noout -enddate 2>/dev/null \
    | cut -d= -f2
}

#######################################
# Emit certificate expiry metrics. With SSL_CHECK_DOMAINS set, each domain is
# checked over the network; otherwise the certificate files referenced by
# ssl_certificate directives in the nginx configuration are inspected.
# Globals: SSL_CHECK_DOMAINS, NGINX_CONF, SITES_DIR, CONF_D_DIR (read)
# Outputs: Prometheus text on stdout.
#######################################
collect_ssl_metrics() {
  # NOTE(review): the opening lines of this function were lost; the guard and
  # the domains variable are reconstructed from the surviving body.
  local domains="$SSL_CHECK_DOMAINS"
  if ! command -v openssl &>/dev/null; then
    echo "# openssl not available, SSL metrics skipped"
    return
  fi

  local expiry_date expiry_epoch now_epoch days_left
  now_epoch=$(date +%s)

  if [[ -z "$domains" ]]; then
    # Try to extract certificate paths from the nginx config.
    local cert_files cert_file cn file_label
    cert_files=$(nginx_config_text \
      | awk '$1 == "ssl_certificate" { v = $2; sub(/;.*$/, "", v); gsub(/["\047]/, "", v); if (v != "") print v }' \
      | sort -u) || true

    if [[ -z "$cert_files" ]]; then
      echo "# No SSL certificates found in nginx config"
      return
    fi

    echo "# HELP nginx_ssl_certificate_expiry_days Days until SSL certificate expires"
    echo "# TYPE nginx_ssl_certificate_expiry_days gauge"
    echo "# HELP nginx_ssl_certificate_expiry_timestamp Unix timestamp when certificate expires"
    echo "# TYPE nginx_ssl_certificate_expiry_timestamp gauge"

    while IFS= read -r cert_file; do
      [[ -f "$cert_file" ]] || continue
      expiry_date=$(openssl x509 -enddate -noout -in "$cert_file" 2>/dev/null | cut -d= -f2) || true
      [[ -n "$expiry_date" ]] || continue

      expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null) || true
      expiry_epoch=$(int_or_default "$expiry_epoch" 0)
      days_left=$(((expiry_epoch - now_epoch) / 86400))

      # Get CN from certificate; fall back to the file name.
      cn=$(openssl x509 -subject -noout -in "$cert_file" 2>/dev/null \
        | sed -n 's|.*CN[[:space:]]*=[[:space:]]*\([^,/]*\).*|\1|p') || true
      if [[ -z "$cn" ]]; then
        cn=$(basename "$cert_file")
      fi
      cn=$(prom_escape_label "${cn// /_}")
      file_label=$(prom_escape_label "$cert_file")

      echo "nginx_ssl_certificate_expiry_days{certificate=\"$cn\",file=\"$file_label\"} $days_left"
      echo "nginx_ssl_certificate_expiry_timestamp{certificate=\"$cn\",file=\"$file_label\"} $expiry_epoch"
    done <<<"$cert_files"
    return
  fi

  # Check specified domains via network
  echo "# HELP nginx_ssl_certificate_expiry_days Days until SSL certificate expires"
  echo "# TYPE nginx_ssl_certificate_expiry_days gauge"
  echo "# HELP nginx_ssl_certificate_expiry_timestamp Unix timestamp when certificate expires"
  echo "# TYPE nginx_ssl_certificate_expiry_timestamp gauge"

  local domain domain_label
  local -a domain_array=()
  IFS=',' read -ra domain_array <<<"$domains"
  for domain in "${domain_array[@]}"; do
    domain=${domain//[[:space:]]/}
    [[ -n "$domain" ]] || continue
    domain_label=$(prom_escape_label "$domain")

    expiry_date=$(ssl_remote_enddate "$domain") || true
    if [[ -n "$expiry_date" ]]; then
      expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null) || true
      expiry_epoch=$(int_or_default "$expiry_epoch" 0)
      days_left=$(((expiry_epoch - now_epoch) / 86400))
      echo "nginx_ssl_certificate_expiry_days{domain=\"$domain_label\"} $days_left"
      echo "nginx_ssl_certificate_expiry_timestamp{domain=\"$domain_label\"} $expiry_epoch"
    else
      # -1 marks a domain whose certificate could not be fetched.
      echo "nginx_ssl_certificate_expiry_days{domain=\"$domain_label\"} -1"
    fi
  done
}

#########################
### Upstream Metrics  ###
#########################

#######################################
# Emit the number of configured upstream blocks and the number of "server"
# entries in each. This is a static count from the configuration; real
# upstream health needs nginx-plus or a lua module.
# Globals: NGINX_CONF, SITES_DIR, CONF_D_DIR (read)
# Outputs: Prometheus text on stdout; nothing when no upstream is configured.
#######################################
collect_upstream_metrics() {
  local config upstreams upstream server_count
  config=$(nginx_config_text)

  upstreams=$(awk '
    $1 == "upstream" && NF >= 2 { n = $2; sub(/\{.*$/, "", n); if (n != "") print n }
  ' <<<"$config" | sort -u) || true

  if [[ -z "$upstreams" ]]; then
    return
  fi

  echo "# HELP nginx_upstream_configured Number of configured upstreams"
  echo "# TYPE nginx_upstream_configured gauge"
  echo "nginx_upstream_configured $(wc -w <<<"$upstreams" | tr -d '[:space:]')"
  echo ""
  echo "# HELP nginx_upstream_servers_total Servers configured per upstream"
  echo "# TYPE nginx_upstream_servers_total gauge"

  while IFS= read -r upstream; do
    [[ -n "$upstream" ]] || continue
    # Count "server" lines between "upstream NAME {" and the line that closes
    # the block, instead of a fixed window of following lines.
    server_count=$(awk -v name="$upstream" '
      $1 == "upstream" { n = $2; sub(/\{.*$/, "", n); inside = (n == name); next }
      inside && $1 ~ /^\}/ { inside = 0; next }
      inside && $1 == "server" { count++ }
      END { print count + 0 }
    ' <<<"$config") || true
    echo "nginx_upstream_servers_total{upstream=\"$(prom_escape_label "$upstream")\"} $(int_or_default "$server_count" 0)"
  done <<<"$upstreams"
}

#########################
###  Version Metrics  ###
#########################

#######################################
# Emit the nginx version and which notable modules the binary was built with.
# Outputs: Prometheus text on stdout.
#######################################
collect_version_metrics() {
  local version modules_output entry module pattern enabled
  version=$(nginx -v 2>&1 | sed -n 's|.*nginx/\([0-9.]*\).*|\1|p' | head -n 1) || true
  version=${version:-unknown}

  echo "# HELP nginx_version_info Nginx version information"
  echo "# TYPE nginx_version_info gauge"
  echo "nginx_version_info{version=\"$version\"} 1"

  # Check if nginx is compiled with certain modules
  modules_output=$(nginx -V 2>&1) || true

  # NOTE(review): reconstructed - the original metric name was lost.
  echo ""
  echo "# HELP nginx_module_enabled Whether nginx was built with the module"
  echo "# TYPE nginx_module_enabled gauge"
  # Each entry is "label=extended-regex matched against 'nginx -V' output".
  for entry in \
    'ssl=with-http_ssl_module|--with-openssl' \
    'http2=http_v2_module' \
    'gzip=http_gzip' \
    'stub_status=http_stub_status_module' \
    'realip=http_realip_module' \
    'geoip=http_geoip_module' \
    'lua=lua|ndk_http_module'; do
    module=${entry%%=*}
    pattern=${entry#*=}
    if grep -qE -- "$pattern" <<<"$modules_output"; then
      enabled=1
    else
      enabled=0
    fi
    echo "nginx_module_enabled{module=\"$module\"} $enabled"
  done
}

#########################
###  System Metrics   ###
#########################

#######################################
# Emit system-wide file handle limits and usage.
# NOTE(review): reconstructed - this function's name, its opening lines and
# the first metric block were lost; only the value lookups survived.
# Outputs: Prometheus text on stdout.
#######################################
collect_system_metrics() {
  local file_max ulimit_n open_files
  file_max=$(cat /proc/sys/fs/file-max 2>/dev/null) || true
  ulimit_n=$(ulimit -n 2>/dev/null) || true
  file_max=$(int_or_default "$file_max" 0)
  ulimit_n=$(int_or_default "$ulimit_n" 0) # "unlimited" is reported as 0

  cat <<EOF
# HELP nginx_system_file_max System-wide maximum number of file handles
# TYPE nginx_system_file_max gauge
nginx_system_file_max $file_max

# HELP nginx_system_ulimit_nofile Open file limit of the exporter process
# TYPE nginx_system_ulimit_nofile gauge
nginx_system_ulimit_nofile $ulimit_n
EOF

  # The first field of file-nr is the number of allocated file handles.
  open_files=$(awk '{print $1; exit}' /proc/sys/fs/file-nr 2>/dev/null) || true
  open_files=$(int_or_default "$open_files" 0)
  echo ""
  echo "# HELP nginx_system_open_files Current system-wide open files"
  echo "# TYPE nginx_system_open_files gauge"
  echo "nginx_system_open_files $open_files"
}

###########################
### Collect All Metrics ###
###########################

#######################################
# Run every collector and print the combined exposition text.
# NOTE(review): reconstructed - the original header block and collector list
# were lost; the list below is every collector defined in this file.
# Outputs: Prometheus text on stdout.
#######################################
collect_all_metrics() {
  local hostname collector
  hostname=$(hostname -f 2>/dev/null || hostname)

  cat <<EOF
# Nginx metrics exporter
# Host: $hostname

EOF

  # Collectors are best-effort: a failing one is logged, and the remaining
  # ones still report.
  for collector in \
    collect_stub_status \
    collect_process_metrics \
    collect_config_metrics \
    collect_access_log_metrics \
    collect_error_log_metrics \
    collect_ssl_metrics \
    collect_upstream_metrics \
    collect_version_metrics \
    collect_system_metrics; do
    "$collector" || log "WARNING: $collector failed; its metrics are incomplete"
    echo ""
  done
}

#########################
###    HTTP Server    ###
#########################

#######################################
# Answer one HTTP request on stdin/stdout (run by socat per connection).
# NOTE(review): reconstructed - the original handler was lost entirely.
#######################################
handle_request() {
  local request_line method path header body

  IFS= read -r request_line || return 0
  request_line=${request_line%$'\r'}
  read -r method path _ <<<"$request_line" || true

  # Drain the request headers up to the blank line.
  while IFS= read -r header; do
    header=${header%$'\r'}
    if [[ -z "$header" ]]; then
      break
    fi
  done

  case "${path:-}" in
    /metrics | /metrics\?*)
      if body=$(collect_all_metrics); then
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nConnection: close\r\n\r\n'
        printf '%s\n' "$body"
      else
        printf 'HTTP/1.1 500 Internal Server Error\r\nContent-Type: text/plain\r\nConnection: close\r\n\r\ncollection failed\n'
      fi
      ;;
    *)
      printf 'HTTP/1.1 404 Not Found\r\nContent-Type: text/plain\r\nConnection: close\r\n\r\nMetrics are served at /metrics\n'
      ;;
  esac
}

#######################################
# Serve metrics over HTTP forever, restarting socat whenever it exits with an
# error.
# NOTE(review): the start of this function was lost; the socat invocation is
# reconstructed around the surviving restart logic.
# Globals: LISTEN_PORT and the collector settings (read, exported to children)
#######################################
start_server() {
  # Each connection runs a fresh copy of this script, which only sees the
  # environment; export the effective settings so CLI overrides reach it.
  export NGINX_EXPORTER_PORT="$LISTEN_PORT"
  export NGINX_STUB_URL="$STUB_STATUS_URL"
  export NGINX_ACCESS_LOG="$ACCESS_LOG"
  export NGINX_ERROR_LOG="$ERROR_LOG"
  export NGINX_CONF SSL_CHECK_DOMAINS

  log "Serving metrics on port $LISTEN_PORT"
  while true; do
    socat "TCP-LISTEN:${LISTEN_PORT},reuseaddr,fork" EXEC:"$0 --handle-request" 2>/dev/null || {
      log "Server error, restarting in 5 seconds..."
      sleep 5
    }
  done
}

#########################
###      Output       ###
#########################

#######################################
# Collect all metrics and write them to OUTPUT_FILE (or stdout when unset).
# The file is written under a temporary name and renamed so readers such as
# the node_exporter textfile collector never see a partial file.
# Globals: OUTPUT_FILE (read)
#######################################
write_output() {
  local metrics
  metrics=$(collect_all_metrics)

  if [[ -n "$OUTPUT_FILE" ]]; then
    local tmp_file="${OUTPUT_FILE}.$$"
    printf '%s\n' "$metrics" >"$tmp_file"
    mv -f "$tmp_file" "$OUTPUT_FILE"
  else
    printf '%s\n' "$metrics"
  fi
}

#########################
###       Main        ###
#########################

#######################################
# Entry point. "--handle-request" is the internal mode used by start_server
# for each HTTP connection; everything else goes through parse_args.
# Arguments: the script's command line.
#######################################
main() {
  if [[ "${1:-}" == "--handle-request" ]]; then
    handle_request
    exit 0
  fi

  parse_args "$@"
  setup

  if [[ "$HTTP_MODE" == true ]]; then
    start_server
  elif [[ -n "$OUTPUT_FILE" ]]; then
    write_output
  else
    collect_all_metrics
  fi
}

main "$@"