Files
linux-scripts/nginx-metrics-exporter.sh
T
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

1118 lines
38 KiB
Bash

#!/bin/bash
#############################################################
#### Nginx Metrics Exporter for Prometheus ####
#### Comprehensive nginx monitoring via stub_status, ####
#### logs, SSL, process, and config metrics ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 1.11 ####
#### ####
#### Usage: ./nginx-metrics-exporter.sh [OPTIONS] ####
#############################################################
#
# Metrics collected:
# - stub_status: connections, accepts, handled, requests, reading, writing, waiting
# - Process: worker count, memory usage, CPU usage, open files
# - Access logs: requests by status code, response times, bytes transferred
# - SSL: certificate expiry days for configured domains
# - Config: worker_processes, worker_connections, keepalive_timeout
# - Upstream: number of configured upstream blocks and servers per upstream (static config count, no live health checks)
#
# Requirements:
# - nginx with stub_status module enabled
# - socat (for HTTP server)
# - curl (for stub_status fetching)
#
# Strict mode: stop on an unhandled failing command (-e), on expansion of an
# unset variable (-u), and treat a pipeline as failed when any stage fails.
set -euo pipefail
#########################
### Configuration ###
#########################
# Every "${NAME:-default}" below uses the environment variable NAME when it is
# set and non-empty, otherwise the default after ":-". parse_args may later
# overwrite LISTEN_PORT, STUB_STATUS_URL, ACCESS_LOG, ERROR_LOG, NGINX_CONF and
# SSL_CHECK_DOMAINS from command-line options.
# TCP port the --http listener binds to.
LISTEN_PORT="${NGINX_EXPORTER_PORT:-9113}"
# URL fetched with curl to read nginx stub_status counters.
STUB_STATUS_URL="${NGINX_STUB_URL:-http://127.0.0.1/nginx_status}"
# Log files sampled by the access-log and error-log collectors.
ACCESS_LOG="${NGINX_ACCESS_LOG:-/var/log/nginx/access.log}"
ERROR_LOG="${NGINX_ERROR_LOG:-/var/log/nginx/error.log}"
# Main nginx configuration file plus the directories searched for vhosts,
# ssl_certificate directives and upstream blocks.
NGINX_CONF="${NGINX_CONF:-/etc/nginx/nginx.conf}"
SITES_DIR="${NGINX_SITES_DIR:-/etc/nginx/sites-enabled}"
CONF_D_DIR="${NGINX_CONF_D:-/etc/nginx/conf.d}"
# NOTE(review): SCRAPE_INTERVAL is not read by any function visible in this
# file - confirm whether it is still needed.
SCRAPE_INTERVAL="${SCRAPE_INTERVAL:-15}"
SSL_CHECK_DOMAINS="${SSL_CHECK_DOMAINS:-}" # Comma-separated domains probed on port 443; when empty, certificate files named in the nginx config are inspected instead
# Log parsing settings
LOG_TAIL_LINES="${LOG_TAIL_LINES:-10000}" # Number of trailing access-log lines sampled on each parse
LOG_PARSE_INTERVAL="${LOG_PARSE_INTERVAL:-60}" # Minimum seconds between access-log parses; cached metrics are served in between
# State files for log metrics
# Directory created by setup; collect_access_log_metrics keeps "last_parse"
# (epoch seconds of the last parse) and "log_metrics" (cached output) here.
STATE_DIR="/tmp/nginx-metrics"
# Epoch seconds of the last access-log parse; replaced by the content of
# $STATE_DIR/last_parse when that file exists.
LAST_LOG_PARSE=0
# Output mode
# --textfile writes to $TEXTFILE_DIR/nginx.prom; -o/--output sets OUTPUT_FILE
# directly; --http sets HTTP_MODE=true. With none of these, metrics go to stdout.
TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
#########################
### Logging ###
#########################
# Write one timestamped diagnostic line to stderr.
# Arguments: $* - message words (joined with single spaces)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stderr; nothing on stdout
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}
#########################
### Parse Arguments ###
#########################
# Parse command-line options into the global configuration variables.
# Globals:   TEXTFILE_DIR, LISTEN_PORT (read); OUTPUT_FILE, HTTP_MODE,
#            LISTEN_PORT, STUB_STATUS_URL, ACCESS_LOG, ERROR_LOG, NGINX_CONF,
#            SSL_CHECK_DOMAINS (written)
# Arguments: "$@" - the script's command-line arguments
# Outputs:   usage text on stdout for --help; error messages via log (stderr)
# Returns:   0 once every argument has been consumed. Exits 0 after --help and
#            exits 1 on an unknown option, a missing option value or a port
#            outside 1-65535.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/nginx.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      --output|-o|--port|--stub-url|--access-log|--error-log|--nginx-conf|--ssl-domains)
        # A value option given as the last argument would otherwise expand an
        # unset "$2", which under `set -u` ends the script with a bare
        # "unbound variable" message instead of a usable error.
        if [[ $# -lt 2 ]]; then
          log "ERROR: option $1 requires a value"
          exit 1
        fi
        case "$1" in
          --output|-o)
            OUTPUT_FILE="$2"
            ;;
          --port)
            if [[ ! "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
              log "ERROR: invalid port: $2"
              exit 1
            fi
            LISTEN_PORT="$2"
            ;;
          --stub-url)
            STUB_STATUS_URL="$2"
            ;;
          --access-log)
            ACCESS_LOG="$2"
            ;;
          --error-log)
            ERROR_LOG="$2"
            ;;
          --nginx-conf)
            NGINX_CONF="$2"
            ;;
          --ssl-domains)
            SSL_CHECK_DOMAINS="$2"
            ;;
        esac
        shift 2
        ;;
      --help)
        # The --http line no longer claims to be the default mode: with no
        # mode flag the metrics are written to stdout.
        cat <<EOF
Nginx Metrics Exporter for Prometheus
Usage: $0 [OPTIONS]
MODES:
--textfile Write to node_exporter textfile collector
--http Run HTTP server on port $LISTEN_PORT
(no flag) Output to stdout
OPTIONS:
--port PORT HTTP port (default: 9113)
-o, --output PATH Output file path
--stub-url URL Nginx stub_status URL (default: http://127.0.0.1/nginx_status)
--access-log PATH Path to access log (default: /var/log/nginx/access.log)
--error-log PATH Path to error log (default: /var/log/nginx/error.log)
--nginx-conf PATH Path to nginx.conf (default: /etc/nginx/nginx.conf)
--ssl-domains LIST Comma-separated domains to check SSL expiry
--help Show this help
EXAMPLES:
$0 --textfile # Write to textfile collector
$0 --http --port 9113 # Run HTTP server
$0 -o /tmp/nginx.prom # Write to custom file
$0 # Output to stdout
EOF
        exit 0
        ;;
      *)
        log "Unknown option: $1"
        exit 1
        ;;
    esac
  done
}
#########################
### Setup ###
#########################
# Report which supported package manager is installed.
# Outputs: one of apt, dnf, yum, zypper, pacman, apk on stdout - the first
#          whose executable is found, probed in that order - or an empty line
#          when none is available.
detect_package_manager() {
  local candidate
  # Each entry is "<executable to probe>:<name to report>".
  for candidate in apt-get:apt dnf:dnf yum:yum zypper:zypper pacman:pacman apk:apk; do
    if command -v "${candidate%%:*}" &>/dev/null; then
      echo "${candidate##*:}"
      return
    fi
  done
  echo ""
}
# Install a single package with whichever package manager is present.
# Arguments: $1 - package name
# Outputs:   progress and errors via log; package-manager output is passed through
# Returns:   the package manager's exit status, or 1 when no supported
#            package manager was detected
install_package() {
  local pkg="$1"
  local pkgmgr
  local -a installer
  pkgmgr=$(detect_package_manager)
  log "Installing $pkg..."
  case "$pkgmgr" in
    apt)
      # Refresh the package index first; stop with its status if that fails.
      apt-get update -qq || return
      installer=(apt-get install -y -qq)
      ;;
    dnf)
      installer=(dnf install -y -q)
      ;;
    yum)
      installer=(yum install -y -q)
      ;;
    zypper)
      installer=(zypper install -y -q)
      ;;
    pacman)
      installer=(pacman -S --noconfirm)
      ;;
    apk)
      installer=(apk add --quiet)
      ;;
    *)
      log "ERROR: Unknown package manager. Please install $pkg manually."
      return 1
      ;;
  esac
  "${installer[@]}" "$pkg"
}
# Prepare the runtime environment before any metrics are collected.
# Globals:   STATE_DIR, HTTP_MODE (read)
# Outputs:   progress, warnings and errors via log
# Returns:   0 when the required tools are available. Exits 1 when a required
#            tool is missing and cannot be installed.
setup() {
  mkdir -p "$STATE_DIR"
  # socat is only used by start_server for the --http listener, so textfile
  # and stdout runs must not fail (or install packages as root) because of it.
  if [[ "${HTTP_MODE:-false}" == true ]] && ! command -v socat &>/dev/null; then
    log "socat not found, attempting to install..."
    if [[ $EUID -eq 0 ]]; then
      if ! install_package socat; then
        log "ERROR: Failed to install socat"
        exit 1
      fi
      log "socat installed successfully"
    else
      log "ERROR: socat is required. Run as root to auto-install, or install manually:"
      log " Debian/Ubuntu: apt install socat"
      log " RHEL/CentOS: yum install socat"
      log " Fedora: dnf install socat"
      log " Alpine: apk add socat"
      exit 1
    fi
  fi
  # curl is needed in every mode to read stub_status.
  if ! command -v curl &>/dev/null; then
    log "curl not found, attempting to install..."
    if [[ $EUID -eq 0 ]]; then
      if ! install_package curl; then
        log "ERROR: Failed to install curl"
        exit 1
      fi
      log "curl installed successfully"
    else
      log "ERROR: curl is required. Run as root to auto-install, or install manually."
      exit 1
    fi
  fi
  # Check if nginx is running
  if ! pgrep -x nginx &>/dev/null && ! pidof nginx &>/dev/null; then
    log "WARNING: nginx process not found - process metrics will show nginx_process_running=0"
  fi
  # The stub_status probe is advisory: it logs warnings and returns 1 when the
  # endpoint is unusable. Called bare, that status would stop the whole script
  # under `set -e`, although the collectors are designed to report nginx_up=0.
  check_stub_status || true
}
# Probe the stub_status endpoint once and explain any problem found.
# Globals:   STUB_STATUS_URL (read)
# Outputs:   findings via log; setup help via show_stub_status_help on failure
# Returns:   0 when the endpoint answers 200 with stub_status content,
#            1 otherwise
check_stub_status() {
  log "Checking stub_status at $STUB_STATUS_URL..."
  local raw http_code body
  # One request returns both the body and, on a final extra line, the status
  # code. curl exits non-zero when it cannot connect; "|| true" keeps that from
  # ending the script under `set -e` so the "000" case below can report it.
  raw=$(curl -s -w '\n%{http_code}' --max-time 5 "$STUB_STATUS_URL" 2>/dev/null) || true
  http_code="${raw##*$'\n'}"
  body="${raw%$'\n'*}"
  # No usable status code at all (for example curl could not run) is treated
  # like a failed connection.
  if [[ ! "$http_code" =~ ^[0-9]{3}$ ]]; then
    http_code="000"
  fi
  case "$http_code" in
    200)
      # Verify it's actually stub_status output
      if [[ "$body" == *"Active connections:"* ]]; then
        log "✓ stub_status is working correctly"
        return 0
      fi
      log "WARNING: $STUB_STATUS_URL returned 200 but doesn't look like stub_status output"
      log " Expected 'Active connections:' in response"
      ;;
    000)
      log "WARNING: Cannot connect to $STUB_STATUS_URL (connection refused/timeout)"
      log " stub_status metrics will show nginx_up=0"
      ;;
    403)
      log "WARNING: Access denied to $STUB_STATUS_URL (HTTP 403)"
      log " Check 'allow' directives in stub_status location block"
      ;;
    404)
      log "WARNING: stub_status endpoint not found at $STUB_STATUS_URL (HTTP 404)"
      log " stub_status may not be configured"
      ;;
    *)
      log "WARNING: Unexpected response from $STUB_STATUS_URL (HTTP $http_code)"
      ;;
  esac
  show_stub_status_help
  return 1
}
# Log a sample nginx server block that enables stub_status, followed by the
# reload command and the --stub-url alternative.
# Outputs: one log call per line of help text
show_stub_status_help() {
  local -a help_lines=(
    ""
    "To enable stub_status, add this to your nginx config:"
    ""
    " server {"
    " listen 127.0.0.1:80;"
    " server_name localhost;"
    " "
    " location /nginx_status {"
    " stub_status on;"
    " access_log off;"
    " allow 127.0.0.1;"
    " deny all;"
    " }"
    " }"
    ""
    "Then reload nginx: nginx -t && systemctl reload nginx"
    ""
    "Or specify a different URL with: --stub-url <url>"
    ""
  )
  local help_line
  for help_line in "${help_lines[@]}"; do
    log "$help_line"
  done
}
#########################
### Stub Status Metrics ###
#########################
# Emit nginx_up plus the connection and request counters from stub_status.
# Globals:   STUB_STATUS_URL (read)
# Outputs:   Prometheus text-format metrics on stdout. Only nginx_up 0 is
#            printed when the endpoint cannot be fetched, answers with an HTTP
#            error, or returns something that is not stub_status output.
collect_stub_status() {
  local stub_output=""
  local active_connections=0 accepts=0 handled=0 requests=0
  local reading=0 writing=0 waiting=0
  local nl=$'\n' pattern
  echo "# HELP nginx_up Whether nginx stub_status is reachable"
  echo "# TYPE nginx_up gauge"
  # -f makes curl fail on HTTP >= 400; without it an error page (403/404)
  # would be reported as nginx_up 1 with every counter at 0.
  if ! stub_output=$(curl -sf --max-time 5 "$STUB_STATUS_URL" 2>/dev/null) \
      || [[ "$stub_output" != *"Active connections:"* ]]; then
    echo "nginx_up 0"
    return
  fi
  echo "nginx_up 1"
  # Parse stub_status output
  # Format:
  # Active connections: 43
  # server accepts handled requests
  # 7368 7368 10993
  # Reading: 0 Writing: 5 Waiting: 38
  # Bash regular expressions are used so that no PCRE-enabled grep is needed.
  pattern='Active connections:[[:blank:]]*([0-9]+)'
  if [[ "$stub_output" =~ $pattern ]]; then
    active_connections="${BASH_REMATCH[1]}"
  fi
  # The counters line is the only line that starts with three numbers.
  pattern="(^|${nl})[[:blank:]]*([0-9]+)[[:blank:]]+([0-9]+)[[:blank:]]+([0-9]+)"
  if [[ "$stub_output" =~ $pattern ]]; then
    accepts="${BASH_REMATCH[2]}"
    handled="${BASH_REMATCH[3]}"
    requests="${BASH_REMATCH[4]}"
  fi
  pattern='Reading:[[:blank:]]*([0-9]+)'
  if [[ "$stub_output" =~ $pattern ]]; then
    reading="${BASH_REMATCH[1]}"
  fi
  pattern='Writing:[[:blank:]]*([0-9]+)'
  if [[ "$stub_output" =~ $pattern ]]; then
    writing="${BASH_REMATCH[1]}"
  fi
  pattern='Waiting:[[:blank:]]*([0-9]+)'
  if [[ "$stub_output" =~ $pattern ]]; then
    waiting="${BASH_REMATCH[1]}"
  fi
  cat <<EOF
# HELP nginx_connections_active Current number of active client connections
# TYPE nginx_connections_active gauge
nginx_connections_active $active_connections
# HELP nginx_connections_accepted Total number of accepted client connections
# TYPE nginx_connections_accepted counter
nginx_connections_accepted $accepts
# HELP nginx_connections_handled Total number of handled connections
# TYPE nginx_connections_handled counter
nginx_connections_handled $handled
# HELP nginx_http_requests_total Total number of client requests
# TYPE nginx_http_requests_total counter
nginx_http_requests_total $requests
# HELP nginx_connections_reading Current number of connections where nginx is reading request header
# TYPE nginx_connections_reading gauge
nginx_connections_reading $reading
# HELP nginx_connections_writing Current number of connections where nginx is writing response
# TYPE nginx_connections_writing gauge
nginx_connections_writing $writing
# HELP nginx_connections_waiting Current number of idle client connections waiting for request
# TYPE nginx_connections_waiting gauge
nginx_connections_waiting $waiting
EOF
}
#########################
### Process Metrics ###
#########################
# Print one numeric field of a /proc/<pid>/stat file.
# Arguments: $1 - path of the stat file
#            $2 - field number, 1-based as in proc(5); must be >= 3
# Returns:   1 when the file cannot be read or the field is not an integer
_proc_stat_field() {
  local stat_file="$1" field="$2" line="" value
  local -a rest=()
  { IFS= read -r line < "$stat_file"; } 2>/dev/null || true
  if [[ -z "$line" ]]; then
    return 1
  fi
  # Drop "pid (comm) " up to the last ") " so a process name containing
  # spaces cannot shift the fields; rest[0] is then field 3 (state).
  read -r -a rest <<<"${line##*) }"
  value="${rest[field - 3]:-}"
  if [[ ! "$value" =~ ^-?[0-9]+$ ]]; then
    return 1
  fi
  printf '%s\n' "$value"
}

# Emit metrics about the running nginx master and worker processes.
# Globals:   NGINX_PROC_ROOT (read, optional) - alternative root of the proc
#            filesystem, defaults to /proc
# Outputs:   Prometheus text-format metrics on stdout; only
#            nginx_process_running 0 when no nginx process is found
collect_process_metrics() {
  local proc_root="${NGINX_PROC_ROOT:-/proc}"
  local master_pid pid_list pid
  local -a pids=() fd_entries=()
  local worker_count=0 total_memory=0 total_ticks=0 total_fds=0 total_threads=0
  local hz rss_kb threads utime stime cpu_seconds

  echo "# HELP nginx_process_running Whether nginx process is running"
  echo "# TYPE nginx_process_running gauge"
  # Oldest nginx process (pgrep -o) is taken as the master.
  master_pid=$(pgrep -x nginx -o 2>/dev/null || pidof nginx 2>/dev/null | awk '{print $1}' || true)
  if [[ -z "$master_pid" ]]; then
    echo "nginx_process_running 0"
    return
  fi
  echo "nginx_process_running 1"

  # Get all nginx PIDs
  pid_list=$(pgrep -x nginx 2>/dev/null || pidof nginx 2>/dev/null || true)
  read -r -d '' -a pids <<<"$pid_list" || true
  # Count workers (total processes minus master)
  worker_count=${#pids[@]}
  if (( worker_count > 0 )); then
    worker_count=$((worker_count - 1))
  fi
  echo "# HELP nginx_workers_count Number of nginx worker processes"
  echo "# TYPE nginx_workers_count gauge"
  echo "nginx_workers_count $worker_count"

  # Clock ticks per second for converting CPU and start times; fall back to
  # the common value of 100 when getconf is unavailable.
  hz=$(getconf CLK_TCK 2>/dev/null) || hz=""
  if [[ ! "$hz" =~ ^[1-9][0-9]*$ ]]; then
    hz=100
  fi

  for pid in ${pids[@]+"${pids[@]}"}; do
    if [[ ! -d "$proc_root/$pid" ]]; then
      continue
    fi
    # Resident memory (kB) and thread count, both read from the status file.
    # The thread count is the value of the "Threads:" line, not the number of
    # lines that match it.
    rss_kb=""
    threads=""
    if [[ -r "$proc_root/$pid/status" ]]; then
      read -r rss_kb threads < <(
        awk '/^VmRSS:/ { rss = $2 } /^Threads:/ { thr = $2 } END { print rss + 0, thr + 0 }' \
          "$proc_root/$pid/status" 2>/dev/null
      ) || true
    fi
    if [[ ! "$rss_kb" =~ ^[0-9]+$ ]]; then
      rss_kb=0
    fi
    if [[ ! "$threads" =~ ^[1-9][0-9]*$ ]]; then
      threads=1
    fi
    total_memory=$((total_memory + rss_kb * 1024))
    total_threads=$((total_threads + threads))
    # CPU time: utime (field 14) + stime (field 15), in clock ticks.
    utime=$(_proc_stat_field "$proc_root/$pid/stat" 14) || utime=0
    stime=$(_proc_stat_field "$proc_root/$pid/stat" 15) || stime=0
    total_ticks=$((total_ticks + utime + stime))
    # Open file descriptors. When the fd directory cannot be listed (for
    # example another user's process) the glob stays literal and the process
    # contributes 0 instead of corrupting the sum.
    fd_entries=("$proc_root/$pid/fd"/*)
    if [[ -e "${fd_entries[0]:-}" || -L "${fd_entries[0]:-}" ]]; then
      total_fds=$((total_fds + ${#fd_entries[@]}))
    fi
  done

  # awk does the division so the result is in seconds even when bc is absent.
  cpu_seconds=$(awk -v ticks="$total_ticks" -v hz="$hz" 'BEGIN { printf "%.2f", ticks / hz }')
  cat <<EOF
# HELP nginx_process_memory_bytes Total memory used by nginx processes (RSS)
# TYPE nginx_process_memory_bytes gauge
nginx_process_memory_bytes $total_memory
# HELP nginx_process_cpu_seconds_total Total CPU time consumed by nginx processes
# TYPE nginx_process_cpu_seconds_total counter
nginx_process_cpu_seconds_total $cpu_seconds
# HELP nginx_process_open_fds Total number of open file descriptors
# TYPE nginx_process_open_fds gauge
nginx_process_open_fds $total_fds
# HELP nginx_process_threads_total Total number of threads
# TYPE nginx_process_threads_total gauge
nginx_process_threads_total $total_threads
# HELP nginx_process_start_time_seconds Start time of nginx master process
# TYPE nginx_process_start_time_seconds gauge
EOF

  # Start time of the master: field 22 is the start time in ticks since boot,
  # so (uptime - starttime / hz) is the process age and now - age its start.
  local start_ticks uptime_s="" process_start=0
  if start_ticks=$(_proc_stat_field "$proc_root/$master_pid/stat" 22) \
      && { read -r uptime_s _ < "$proc_root/uptime"; } 2>/dev/null \
      && [[ "$uptime_s" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
    process_start=$(awk -v now="$(date +%s)" -v up="$uptime_s" -v st="$start_ticks" -v hz="$hz" \
      'BEGIN { printf "%.0f", now - (up - st / hz) }')
  fi
  echo "nginx_process_start_time_seconds $process_start"

  # Soft limit on open files of the master process; skipped when it is not a
  # plain number, because such a value could not be parsed as a sample.
  if [[ -r "$proc_root/$master_pid/limits" ]]; then
    local max_fds
    max_fds=$(awk '/^Max open files/ { print $4; exit }' "$proc_root/$master_pid/limits" 2>/dev/null) || max_fds=""
    if [[ "$max_fds" =~ ^[0-9]+$ ]]; then
      echo ""
      echo "# HELP nginx_process_max_fds Maximum number of open file descriptors"
      echo "# TYPE nginx_process_max_fds gauge"
      echo "nginx_process_max_fds $max_fds"
    fi
  fi
}
#########################
### Config Metrics ###
#########################
# Print the first argument of the first NAME directive in $NGINX_CONF, with
# the trailing ";" removed. Prints nothing when the directive is absent.
# Arguments: $1 - directive name
_nginx_conf_arg() {
  awk -v name="$1" '
    $1 == name { value = $2; sub(/;.*$/, "", value); print value; exit }
  ' "$NGINX_CONF" 2>/dev/null || true
}

# Succeed when $NGINX_CONF has a line "NAME VALUE;" (commented lines do not
# match because their first word is not NAME).
# Arguments: $1 - directive name, $2 - expected first argument
_nginx_conf_has() {
  awk -v name="$1" -v want="$2" '
    $1 == name { value = $2; sub(/;.*$/, "", value); if (value == want) { hit = 1; exit } }
    END { exit !hit }
  ' "$NGINX_CONF" 2>/dev/null
}

# Convert an nginx time value ("65", "65s", "2m", "1h30m", "500ms") to seconds.
# Arguments: $1 - time value
# Outputs:   seconds (with millisecond decimals when needed); 0 for an empty
#            or unparsable value
_nginx_time_to_seconds() {
  local rest="$1" total_ms=0 factor
  local ms_re='^([0-9]+)ms(.*)$' unit_re='^([0-9]+)([smhdwMy]?)(.*)$'
  while [[ -n "$rest" ]]; do
    # "ms" is tried first so that "5ms" is not read as 5 minutes plus "s".
    if [[ "$rest" =~ $ms_re ]]; then
      total_ms=$((total_ms + 10#${BASH_REMATCH[1]}))
      rest="${BASH_REMATCH[2]}"
    elif [[ "$rest" =~ $unit_re ]]; then
      case "${BASH_REMATCH[2]}" in
        m) factor=60000 ;;
        h) factor=3600000 ;;
        d) factor=86400000 ;;
        w) factor=604800000 ;;
        M) factor=2592000000 ;;
        y) factor=31536000000 ;;
        *) factor=1000 ;;
      esac
      total_ms=$((total_ms + 10#${BASH_REMATCH[1]} * factor))
      rest="${BASH_REMATCH[3]}"
    else
      echo 0
      return
    fi
  done
  if (( total_ms % 1000 == 0 )); then
    echo $((total_ms / 1000))
  else
    printf '%d.%03d\n' $((total_ms / 1000)) $((total_ms % 1000))
  fi
}

# Emit metrics derived from static settings in the main nginx configuration.
# Globals:   NGINX_CONF, SITES_DIR, CONF_D_DIR (read)
# Outputs:   Prometheus text-format metrics on stdout, or a single comment
#            line when $NGINX_CONF does not exist
collect_config_metrics() {
  if [[ ! -f "$NGINX_CONF" ]]; then
    echo "# nginx.conf not found at $NGINX_CONF"
    return
  fi
  local worker_processes worker_connections keepalive_timeout
  local multi_accept=0 use_epoll=0 sendfile=0 tcp_nopush=0 tcp_nodelay=0 gzip_enabled=0
  # Parse worker_processes (can be 'auto' or a number); a missing directive is
  # treated like 'auto'.
  worker_processes=$(_nginx_conf_arg worker_processes)
  if [[ -z "$worker_processes" || "$worker_processes" == "auto" ]]; then
    worker_processes=$(nproc 2>/dev/null || echo "1")
  fi
  # Anything non-numeric would break the multiplication further down and be
  # an unparsable sample value, so fall back to a number.
  if [[ ! "$worker_processes" =~ ^[0-9]+$ ]]; then
    worker_processes=1
  fi
  worker_connections=$(_nginx_conf_arg worker_connections)
  if [[ ! "$worker_connections" =~ ^[0-9]+$ ]]; then
    worker_connections=0
  fi
  # keepalive_timeout may carry a unit suffix; report plain seconds.
  keepalive_timeout=$(_nginx_time_to_seconds "$(_nginx_conf_arg keepalive_timeout)")
  # Check various settings
  if _nginx_conf_has multi_accept on; then multi_accept=1; fi
  if _nginx_conf_has use epoll; then use_epoll=1; fi
  if _nginx_conf_has sendfile on; then sendfile=1; fi
  if _nginx_conf_has tcp_nopush on; then tcp_nopush=1; fi
  if _nginx_conf_has tcp_nodelay on; then tcp_nodelay=1; fi
  if _nginx_conf_has gzip on; then gzip_enabled=1; fi
  cat <<EOF
# HELP nginx_config_worker_processes Configured number of worker processes
# TYPE nginx_config_worker_processes gauge
nginx_config_worker_processes $worker_processes
# HELP nginx_config_worker_connections Maximum connections per worker
# TYPE nginx_config_worker_connections gauge
nginx_config_worker_connections $worker_connections
# HELP nginx_config_keepalive_timeout_seconds Keepalive timeout in seconds
# TYPE nginx_config_keepalive_timeout_seconds gauge
nginx_config_keepalive_timeout_seconds $keepalive_timeout
# HELP nginx_config_multi_accept Whether multi_accept is enabled
# TYPE nginx_config_multi_accept gauge
nginx_config_multi_accept $multi_accept
# HELP nginx_config_use_epoll Whether epoll is configured
# TYPE nginx_config_use_epoll gauge
nginx_config_use_epoll $use_epoll
# HELP nginx_config_sendfile Whether sendfile is enabled
# TYPE nginx_config_sendfile gauge
nginx_config_sendfile $sendfile
# HELP nginx_config_tcp_nopush Whether tcp_nopush is enabled
# TYPE nginx_config_tcp_nopush gauge
nginx_config_tcp_nopush $tcp_nopush
# HELP nginx_config_tcp_nodelay Whether tcp_nodelay is enabled
# TYPE nginx_config_tcp_nodelay gauge
nginx_config_tcp_nodelay $tcp_nodelay
# HELP nginx_config_gzip Whether gzip is enabled
# TYPE nginx_config_gzip gauge
nginx_config_gzip $gzip_enabled
EOF
  # Count virtual hosts. find's own failure (for example an unreadable
  # subdirectory) must not fail the pipeline under pipefail.
  local vhost_count=0
  if [[ -d "$SITES_DIR" ]]; then
    vhost_count=$({ find "$SITES_DIR" \( -type f -o -type l \) 2>/dev/null || true; } | wc -l)
  elif [[ -d "$CONF_D_DIR" ]]; then
    vhost_count=$({ find "$CONF_D_DIR" -name "*.conf" -type f 2>/dev/null || true; } | wc -l)
  fi
  vhost_count=$((vhost_count + 0))
  echo ""
  echo "# HELP nginx_config_vhosts_total Number of configured virtual hosts"
  echo "# TYPE nginx_config_vhosts_total gauge"
  echo "nginx_config_vhosts_total $vhost_count"
  # Calculate max possible connections (10# guards against leading zeros
  # being read as octal).
  local max_connections=$((10#$worker_processes * 10#$worker_connections))
  echo ""
  echo "# HELP nginx_config_max_connections Maximum theoretical connections (workers * connections)"
  echo "# TYPE nginx_config_max_connections gauge"
  echo "nginx_config_max_connections $max_connections"
}
#########################
### Access Log Metrics ###
#########################
# Render the access-log metrics for an already loaded block of log lines.
# Assumes the combined log format:
#   $remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent ...
# Globals:   LOG_TAIL_LINES (read, used in HELP text only)
# Arguments: $1 - log lines, newline separated
# Outputs:   Prometheus text-format metrics on stdout
_render_access_log_metrics() {
  local log_data="$1"
  local count status method uri rank=1
  local count_1xx=0 count_2xx=0 count_3xx=0 count_4xx=0 count_5xx=0
  local total_bytes unique_ips sample_size

  # Count by status code
  echo "# HELP nginx_http_requests_by_status_total HTTP requests by status code (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_requests_by_status_total gauge"
  while read -r count status; do
    if [[ -z "$count" || -z "$status" ]]; then
      continue
    fi
    echo "nginx_http_requests_by_status_total{status=\"$status\"} $count"
    # Aggregate by category
    case "${status:0:1}" in
      1) count_1xx=$((count_1xx + count)) ;;
      2) count_2xx=$((count_2xx + count)) ;;
      3) count_3xx=$((count_3xx + count)) ;;
      4) count_4xx=$((count_4xx + count)) ;;
      5) count_5xx=$((count_5xx + count)) ;;
    esac
  done < <(awk '$9 ~ /^[0-9][0-9][0-9]$/ { print $9 }' <<<"$log_data" | sort | uniq -c | sort -rn)

  echo ""
  echo "# HELP nginx_http_requests_by_status_class_total HTTP requests by status class"
  echo "# TYPE nginx_http_requests_by_status_class_total gauge"
  echo "nginx_http_requests_by_status_class_total{class=\"1xx\"} $count_1xx"
  echo "nginx_http_requests_by_status_class_total{class=\"2xx\"} $count_2xx"
  echo "nginx_http_requests_by_status_class_total{class=\"3xx\"} $count_3xx"
  echo "nginx_http_requests_by_status_class_total{class=\"4xx\"} $count_4xx"
  echo "nginx_http_requests_by_status_class_total{class=\"5xx\"} $count_5xx"

  # Total bytes sent; %.0f keeps large sums out of exponent notation.
  total_bytes=$(awk '{ sum += $10 } END { printf "%.0f\n", sum }' <<<"$log_data")
  echo ""
  echo "# HELP nginx_http_response_bytes_total Total bytes sent in responses (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_response_bytes_total gauge"
  echo "nginx_http_response_bytes_total $total_bytes"

  # Count requests by method
  echo ""
  echo "# HELP nginx_http_requests_by_method_total HTTP requests by method (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_requests_by_method_total gauge"
  while read -r count method; do
    if [[ -n "$method" && -n "$count" ]]; then
      echo "nginx_http_requests_by_method_total{method=\"$method\"} $count"
    fi
  done < <(
    awk -F'"' '{ split($2, req, " "); if (req[1] ~ /^(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)$/) print req[1] }' \
      <<<"$log_data" | sort | uniq -c
  )

  # Count unique IPs
  unique_ips=$(awk '!seen[$1]++ { n++ } END { print n + 0 }' <<<"$log_data")
  echo ""
  echo "# HELP nginx_http_unique_clients Unique client IPs (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_unique_clients gauge"
  echo "nginx_http_unique_clients $unique_ips"

  # Top URIs (for potential abuse detection). The final awk reads its whole
  # input, unlike `head -5`, so the upstream sort is never killed by SIGPIPE.
  echo ""
  echo "# HELP nginx_http_top_uri_requests_total Top requested URIs (from last $LOG_TAIL_LINES log lines)"
  echo "# TYPE nginx_http_top_uri_requests_total gauge"
  while read -r count uri; do
    if [[ -z "$uri" || -z "$count" ]]; then
      continue
    fi
    # Truncate first, then escape backslashes and quotes as the exposition
    # format requires inside label values (nginx logs bytes such as '"' as
    # \x22, so backslashes are common here).
    uri="${uri:0:100}"
    uri="${uri//\\/\\\\}"
    uri="${uri//\"/\\\"}"
    echo "nginx_http_top_uri_requests_total{uri=\"$uri\",rank=\"$rank\"} $count"
    rank=$((rank + 1))
  done < <(
    awk -F'"' '{ split($2, req, " "); if (req[2] != "" && req[2] != "-") print req[2] }' <<<"$log_data" \
      | sort | uniq -c | sort -rn | awk 'NR <= 5'
  )

  # Number of log lines in the sample
  sample_size=$(grep -c '' <<<"$log_data") || true
  echo ""
  echo "# HELP nginx_http_requests_in_sample Total requests in sample window"
  echo "# TYPE nginx_http_requests_in_sample gauge"
  echo "nginx_http_requests_in_sample $sample_size"
}

# Emit metrics sampled from the tail of the access log, re-parsing the log at
# most once per LOG_PARSE_INTERVAL seconds and serving a cached copy otherwise.
# Globals:   ACCESS_LOG, STATE_DIR, LOG_TAIL_LINES, LOG_PARSE_INTERVAL (read);
#            LAST_LOG_PARSE (written)
# Outputs:   Prometheus text-format metrics on stdout, or a comment line when
#            the log is unreadable or empty
collect_access_log_metrics() {
  if [[ ! -f "$ACCESS_LOG" || ! -r "$ACCESS_LOG" ]]; then
    echo "# Access log not readable at $ACCESS_LOG"
    return
  fi
  local now log_data metrics_output tmp_cache
  local cache_file="$STATE_DIR/log_metrics" stamp_file="$STATE_DIR/last_parse"
  now=$(date +%s)
  # Only parse logs every LOG_PARSE_INTERVAL seconds
  if [[ -r "$stamp_file" ]]; then
    LAST_LOG_PARSE=$(<"$stamp_file") || LAST_LOG_PARSE=0
  fi
  # A damaged state file must not break the arithmetic below.
  if [[ ! "$LAST_LOG_PARSE" =~ ^[0-9]+$ ]]; then
    LAST_LOG_PARSE=0
  fi
  if (( now - 10#$LAST_LOG_PARSE < LOG_PARSE_INTERVAL )) && [[ -f "$cache_file" ]]; then
    cat "$cache_file"
    return
  fi
  log_data=$(tail -n "$LOG_TAIL_LINES" "$ACCESS_LOG" 2>/dev/null || true)
  if [[ -z "$log_data" ]]; then
    echo "# No log data available"
    return
  fi
  metrics_output=$(_render_access_log_metrics "$log_data")
  # Save metrics for caching. This is best effort: the cache is replaced
  # atomically, and the timestamp is written only after the cache exists, so a
  # failed write simply causes a fresh parse next time.
  if mkdir -p "$STATE_DIR" 2>/dev/null; then
    tmp_cache="${cache_file}.$$"
    if printf '%s\n' "$metrics_output" 2>/dev/null > "$tmp_cache" \
        && mv -f "$tmp_cache" "$cache_file" 2>/dev/null; then
      printf '%s\n' "$now" 2>/dev/null > "$stamp_file" || true
    else
      rm -f "$tmp_cache" 2>/dev/null || true
    fi
  fi
  printf '%s\n' "$metrics_output"
}
#########################
### Error Log Metrics ###
#########################
# Emit per-level message counts from the last 1000 error-log lines, plus the
# log file's size and the seconds elapsed since it was last modified.
# Globals:   ERROR_LOG (read)
# Outputs:   Prometheus text-format metrics on stdout; a comment line when the
#            log is not a readable file; nothing when the log is empty
collect_error_log_metrics() {
  if ! [[ -f "$ERROR_LOG" && -r "$ERROR_LOG" ]]; then
    echo "# Error log not readable at $ERROR_LOG"
    return
  fi
  local recent_lines
  recent_lines=$(tail -n 1000 "$ERROR_LOG" 2>/dev/null || echo "")
  if [[ -z "$recent_lines" ]]; then
    return
  fi
  # One fixed-string count per severity; a line mentioning several levels is
  # counted once for each of them.
  local level tally
  local -a tallies=()
  for level in emerg alert crit error warn notice info; do
    tally=$(grep -c -F "[$level]" <<<"$recent_lines" 2>/dev/null) || tally=0
    tallies+=("nginx_error_log_messages_total{level=\"$level\"} $tally")
  done
  printf '%s\n' \
    "# HELP nginx_error_log_messages_total Error log messages by level (from last 1000 lines)" \
    "# TYPE nginx_error_log_messages_total gauge" \
    "${tallies[@]}"
  # Size of the log file and how long ago it was last written to.
  local size_bytes mtime_epoch current_epoch idle_seconds
  size_bytes=$(stat -c %s "$ERROR_LOG" 2>/dev/null || echo "0")
  mtime_epoch=$(stat -c %Y "$ERROR_LOG" 2>/dev/null || echo "0")
  current_epoch=$(date +%s)
  idle_seconds=$((current_epoch - mtime_epoch))
  printf '%s\n' \
    "# HELP nginx_error_log_size_bytes Size of error log file" \
    "# TYPE nginx_error_log_size_bytes gauge" \
    "nginx_error_log_size_bytes $size_bytes" \
    "# HELP nginx_error_log_last_modified_seconds Seconds since error log was last modified" \
    "# TYPE nginx_error_log_last_modified_seconds gauge" \
    "nginx_error_log_last_modified_seconds $idle_seconds"
}
#########################
### SSL Certificate Metrics ###
#########################
# Emit certificate expiry metrics.
# With SSL_CHECK_DOMAINS empty, every certificate file named by an
# ssl_certificate directive in the nginx configuration is inspected on disk;
# otherwise each listed domain is probed over TLS on port 443.
# Globals:   SSL_CHECK_DOMAINS, SITES_DIR, CONF_D_DIR, NGINX_CONF (read)
# Outputs:   Prometheus text-format metrics on stdout. A domain whose
#            certificate cannot be fetched or parsed reports expiry_days -1.
collect_ssl_metrics() {
  local domains="$SSL_CHECK_DOMAINS"
  local expiry_date expiry_epoch now_epoch days_left
  now_epoch=$(date +%s)

  # If no domains specified, try to find them from nginx config
  if [[ -z "$domains" ]]; then
    local cert_files cert_file subject cn
    local cn_re='CN[[:space:]]*=[[:space:]]*([^,/]+)'
    # -R (not -r) follows the symlinks that sites-enabled normally consists of.
    # Anchoring on the directive name skips commented-out lines and
    # ssl_certificate_key while keeping lines that carry a trailing comment;
    # prefixed variants such as proxy_ssl_certificate are still included.
    cert_files=$(
      { grep -RhE '^[[:space:]]*([a-z_]+_)?ssl_certificate[[:space:]]' \
          "$SITES_DIR" "$CONF_D_DIR" "$NGINX_CONF" 2>/dev/null || true; } \
        | awk '{ path = $2; sub(/;.*$/, "", path); if (path != "") print path }' \
        | sort -u
    )
    if [[ -z "$cert_files" ]]; then
      echo "# No SSL certificates found in nginx config"
      return
    fi
    echo "# HELP nginx_ssl_certificate_expiry_days Days until SSL certificate expires"
    echo "# TYPE nginx_ssl_certificate_expiry_days gauge"
    echo "# HELP nginx_ssl_certificate_expiry_timestamp Unix timestamp when certificate expires"
    echo "# TYPE nginx_ssl_certificate_expiry_timestamp gauge"
    while IFS= read -r cert_file; do
      if [[ ! -f "$cert_file" ]]; then
        continue
      fi
      # stdin is redirected so openssl can never consume the loop's input.
      expiry_date=$(openssl x509 -enddate -noout -in "$cert_file" 2>/dev/null </dev/null | cut -d= -f2) || true
      if [[ -z "$expiry_date" ]]; then
        continue
      fi
      # Skip the certificate when the date cannot be parsed instead of
      # publishing epoch 0 and a huge negative day count.
      expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null) || continue
      days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
      # Get CN from certificate, falling back to the file name
      subject=$(openssl x509 -subject -noout -in "$cert_file" 2>/dev/null </dev/null) || true
      if [[ "$subject" =~ $cn_re ]]; then
        cn="${BASH_REMATCH[1]}"
      else
        cn=$(basename "$cert_file")
      fi
      cn="${cn// /_}"
      echo "nginx_ssl_certificate_expiry_days{certificate=\"$cn\",file=\"$cert_file\"} $days_left"
      echo "nginx_ssl_certificate_expiry_timestamp{certificate=\"$cn\",file=\"$cert_file\"} $expiry_epoch"
    done <<<"$cert_files"
    return 0
  fi

  # Check specified domains via network
  echo "# HELP nginx_ssl_certificate_expiry_days Days until SSL certificate expires"
  echo "# TYPE nginx_ssl_certificate_expiry_days gauge"
  echo "# HELP nginx_ssl_certificate_expiry_timestamp Unix timestamp when certificate expires"
  echo "# TYPE nginx_ssl_certificate_expiry_timestamp gauge"
  local domain
  local -a domain_array=()
  # Bound each probe when `timeout` is available so an unreachable host
  # cannot stall the whole scrape.
  local -a probe=(openssl s_client)
  if command -v timeout &>/dev/null; then
    probe=(timeout 10 openssl s_client)
  fi
  IFS=',' read -ra domain_array <<<"$domains"
  for domain in ${domain_array[@]+"${domain_array[@]}"}; do
    domain="${domain//[[:space:]]/}"
    if [[ -z "$domain" ]]; then
      continue
    fi
    # "|| true": under pipefail a non-zero exit of the probe must not discard
    # a date that was read successfully.
    expiry_date=$("${probe[@]}" -servername "$domain" -connect "$domain:443" </dev/null 2>/dev/null \
      | openssl x509 -noout -enddate 2>/dev/null | cut -d= -f2) || true
    if [[ -n "$expiry_date" ]] && expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null); then
      days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
      echo "nginx_ssl_certificate_expiry_days{domain=\"$domain\"} $days_left"
      echo "nginx_ssl_certificate_expiry_timestamp{domain=\"$domain\"} $expiry_epoch"
    else
      echo "nginx_ssl_certificate_expiry_days{domain=\"$domain\"} -1"
    fi
  done
}
#########################
### Upstream Metrics ###
#########################
# Emit the number of configured upstream blocks and the number of server
# entries inside each of them. This is a static count taken from the config
# files - for real upstream health you'd need nginx-plus or a lua module.
# Globals:   NGINX_CONF, SITES_DIR, CONF_D_DIR (read)
# Outputs:   Prometheus text-format metrics on stdout; nothing when no
#            upstream block is found
collect_upstream_metrics() {
  local -a conf_files=()
  local conf_file summary upstream server_count upstream_total
  if [[ -f "$NGINX_CONF" ]]; then
    conf_files+=("$NGINX_CONF")
  fi
  # -L follows the symlinks that sites-enabled normally consists of.
  while IFS= read -r -d '' conf_file; do
    conf_files+=("$conf_file")
  done < <(find -L "$SITES_DIR" "$CONF_D_DIR" -type f -print0 2>/dev/null)
  if (( ${#conf_files[@]} == 0 )); then
    return 0
  fi
  # Tokenise the configuration instead of pattern-matching lines:
  #  - only the directive "upstream" at the start of a statement opens a
  #    block, so "proxy_next_upstream error ..." is not taken for an upstream
  #    named "error";
  #  - only "server" statements between that block's braces are counted, so
  #    later "server {" blocks do not inflate the number.
  # Output is one "name count" line per upstream.
  summary=$(
    awk '
      FNR == 1 { prev = ";"; current = ""; want_name = 0 }
      {
        sub(/#.*/, "")
        gsub(/[{};]/, " & ")
        for (i = 1; i <= NF; i++) {
          token = $i
          at_start = (prev == ";" || prev == "{" || prev == "}")
          if (want_name) {
            current = token
            want_name = 0
            if (!(current in servers)) { servers[current] = 0; order[++total] = current }
          } else if (current == "" && at_start && token == "upstream") {
            want_name = 1
          } else if (current != "" && token == "}") {
            current = ""
          } else if (current != "" && at_start && token == "server") {
            servers[current]++
          }
          prev = token
        }
      }
      END { for (i = 1; i <= total; i++) print order[i], servers[order[i]] }
    ' "${conf_files[@]}" 2>/dev/null | sort
  ) || true
  if [[ -z "$summary" ]]; then
    return 0
  fi
  upstream_total=$(grep -c '' <<<"$summary") || true
  echo "# HELP nginx_upstream_configured Number of configured upstreams"
  echo "# TYPE nginx_upstream_configured gauge"
  echo "nginx_upstream_configured $upstream_total"
  echo ""
  echo "# HELP nginx_upstream_servers_total Servers configured per upstream"
  echo "# TYPE nginx_upstream_servers_total gauge"
  while read -r upstream server_count; do
    if [[ -n "$upstream" ]]; then
      echo "nginx_upstream_servers_total{upstream=\"$upstream\"} $server_count"
    fi
  done <<<"$summary"
}
#########################
### Version Metrics ###
#########################
# Emit the nginx version and which selected modules the binary was built with.
# Outputs:   Prometheus text-format metrics on stdout. The version label is
#            "unknown" and every module is 0 when the nginx binary cannot be
#            queried.
collect_version_metrics() {
  local version="unknown" version_output modules_output
  local has_ssl=0 has_http2=0 has_gzip=0 has_stub_status=0
  local has_realip=0 has_geoip=0 has_lua=0
  local version_re='nginx/([0-9.]+)'
  # nginx prints its version on stderr; a bash regex avoids needing a
  # PCRE-enabled grep.
  version_output=$(nginx -v 2>&1) || true
  if [[ "$version_output" =~ $version_re ]]; then
    version="${BASH_REMATCH[1]}"
  fi
  echo "# HELP nginx_version_info Nginx version information"
  echo "# TYPE nginx_version_info gauge"
  echo "nginx_version_info{version=\"$version\"} 1"
  # Check if nginx is compiled with certain modules
  modules_output=$(nginx -V 2>&1) || modules_output=""
  case "$modules_output" in
    *with-http_ssl_module*|*--with-openssl*) has_ssl=1 ;;
  esac
  case "$modules_output" in
    *http_v2_module*) has_http2=1 ;;
  esac
  # The gzip module is part of a default build and can only be switched off
  # with --without-http_gzip_module, so it counts as present whenever the
  # configure arguments are known and do not contain that switch. Searching
  # for "http_gzip" would miss default builds and report 1 exactly when the
  # module was disabled.
  if [[ "$modules_output" == *"configure arguments:"* \
      && "$modules_output" != *"--without-http_gzip_module"* ]]; then
    has_gzip=1
  fi
  case "$modules_output" in
    *http_stub_status_module*) has_stub_status=1 ;;
  esac
  case "$modules_output" in
    *http_realip_module*) has_realip=1 ;;
  esac
  case "$modules_output" in
    *http_geoip_module*) has_geoip=1 ;;
  esac
  case "$modules_output" in
    *lua*|*ndk_http_module*) has_lua=1 ;;
  esac
  cat <<EOF
# HELP nginx_module_enabled Whether nginx module is compiled in
# TYPE nginx_module_enabled gauge
nginx_module_enabled{module="ssl"} $has_ssl
nginx_module_enabled{module="http2"} $has_http2
nginx_module_enabled{module="gzip"} $has_gzip
nginx_module_enabled{module="stub_status"} $has_stub_status
nginx_module_enabled{module="realip"} $has_realip
nginx_module_enabled{module="geoip"} $has_geoip
nginx_module_enabled{module="lua"} $has_lua
EOF
}
#########################
### File Descriptor Metrics ###
#########################
# Emit system-wide file-descriptor figures that bound what nginx can open.
# Outputs:   Prometheus text-format metrics on stdout: the kernel file-max
#            limit, this shell's `ulimit -n`, and the first field of
#            /proc/sys/fs/file-nr
collect_system_metrics() {
  local kernel_file_max shell_nofile files_in_use
  kernel_file_max=$(cat /proc/sys/fs/file-max 2>/dev/null || echo "0")
  shell_nofile=$(ulimit -n 2>/dev/null || echo "0")
  # First field of file-nr is the number of allocated file handles.
  files_in_use=$(awk '{print $1}' /proc/sys/fs/file-nr 2>/dev/null || echo "0")
  printf '%s\n' \
    "# HELP nginx_system_file_max System-wide maximum file descriptors" \
    "# TYPE nginx_system_file_max gauge" \
    "nginx_system_file_max $kernel_file_max" \
    "# HELP nginx_system_ulimit_n Current shell ulimit for open files" \
    "# TYPE nginx_system_ulimit_n gauge" \
    "nginx_system_ulimit_n $shell_nofile" \
    "" \
    "# HELP nginx_system_open_files Current system-wide open files" \
    "# TYPE nginx_system_open_files gauge" \
    "nginx_system_open_files $files_in_use"
}
#########################
### Collect All Metrics ###
#########################
# Run every collector in a fixed order and print the combined exposition.
# Outputs:   a three-line comment header (title, host name, collection time)
#            followed by each collector's output, with one blank line between
#            consecutive collectors
collect_all_metrics() {
  local host_name collector first_section=1
  host_name=$(hostname -f 2>/dev/null || hostname)
  printf '%s\n' \
    "# Nginx Metrics Exporter" \
    "# Host: $host_name" \
    "# Collected at: $(date -Iseconds)"
  for collector in \
    collect_version_metrics \
    collect_stub_status \
    collect_process_metrics \
    collect_config_metrics \
    collect_access_log_metrics \
    collect_error_log_metrics \
    collect_ssl_metrics \
    collect_upstream_metrics \
    collect_system_metrics; do
    # Blank separator before every section except the first.
    if (( first_section )); then
      first_section=0
    else
      echo ""
    fi
    "$collector"
  done
}
#########################
### HTTP Server ###
#########################
# Write a complete HTTP/1.1 response to stdout.
# Arguments: $1 - status code and reason, e.g. "200 OK"
#            $2 - Content-Type value
#            $3 - response body, sent exactly as given
_http_respond() {
  local status_line="$1" content_type="$2" body="$3" body_bytes
  # Content-Length is a byte count; ${#body} would count characters.
  body_bytes=$(( $(printf '%s' "$body" | wc -c) ))
  # Header lines end in CRLF and an empty line separates them from the body.
  printf 'HTTP/1.1 %s\r\nContent-Type: %s\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s' \
    "$status_line" "$content_type" "$body_bytes" "$body"
}

# Serve one HTTP request read from stdin and answer on stdout (run once per
# connection by socat, see start_server).
# Routes:    /metrics and /  -> all metrics
#            /health, /healthz -> "OK"
#            anything else   -> 404
# A query string after the path is ignored.
handle_request() {
  local request_line="" line path metrics
  # Read the request line and skip the headers up to the first empty line.
  while IFS= read -r line; do
    line="${line%$'\r'}"
    if [[ -z "$line" ]]; then
      break
    fi
    if [[ -z "$request_line" ]]; then
      request_line="$line"
    fi
  done
  # Request line is "<method> <target> <version>"; only the target is used.
  read -r _ path _ <<<"$request_line" || true
  path="${path%%\?*}"
  case "$path" in
    /metrics|/)
      # Serve whatever was collected even if a collector reported a failure.
      metrics=$(collect_all_metrics) || true
      _http_respond "200 OK" "text/plain; charset=utf-8" "${metrics}"$'\n'
      ;;
    /health|/healthz)
      _http_respond "200 OK" "text/plain" $'OK\n'
      ;;
    *)
      _http_respond "404 Not Found" "text/plain" $'Not Found\n'
      ;;
  esac
}
# Run the HTTP listener: socat accepts connections on LISTEN_PORT and starts
# this script with --handle-request for each one. Never returns; a failing
# socat is restarted after five seconds.
# Globals:   LISTEN_PORT, STUB_STATUS_URL, ACCESS_LOG, ERROR_LOG, NGINX_CONF,
#            SSL_CHECK_DOMAINS (read and exported to the request handlers)
start_server() {
  log "Starting Nginx Metrics Exporter on port $LISTEN_PORT"
  log "Metrics available at http://localhost:$LISTEN_PORT/metrics"
  log "Stub status URL: $STUB_STATUS_URL"
  # Each request is served by a fresh process that rebuilds its configuration
  # from the environment only. Export the effective settings so that values
  # given on the command line (--stub-url, --access-log, ...) also apply to
  # the handlers instead of silently falling back to the defaults.
  export NGINX_EXPORTER_PORT="$LISTEN_PORT"
  export NGINX_STUB_URL="$STUB_STATUS_URL"
  export NGINX_ACCESS_LOG="$ACCESS_LOG"
  export NGINX_ERROR_LOG="$ERROR_LOG"
  export NGINX_CONF SSL_CHECK_DOMAINS
  while true; do
    socat TCP-LISTEN:"$LISTEN_PORT",reuseaddr,fork EXEC:"$0 --handle-request" 2>/dev/null || {
      log "Server error, restarting in 5 seconds..."
      sleep 5
    }
  done
}
#########################
### Output ###
#########################
# Collect all metrics once and deliver them to the configured destination.
# Globals:   OUTPUT_FILE (read)
# Outputs:   the metrics on stdout when OUTPUT_FILE is empty; otherwise the
#            metrics are written to "<OUTPUT_FILE>.<pid>" and then renamed to
#            OUTPUT_FILE, so readers never see a partially written file
write_output() {
  local rendered staging_file
  rendered=$(collect_all_metrics)
  if [[ -z "$OUTPUT_FILE" ]]; then
    printf '%s\n' "$rendered"
    return
  fi
  staging_file="${OUTPUT_FILE}.$$"
  printf '%s\n' "$rendered" > "$staging_file"
  mv "$staging_file" "$OUTPUT_FILE"
}
#########################
### Main ###
#########################
# Entry point: choose what to do from the command line.
#   --handle-request as first argument: serve one HTTP request on
#     stdin/stdout and exit (used by the socat listener).
#   otherwise: parse options, run setup, then start the HTTP server, write
#     the output file, or print the metrics to stdout - in that order of
#     precedence.
# Arguments: "$@" - the script's command-line arguments
main() {
  case "${1:-}" in
    --handle-request)
      handle_request
      exit 0
      ;;
  esac
  parse_args "$@"
  setup
  # --http wins over an output file; stdout is the fallback.
  local mode="stdout"
  if [[ -n "$OUTPUT_FILE" ]]; then
    mode="file"
  fi
  if [[ "$HTTP_MODE" == true ]]; then
    mode="http"
  fi
  case "$mode" in
    http) start_server ;;
    file) write_output ;;
    *) collect_all_metrics ;;
  esac
}
# Script entry: pass the original command-line arguments through to main.
main "$@"