#!/bin/bash
#############################################################
####   HAProxy Metrics Exporter for Prometheus           ####
####   Extended metrics via runtime API, config parsing, ####
####   SSL cert checking, stick tables, and log analysis ####
####                                                     ####
####   Author:  Phil Connor                              ####
####   Contact: contact@mylinux.work                     ####
####   License: MIT                                      ####
####   Version: 1.01                                     ####
####                                                     ####
####   Usage: ./haproxy-metrics.sh [OPTIONS]             ####
#############################################################
#
# Metrics collected (haproxy_extended_ prefix):
#   - Process: up, version, uptime, CPU, RSS memory, process count, open FDs
#   - Backend health: per-server status, weight, sessions, check duration,
#     failed checks, time since last change
#   - Connections: frontend sessions and session rate, backend queue depth,
#     CurrConns / MaxConn from "show info"
#   - SSL: certificate expiry per certificate file
#   - Stick tables: entry counts, configured size, utilization
#   - Errors: log-parsed 4xx/5xx, connection errors, retries, denials
#   - Config: frontend/backend/server counts, ACL rules, maxconn
#   - Reload: count (read from the "TotalReloads" field of "show info")
#
# Requirements:
#   - Bash 4.0+
#   - GNU coreutils (date -d, date +%N, stat -c) and procps (pgrep, ps)
#   - socat   (for the HAProxy runtime API)
#   - openssl (for SSL cert checks, optional)
#   - nc/ncat (for HTTP server mode)
#

set -euo pipefail

#########################
###   Configuration   ###
#########################
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT="${HTTP_PORT:-9117}"
LOCK_FILE="${LOCK_FILE:-/var/run/haproxy-metrics.lock}"

HAPROXY_CONFIG="${HAPROXY_CONFIG:-/etc/haproxy/haproxy.cfg}"
HAPROXY_SOCKET="${HAPROXY_SOCKET:-/run/haproxy/admin.sock}"
HAPROXY_LOG="${HAPROXY_LOG:-/var/log/haproxy/haproxy.log}"
CERT_DIR="${CERT_DIR:-/etc/haproxy/certs}"
LOG_TAIL_LINES="${LOG_TAIL_LINES:-10000}"

#########################
###      Logging      ###
#########################
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# _log COLOUR TAG MESSAGE... - timestamped, coloured line on stderr.
# stdout is reserved for metrics, so diagnostics never go there.
_log() {
    local colour="$1" tag="$2"
    shift 2
    printf '%b[%s]%b %s %s\n' "$colour" "$tag" "$NC" \
        "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}
log_info()  { _log "$GREEN"  INFO  "$@"; }
log_warn()  { _log "$YELLOW" WARN  "$@"; }
log_error() { _log "$RED"    ERROR "$@"; }
log_step()  { _log "$BLUE"   STEP  "$@"; }

#########################
###  Output helpers   ###
#########################

# emit_header NAME TYPE HELP - print the HELP/TYPE preamble of a metric family.
emit_header() {
    printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$3" "$1" "$2"
}

# emit_gauge NAME HELP VALUE - print a complete single-sample gauge family.
emit_gauge() {
    emit_header "$1" gauge "$2"
    printf '%s %s\n' "$1" "$3"
}

# int_or VALUE [DEFAULT] - print VALUE if it is a non-negative integer,
# otherwise DEFAULT (0). Keeps empty or garbled tool output out of samples.
int_or() {
    local value="${1:-}"
    value="${value//[[:space:]]/}"
    if [[ "$value" =~ ^[0-9]+$ ]]; then
        printf '%s' "$value"
    else
        printf '%s' "${2:-0}"
    fi
}

# fixed_div NUM DEN DIGITS - print NUM/DEN truncated to DIGITS decimals using
# integer arithmetic only (no bc). DEN must be non-zero; a negative NUM is
# treated as 0.
fixed_div() {
    local num="$1" den="$2" digits="$3" scale=1 i
    if (( num < 0 )); then
        num=0
    fi
    for ((i = 0; i < digits; i++)); do
        scale=$((scale * 10))
    done
    printf '%d.%0*d' "$((num / den))" "$digits" "$(( (num % den) * scale / den ))"
}

# escape_label VALUE - escape backslash, double quote and newline so VALUE can
# be placed inside a quoted label value.
escape_label() {
    local value="$1"
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\n'/\\n}
    printf '%s' "$value"
}

# count_lines_matching ERE - count stdin lines matching ERE. Prints 0 when
# nothing matches (grep exits 1 in that case) or when grep itself fails.
count_lines_matching() {
    local n
    n=$(grep -cE -- "$1" 2>/dev/null || true)
    int_or "$n" 0
}

# now_ns - current time in nanoseconds. Falls back to whole seconds when
# date does not expand %N to digits.
now_ns() {
    local t
    t=$(date +%s%N 2>/dev/null || true)
    if [[ ! "$t" =~ ^[0-9]+$ ]]; then
        t=$(( $(date +%s) * 1000000000 ))
    fi
    printf '%s' "$t"
}

#########################
###     Arguments     ###
#########################
# NOTE(review): in the source as received, the text between "cat <" in
# show_help and "/dev/null" in acquire_lock was missing. The help text, the
# option set of parse_args and the textfile name "haproxy_extended.prom" are a
# minimal reconstruction inferred from the configuration variables and from
# main(); confirm them against the original script before release.

# Print usage information on stdout.
show_help() {
    printf '%s\n' \
        "HAProxy Metrics Exporter for Prometheus" \
        "" \
        "Usage: $0 [OPTIONS]" \
        "" \
        "Options:" \
        "  -h, --help          Show this help and exit" \
        "  -o, --output FILE   Write metrics to FILE instead of stdout" \
        "  -t, --textfile      Write metrics to ${TEXTFILE_DIR}/haproxy_extended.prom" \
        "      --http          Serve metrics over HTTP" \
        "  -p, --port PORT     HTTP port (default: ${HTTP_PORT})" \
        "" \
        "Environment: TEXTFILE_DIR HTTP_PORT HAPROXY_CONFIG HAPROXY_SOCKET" \
        "             HAPROXY_LOG CERT_DIR LOG_TAIL_LINES LOCK_FILE"
}

# parse_args ARGS... - set OUTPUT_FILE / HTTP_MODE / HTTP_PORT from the
# command line. Exits 0 after --help and 2 on a usage error.
parse_args() {
    while (( $# > 0 )); do
        case "$1" in
            -h|--help)
                show_help
                exit 0
                ;;
            -o|--output)
                if (( $# < 2 )); then
                    log_error "$1 requires a FILE argument"
                    exit 2
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            -t|--textfile)
                OUTPUT_FILE="${TEXTFILE_DIR}/haproxy_extended.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port)
                if (( $# < 2 )); then
                    log_error "$1 requires a PORT argument"
                    exit 2
                fi
                HTTP_PORT="$2"
                shift 2
                ;;
            *)
                log_error "Unknown option: $1"
                show_help >&2
                exit 2
                ;;
        esac
    done
}

#########################
###       Lock        ###
#########################

# Take a PID lock in LOCK_FILE. Exits 1 when the PID stored there is still
# alive; a stale lock is replaced. When the lock file cannot be written the
# run continues unlocked with a warning instead of dying on the redirect.
acquire_lock() {
    local holder=""
    if [[ -f "$LOCK_FILE" ]]; then
        holder=$(cat "$LOCK_FILE" 2>/dev/null || true)
        if [[ -n "$holder" ]] && kill -0 "$holder" 2>/dev/null; then
            log_error "Another instance running (PID: $holder)"
            exit 1
        fi
        rm -f "$LOCK_FILE" 2>/dev/null || true
    fi

    if ! { echo $$ > "$LOCK_FILE"; } 2>/dev/null; then
        log_warn "Cannot create lock file $LOCK_FILE; continuing without a lock"
        return 0
    fi

    # Cleanup lives in the EXIT trap only. INT/TERM must call exit: a signal
    # handler that merely returns lets the script carry on, which made the
    # HTTP loop impossible to stop with Ctrl-C or kill.
    trap 'rm -f "$LOCK_FILE"' EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM
}

#########################
###   Socket Helper   ###
#########################

# haproxy_cmd COMMAND - send COMMAND to the runtime API socket and print the
# reply. Prints nothing and still returns 0 when the socket is missing or
# socat fails, so callers treat "no data" uniformly.
haproxy_cmd() {
    local cmd="$1"
    [[ -S "$HAPROXY_SOCKET" ]] || return 0
    printf '%s\n' "$cmd" \
        | timeout 5 socat - "UNIX-CONNECT:${HAPROXY_SOCKET}" 2>/dev/null || true
}

#########################
###  Process Metrics  ###
#########################

# Emit up/version/uptime/CPU/RSS/FD/process-count gauges for the first PID
# that "pgrep -x haproxy" reports. Emits only haproxy_extended_up 0 when no
# process is found.
get_process_metrics() {
    local -a pids=()
    mapfile -t pids < <(pgrep -x haproxy 2>/dev/null)
    local pid="${pids[0]:-}"

    if [[ -z "$pid" ]]; then
        emit_gauge haproxy_extended_up "Whether HAProxy is running" 0
        return 0
    fi
    emit_gauge haproxy_extended_up "Whether HAProxy is running" 1

    # Version: only the first dotted number on the first line. Distro builds
    # such as "2.4.22-0ubuntu0.22.04.3" contain more than one, and taking all
    # of them produced a multi-line label value.
    local version_line version="unknown"
    version_line=$(haproxy -v 2>/dev/null | head -n 1 || true)
    if [[ "$version_line" =~ [0-9]+\.[0-9]+[.0-9]* ]]; then
        version="${BASH_REMATCH[0]}"
    fi
    emit_header haproxy_extended_version_info gauge "HAProxy version"
    printf 'haproxy_extended_version_info{version="%s"} 1\n' "$(escape_label "$version")"

    local uptime_s cpu rss_kb fds
    uptime_s=$(ps -o etimes= -p "$pid" 2>/dev/null | tr -d ' ' || true)
    cpu=$(ps -o %cpu= -p "$pid" 2>/dev/null | tr -d ' ' || true)
    rss_kb=$(ps -o rss= -p "$pid" 2>/dev/null | tr -d ' ' || true)
    [[ "$cpu" =~ ^[0-9]+([.][0-9]+)?$ ]] || cpu=0
    rss_kb=$(int_or "$rss_kb" 0)

    # ls may fail (permissions, process gone). The "|| true" sits inside the
    # pipeline stage so wc still prints exactly one number; with pipefail,
    # "ls | wc -l || echo 0" printed two lines.
    fds=$({ ls "/proc/${pid}/fd" 2>/dev/null || true; } | wc -l)

    emit_gauge haproxy_extended_uptime_seconds "HAProxy process uptime" "$(int_or "$uptime_s" 0)"
    emit_gauge haproxy_extended_cpu_percent "HAProxy CPU usage" "$cpu"
    # ps reports RSS in KiB; the metric is in bytes.
    emit_gauge haproxy_extended_memory_bytes "HAProxy RSS memory" "$((rss_kb * 1024))"
    emit_gauge haproxy_extended_open_fds "Open file descriptors" "$(int_or "$fds" 0)"
    emit_gauge haproxy_extended_worker_count "HAProxy process count" "${#pids[@]}"
}

#########################
###   Stats Metrics   ###
#########################

# Parse the CSV reply of "show stat" in one pass and emit per-server,
# per-frontend and per-backend gauges. Samples are buffered per family so
# each family is printed as one contiguous group.
# Zero-based CSV columns used: 0 pxname, 1 svname, 2 qcur, 4 scur, 5 smax,
# 17 status, 18 weight, 21 chkfail, 23 lastchg, 33 rate, 38 check_duration.
get_stats_metrics() {
    local stats
    stats=$(haproxy_cmd "show stat" | tail -n +2 || true)
    [[ -n "$stats" ]] || return 0

    local nl=$'\n'
    local s_status="" s_weight="" s_scur="" s_smax="" s_dur="" s_fail="" s_chg=""
    local fe_conn="" fe_rate="" be_queue=""
    local line px sv labels status state_val check_ms
    local -a col=()

    while IFS= read -r line; do
        [[ -n "$line" && "$line" != "#"* ]] || continue
        # A non-whitespace IFS keeps empty fields, so column numbers hold.
        IFS=',' read -r -a col <<< "$line"
        px="${col[0]:-}"
        sv="${col[1]:-}"
        [[ -n "$px" ]] || continue

        case "$sv" in
            FRONTEND)
                fe_conn+="haproxy_extended_frontend_current_connections{frontend=\"${px}\"} $(int_or "${col[4]:-}" 0)${nl}"
                fe_rate+="haproxy_extended_frontend_session_rate{frontend=\"${px}\"} $(int_or "${col[33]:-}" 0)${nl}"
                ;;
            BACKEND)
                be_queue+="haproxy_extended_backend_queue_depth{backend=\"${px}\"} $(int_or "${col[2]:-}" 0)${nl}"
                ;;
            *)
                status="${col[17]:-}"
                case "$status" in
                    UP*)    state_val=1 ;;
                    MAINT*) state_val=2 ;;
                    DRAIN*) state_val=3 ;;
                    *)      state_val=0 ;;
                esac
                labels="backend=\"${px}\",server=\"${sv}\""
                s_status+="haproxy_extended_server_status{${labels},state=\"$(escape_label "$status")\"} ${state_val}${nl}"
                s_weight+="haproxy_extended_server_weight{${labels}} $(int_or "${col[18]:-}" 0)${nl}"
                s_scur+="haproxy_extended_server_current_sessions{${labels}} $(int_or "${col[4]:-}" 0)${nl}"
                s_smax+="haproxy_extended_server_max_sessions{${labels}} $(int_or "${col[5]:-}" 0)${nl}"
                # check_duration is in milliseconds and empty when the server
                # has no health check; no sample is emitted in that case.
                check_ms="${col[38]:-}"
                if [[ "$check_ms" =~ ^[0-9]+$ ]]; then
                    s_dur+="haproxy_extended_server_check_duration_seconds{${labels}} $(fixed_div "$check_ms" 1000 3)${nl}"
                fi
                s_fail+="haproxy_extended_server_check_failures_total{${labels}} $(int_or "${col[21]:-}" 0)${nl}"
                s_chg+="haproxy_extended_server_last_change_seconds{${labels}} $(int_or "${col[23]:-}" 0)${nl}"
                ;;
        esac
    done <<< "$stats"

    emit_header haproxy_extended_server_status gauge "Per-server health status (0=other, 1=UP, 2=MAINT, 3=DRAIN)"
    printf '%s' "$s_status"
    emit_header haproxy_extended_server_weight gauge "Server weight"
    printf '%s' "$s_weight"
    emit_header haproxy_extended_server_current_sessions gauge "Current sessions per server"
    printf '%s' "$s_scur"
    emit_header haproxy_extended_server_max_sessions gauge "Max observed sessions per server"
    printf '%s' "$s_smax"
    emit_header haproxy_extended_server_check_duration_seconds gauge "Health check duration"
    printf '%s' "$s_dur"
    emit_header haproxy_extended_server_check_failures_total gauge "Number of failed health checks (chkfail)"
    printf '%s' "$s_fail"
    emit_header haproxy_extended_server_last_change_seconds gauge "Seconds since last status change"
    printf '%s' "$s_chg"

    emit_header haproxy_extended_frontend_current_connections gauge "Current frontend connections"
    printf '%s' "$fe_conn"
    emit_header haproxy_extended_frontend_session_rate gauge "Current session rate per frontend"
    printf '%s' "$fe_rate"
    emit_header haproxy_extended_backend_queue_depth gauge "Current queue depth per backend"
    printf '%s' "$be_queue"
}

#########################
### SSL Certificates  ###
#########################

# Emit time-to-expiry gauges for every *.pem / *.crt under CERT_DIR that
# openssl can parse as an X.509 certificate. Does nothing, successfully, when
# the directory, the certificates or openssl itself are absent.
get_ssl_metrics() {
    [[ -d "$CERT_DIR" ]] || return 0
    command -v openssl >/dev/null 2>&1 || return 0

    local certs
    # No "-type f": certificate paths may be symlinks; the -f test below
    # follows them.
    certs=$(find "$CERT_DIR" \( -name '*.pem' -o -name '*.crt' \) 2>/dev/null || true)
    [[ -n "$certs" ]] || return 0

    local nl=$'\n' secs="" days=""
    local cert_file file_name not_after expiry_epoch now_epoch remaining_s cn labels
    now_epoch=$(date +%s)

    while IFS= read -r cert_file; do
        [[ -f "$cert_file" ]] || continue

        # Skip files openssl cannot read as a certificate. An empty end date
        # must not reach "date -d", which accepts "" and returns a timestamp.
        not_after=$(openssl x509 -in "$cert_file" -noout -enddate 2>/dev/null | cut -d= -f2- || true)
        [[ -n "$not_after" ]] || continue
        expiry_epoch=$(date -d "$not_after" +%s 2>/dev/null || true)
        [[ "$expiry_epoch" =~ ^-?[0-9]+$ ]] || continue

        file_name=$(basename "$cert_file")
        # CN value up to the next "," or "/" separator; prints nothing when
        # the subject has no CN so the file-name fallback applies.
        cn=$(openssl x509 -in "$cert_file" -noout -subject 2>/dev/null \
            | sed -n 's|.*CN *= *\([^,/]*\).*|\1|p' || true)
        [[ -n "$cn" ]] || cn="${file_name%.*}"

        remaining_s=$((expiry_epoch - now_epoch))
        labels="domain=\"$(escape_label "$cn")\",file=\"$(escape_label "$file_name")\""
        secs+="haproxy_extended_ssl_cert_expiry_seconds{${labels}} ${remaining_s}${nl}"
        days+="haproxy_extended_ssl_cert_expiry_days{${labels}} $((remaining_s / 86400))${nl}"
    done <<< "$certs"

    emit_header haproxy_extended_ssl_cert_expiry_seconds gauge "SSL certificate expiry in seconds from now"
    printf '%s' "$secs"
    emit_header haproxy_extended_ssl_cert_expiry_days gauge "SSL certificate expiry in days"
    printf '%s' "$days"
}

#########################
###   Stick Tables    ###
#########################

# Emit used/size/ratio gauges for every "# table: ..." summary line in the
# reply to "show table".
get_stick_table_metrics() {
    local tables
    tables=$(haproxy_cmd "show table")
    [[ -n "$tables" ]] || return 0

    local re='^# table: ([^,]+), type: ([^,]+), size:([0-9]+), used:([0-9]+)'
    local nl=$'\n' entries="" sizes="" ratios=""
    local line labels tsize tused ratio

    while IFS= read -r line; do
        [[ "$line" =~ $re ]] || continue
        labels="table=\"${BASH_REMATCH[1]}\",type=\"${BASH_REMATCH[2]}\""
        tsize="${BASH_REMATCH[3]}"
        tused="${BASH_REMATCH[4]}"
        ratio="0"
        if (( tsize > 0 )); then
            ratio=$(fixed_div "$tused" "$tsize" 4)
        fi
        entries+="haproxy_extended_stick_table_entries{${labels}} ${tused}${nl}"
        sizes+="haproxy_extended_stick_table_size{${labels}} ${tsize}${nl}"
        ratios+="haproxy_extended_stick_table_used_ratio{${labels}} ${ratio}${nl}"
    done <<< "$tables"

    emit_header haproxy_extended_stick_table_entries gauge "Current entries in stick table"
    printf '%s' "$entries"
    emit_header haproxy_extended_stick_table_size gauge "Configured max size of stick table"
    printf '%s' "$sizes"
    emit_header haproxy_extended_stick_table_used_ratio gauge "Utilization ratio (0.0-1.0)"
    printf '%s' "$ratios"
}

#########################
###  Config Metrics   ###
#########################

# Count section and directive lines in HAPROXY_CONFIG and report the first
# maxconn value found. Purely textual; included files are not followed.
get_config_metrics() {
    [[ -f "$HAPROXY_CONFIG" ]] || return 0

    local frontends backends servers acls maxconn
    frontends=$(count_lines_matching '^frontend ' < "$HAPROXY_CONFIG")
    backends=$(count_lines_matching '^backend ' < "$HAPROXY_CONFIG")
    servers=$(count_lines_matching '^[[:space:]]+server[[:space:]]' < "$HAPROXY_CONFIG")
    acls=$(count_lines_matching '^[[:space:]]+acl[[:space:]]' < "$HAPROXY_CONFIG")
    # awk stops at the first hit itself, so no "| head -1" whose SIGPIPE could
    # fail the pipeline under pipefail.
    maxconn=$(awk '$1 == "maxconn" { print $2; exit }' "$HAPROXY_CONFIG" 2>/dev/null || true)

    emit_gauge haproxy_extended_config_frontends "Number of configured frontends" "$frontends"
    emit_gauge haproxy_extended_config_backends "Number of configured backends" "$backends"
    emit_gauge haproxy_extended_config_servers "Number of configured servers" "$servers"
    emit_gauge haproxy_extended_config_acl_rules "Number of ACL rules" "$acls"
    emit_gauge haproxy_extended_config_maxconn "Configured maxconn" "$(int_or "$maxconn" 0)"
}

#########################
###    Log Metrics    ###
#########################

# Count error indicators in the last LOG_TAIL_LINES lines of HAPROXY_LOG.
# The values describe a sliding window, so they are gauges despite the
# _total suffix in their names.
# NOTE(review): the patterns assume the default "option httplog" / "option
# tcplog" layout (timers, status, bytes, ..., termination state, connection
# counters "ac/fc/bc/sc/rc", queues "sq/bq"). Lines written with a custom
# log-format will not match and will count as zero - confirm against the
# deployed format.
get_log_metrics() {
    [[ -f "$HAPROXY_LOG" ]] || return 0

    local lines
    lines=$(tail -n "$LOG_TAIL_LINES" "$HAPROXY_LOG" 2>/dev/null || true)
    [[ -n "$lines" ]] || return 0

    # Status code: the 3-digit field right after the five "/"-separated
    # timers, so a byte count such as " 450 " is not taken for a 4xx.
    local re_4xx='(/[+-]?[0-9]+){4} 4[0-9]{2} '
    local re_5xx='(/[+-]?[0-9]+){4} 5[0-9]{2} '
    # Termination state: a token immediately followed by the connection
    # counters, so names that merely contain "PR"/"SD"/... do not match.
    local re_conn=' (CD|SC|SD|PC|PH|PR)[-A-Z]{0,2} [0-9]+/[0-9]+/'
    local re_denied=' (PR|PD)[-A-Z]{0,2} [0-9]+/[0-9]+/'
    # A "+" on the last connection counter (retries), followed by the queues.
    local re_retry=' [0-9]+/[0-9]+/[0-9]+/[0-9]+/\+[0-9]+ [0-9]+/[0-9]+'

    local http_4xx http_5xx conn_err retries denied log_size
    http_4xx=$(count_lines_matching "$re_4xx" <<< "$lines")
    http_5xx=$(count_lines_matching "$re_5xx" <<< "$lines")
    conn_err=$(count_lines_matching "$re_conn" <<< "$lines")
    retries=$(count_lines_matching "$re_retry" <<< "$lines")
    denied=$(count_lines_matching "$re_denied" <<< "$lines")
    log_size=$(stat -c '%s' "$HAPROXY_LOG" 2>/dev/null || true)

    emit_gauge haproxy_extended_log_http_4xx_total "4xx responses in recent log" "$http_4xx"
    emit_gauge haproxy_extended_log_http_5xx_total "5xx responses in recent log" "$http_5xx"
    emit_gauge haproxy_extended_log_connection_errors "Connection errors in recent log" "$conn_err"
    emit_gauge haproxy_extended_log_retries_total "Retries in recent log" "$retries"
    emit_gauge haproxy_extended_log_denied_total "Denied requests in recent log" "$denied"
    emit_gauge haproxy_extended_log_size_bytes "HAProxy log file size" "$(int_or "$log_size" 0)"
}

#########################
###  Reload Metrics   ###
#########################

# info_field KEY - print the value of the first "KEY: value" line read from
# stdin (key compared case-insensitively), or nothing when absent.
info_field() {
    awk -F': *' -v key="$1" 'tolower($1) == tolower(key) { print $2; exit }'
}

# Emit the reload count and the CurrConns / MaxConn values from "show info".
# A field that is absent from the reply is reported as 0.
# NOTE(review): whether "show info" exposes a TotalReloads field depends on
# the HAProxy version; where it does not, the reload count is always 0.
get_reload_metrics() {
    local info
    info=$(haproxy_cmd "show info")
    [[ -n "$info" ]] || return 0

    local reloads cur_conns max_conns
    reloads=$(info_field TotalReloads <<< "$info" || true)
    cur_conns=$(info_field CurrConns <<< "$info" || true)
    max_conns=$(info_field MaxConn <<< "$info" || true)

    emit_gauge haproxy_extended_reload_count "Total HAProxy reloads" "$(int_or "$reloads" 0)"
    emit_gauge haproxy_extended_current_connections "Total current connections from info" "$(int_or "$cur_conns" 0)"
    emit_gauge haproxy_extended_max_connections "Configured MaxConn from info" "$(int_or "$max_conns" 0)"
}

#########################
###     Collector     ###
#########################

# emit_scrape_summary START_NS - emit the scrape duration, measured from
# START_NS (nanoseconds) until now, and the scrape timestamp.
emit_scrape_summary() {
    local start_ns="$1" end_ns
    end_ns=$(now_ns)
    emit_gauge haproxy_extended_scrape_duration_seconds "Time to collect all metrics" \
        "$(fixed_div "$((end_ns - start_ns))" 1000000000 3)"
    emit_gauge haproxy_extended_scrape_timestamp_seconds "Unix timestamp of last scrape" \
        "$(date +%s)"
}

# Emit the scrape summary for an empty measurement window. generate_metrics
# emits the same two families itself, so do not call both in one scrape.
get_scrape_metrics() {
    emit_scrape_summary "$(now_ns)"
}

# Run every collector and finish with the scrape summary. Collectors are
# best-effort: one that fails is reported on stderr and the rest still run,
# whether or not this function is called inside a command substitution.
generate_metrics() {
    local start_ns collector
    start_ns=$(now_ns)

    for collector in \
        get_process_metrics \
        get_stats_metrics \
        get_ssl_metrics \
        get_stick_table_metrics \
        get_config_metrics \
        get_log_metrics \
        get_reload_metrics; do
        "$collector" || log_warn "Collector ${collector} failed; continuing"
    done

    emit_scrape_summary "$start_ns"
}

#########################
###      Output       ###
#########################

# write_textfile METRICS OUTFILE - atomically replace OUTFILE with METRICS
# (temp file in the same directory, then rename). Returns 1 and removes the
# temp file when any step fails.
write_textfile() {
    local metrics="$1"
    local outfile="$2"
    local outdir tmpfile
    outdir=$(dirname "$outfile")
    mkdir -p "$outdir"
    tmpfile=$(mktemp "${outfile}.XXXXXX")

    # mktemp creates the file 0600; widen it before the rename so a reader
    # running as another user (e.g. node_exporter) can open the result.
    if ! { printf '%s\n' "$metrics" > "$tmpfile" \
            && chmod 644 "$tmpfile" \
            && mv -f "$tmpfile" "$outfile"; }; then
        rm -f "$tmpfile"
        log_error "Failed to write metrics to $outfile"
        return 1
    fi
    log_info "Metrics written to $outfile"
}

# Serve metrics over HTTP, one connection per loop iteration, using ncat or
# nc. The payload is generated BEFORE the listener starts, so a client gets
# data collected when the previous connection ended.
run_http_server() {
    log_info "Starting HTTP server on port $HTTP_PORT"

    local nc_cmd
    if command -v ncat >/dev/null 2>&1; then
        nc_cmd="ncat"
    elif command -v nc >/dev/null 2>&1; then
        nc_cmd="nc"
    else
        log_error "nc (netcat) is required for HTTP mode"
        exit 1
    fi

    local metrics body_bytes response
    while true; do
        metrics=$(generate_metrics)
        # Content-Length is in bytes and covers the trailing newline sent
        # after the metrics.
        body_bytes=$(printf '%s\n' "$metrics" | wc -c | tr -d '[:space:]')
        # printf with %s leaves backslashes in the metrics untouched, unlike
        # echo -e.
        response=$(printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nContent-Length: %s\r\nConnection: close\r\n\r\n%s' \
            "$body_bytes" "$metrics")

        # Try "-l -p PORT -q 1" first, then plain "-l PORT" for netcat builds
        # that reject the first form.
        if ! printf '%s\n' "$response" | "$nc_cmd" -l -p "$HTTP_PORT" -q 1 2>/dev/null \
            && ! printf '%s\n' "$response" | "$nc_cmd" -l "$HTTP_PORT" 2>/dev/null; then
            # Both forms failed (e.g. port in use): pause instead of spinning.
            sleep 1
        fi
    done
}

#########################
###       Main        ###
#########################

# Entry point: parse options, take the lock, then serve over HTTP, write a
# file, or print to stdout depending on the selected mode.
main() {
    parse_args "$@"
    acquire_lock

    if [[ "$HTTP_MODE" == true ]]; then
        run_http_server
    elif [[ -n "$OUTPUT_FILE" ]]; then
        local metrics
        metrics=$(generate_metrics)
        write_textfile "$metrics" "$OUTPUT_FILE"
    else
        generate_metrics
    fi
}

main "$@"