#!/bin/bash
################################################################################
# Script Name: rsyslog-metrics-exporter.sh
# Version: 1.0
# Description: Prometheus exporter for rsyslog internal metrics via impstats
#              JSON output. Exports queue depths, action success/failure,
#              input message counts, process resource usage, and overall
#              rsyslog health metrics.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - rsyslog with impstats module enabled (JSON output to file)
#   - jq (JSON parser)
#   - netcat (nc) for HTTP mode
#   - Standard Unix tools (awk, grep, tail)
#
# Performance:
#   The stats file is read once per collection cycle — the last occurrence
#   of each named object is extracted in a single pass using jq.
#   Typical run time: under one second.
#
# Usage:
#   # One-shot output to stdout
#   ./rsyslog-metrics-exporter.sh --once
#
#   # Textfile collector mode (daemon, writes every COLLECTION_INTERVAL)
#   ./rsyslog-metrics-exporter.sh --daemon --textfile
#
#   # HTTP server mode
#   ./rsyslog-metrics-exporter.sh --http -p 9199
#
# Metrics Exported:
#   Core Status:
#     - rsyslog_up                              - rsyslog process status (1=running, 0=down)
#     - rsyslog_info{version}                   - rsyslog version info
#
#   Queue Metrics:
#     - rsyslog_messages_total                  - Total messages processed
#     - rsyslog_queue_size{queue}               - Current queue depth
#     - rsyslog_queue_enqueued_total{queue}     - Messages enqueued
#     - rsyslog_queue_dequeued_total{queue}     - Messages dequeued
#     - rsyslog_queue_full_total{queue}         - Times queue was full
#     - rsyslog_queue_max_size{queue}           - Configured max queue size
#     - rsyslog_queue_disk_usage_bytes{queue}   - Disk-assisted queue usage
#
#   Action Metrics:
#     - rsyslog_action_processed_total{action}  - Messages processed per action
#     - rsyslog_action_failed_total{action}     - Failed action attempts
#     - rsyslog_action_suspended{action}        - 1 if action is suspended
#     - rsyslog_action_resumed_total{action}    - Times action resumed
#
#   Input Metrics:
#     - rsyslog_input_received_total{input}     - Messages received per input
#
#   Process Metrics:
#     - rsyslog_process_memory_bytes            - RSS memory of rsyslog
#     - rsyslog_process_open_fds                - Open file descriptors
#
#   Exporter Health:
#     - rsyslog_exporter_duration_seconds       - Script execution time
#     - rsyslog_exporter_last_run_timestamp     - Last run timestamp
#     - rsyslog_exporter_success                - 1 if collection succeeded
#
# Configuration (environment variables):
#   NODE_DIR            - Textfile collector directory (default: /var/lib/node_exporter)
#   STATS_FILE          - impstats JSON output file (default: /var/log/rsyslog-stats.log)
#   COLLECTION_INTERVAL - Seconds between collections in daemon mode (default: 60)
#   HTTP_PORT           - HTTP server port (default: 9199)
#   DEBUG               - Set to any value to enable debug output
#
# impstats Configuration:
#   Add to /etc/rsyslog.d/impstats.conf:
#     module(load="impstats"
#            interval="60"
#            severity="7"
#            log.syslog="off"
#            log.file="/var/log/rsyslog-stats.log"
#            format="json"
#     )
#
################################################################################

set -euo pipefail

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================
NODE_DIR="${NODE_DIR:-/var/lib/node_exporter}"
STATS_FILE="${STATS_FILE:-/var/log/rsyslog-stats.log}"
COLLECTION_INTERVAL="${COLLECTION_INTERVAL:-60}"
HTTP_PORT="${HTTP_PORT:-9199}"
DEBUG="${DEBUG:-}"
OUTPUT_FILE=""
HTTP_MODE=false
DAEMON_MODE=false
ONCE_MODE=false
TEMP_FILE=""

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# NOTE(review): show_usage() and the start of parse_args() are damaged in this
# copy of the file: the usage here-document and the option-parsing loop were
# lost. The surviving text is kept verbatim on the next comment line; restore
# both functions from a pristine copy before running the script.
# show_usage() { cat <&2; exit 1 ;; esac done }

# Print a debug message to stderr when DEBUG is non-empty.
# Args:    $* - message text
# Returns: always 0. The script runs under `set -e`, so a non-zero status
#          from a logging helper would abort the caller when DEBUG is unset.
debug_log() {
  if [[ -n "${DEBUG:-}" ]]; then
    echo "DEBUG: $*" >&2
  fi
  return 0
}

# Verify that impstats is configured and the stats file exists.
# Shows a suggested configuration if the stats file is not found.
# All diagnostics go to stderr.
# Globals: STATS_FILE (read), NODE_DIR (read)
# Returns: 1 if rsyslogd is not in PATH, otherwise 0
check_impstats_config() {
  local version="" pids="" have_jq=false

  echo "Checking rsyslog impstats configuration..." >&2
  echo "" >&2

  # Check rsyslog is installed
  if ! command -v rsyslogd >/dev/null 2>&1; then
    echo "ERROR: rsyslogd not found in PATH" >&2
    return 1
  fi

  version=$(rsyslogd -v 2>/dev/null | head -n 1) || version=""
  echo "rsyslog: ${version:-unknown}" >&2

  # Check if rsyslog is running (query pidof once and reuse the result)
  if pids=$(pidof rsyslogd 2>/dev/null); then
    echo "Status: running (PID $pids)" >&2
  else
    echo "Status: NOT running" >&2
  fi

  if command -v jq >/dev/null 2>&1; then
    have_jq=true
  fi

  # Check stats file
  if [[ -f "$STATS_FILE" ]]; then
    echo "Stats file: $STATS_FILE (exists, $(wc -l < "$STATS_FILE") lines)" >&2
    echo "" >&2

    # Verify the last line holds a JSON object. The check is skipped when jq
    # is missing (reported below) so a missing tool is not misreported as a
    # bad stats file. Text before the first '{' is dropped because impstats
    # log.file lines may start with a date header ahead of the record.
    if [[ "$have_jq" == true ]]; then
      if tail -n 1 -- "$STATS_FILE" 2>/dev/null \
        | sed 's/^[^{]*//' \
        | jq -e 'type == "object"' >/dev/null 2>&1; then
        echo "OK: Stats file contains valid JSON" >&2
      else
        echo "WARNING: Stats file does not contain valid JSON" >&2
        echo "  Ensure impstats is configured with format=\"json\"" >&2
      fi
    fi
  else
    echo "Stats file: $STATS_FILE (NOT FOUND)" >&2
    echo "" >&2
    echo "impstats does not appear to be configured." >&2
    echo "Add the following to /etc/rsyslog.d/impstats.conf:" >&2
    echo "" >&2
    # NOTE(review): the original here-document was lost in this copy; this
    # text is rebuilt from the "impstats Configuration" section of the file
    # header, with log.file pointing at the configured STATS_FILE.
    cat >&2 <<EOF
module(load="impstats"
       interval="60"
       severity="7"
       log.syslog="off"
       log.file="${STATS_FILE}"
       format="json"
)
EOF
    echo "" >&2
    echo "Then restart rsyslog: systemctl restart rsyslog" >&2
  fi

  # Check jq
  if [[ "$have_jq" == true ]]; then
    echo "jq: installed ($(jq --version 2>/dev/null))" >&2
  else
    echo "jq: NOT FOUND (required)" >&2
    echo "  Install with: apt install jq OR yum install jq" >&2
  fi

  # Check node_exporter textfile dir
  if [[ -d "$NODE_DIR" ]]; then
    echo "Textfile dir: $NODE_DIR (exists)" >&2
  else
    echo "Textfile dir: $NODE_DIR (not found, create for --textfile mode)" >&2
  fi

  return 0
}

# Clean up temp files on exit.
# Globals: TEMP_FILE (read, then reset to empty)
cleanup() {
  if [[ -n "${TEMP_FILE:-}" ]]; then
    rm -f -- "$TEMP_FILE"
  fi
  TEMP_FILE=""
}

# ============================================================================
# DATA COLLECTION FUNCTIONS
# ============================================================================

# Check if rsyslog is running
# Returns: 0 if running, 1 if not
is_rsyslog_running() {
  pidof rsyslogd >/dev/null 2>&1
}

# Get rsyslog version string
# Returns: version string (e.g., "8.2312.0")
get_rsyslog_version() {
  # Prints the first x.y.z number found on the first line of `rsyslogd -v`
  # (e.g. "rsyslogd 8.2312.0 (aka 2023.12)" -> "8.2312.0"), or nothing when
  # the binary is missing or prints no version. Always returns 0 so that a
  # missing rsyslogd cannot abort the exporter under `set -euo pipefail`.
  local first_line=""
  first_line=$(rsyslogd -v 2>/dev/null | head -n 1) || true
  if [[ "$first_line" =~ [0-9]+\.[0-9]+\.[0-9]+ ]]; then
    printf '%s\n' "${BASH_REMATCH[0]}"
  fi
  return 0
}

# Private helper: print the JSON objects found in the tail of STATS_FILE,
# one compact object per line, oldest first.
# Text before the first '{' on a line is dropped, because impstats log.file
# lines may carry a date header ahead of the record. Lines that still do not
# parse (e.g. a partially written last line) are skipped, not fatal.
# Globals: STATS_FILE (read), STATS_TAIL_LINES (read, optional, default 500)
# Returns: always 0; prints nothing if the file or jq is unavailable
_rsyslog_stats_records() {
  local tail_lines="${STATS_TAIL_LINES:-500}"
  [[ "$tail_lines" =~ ^[1-9][0-9]*$ ]] || tail_lines=500

  if [[ ! -f "$STATS_FILE" || ! -r "$STATS_FILE" ]]; then
    return 0
  fi

  tail -n "$tail_lines" -- "$STATS_FILE" 2>/dev/null \
    | jq -R -c 'sub("^[^{]*"; "") | fromjson? | select(type == "object")' \
      2>/dev/null \
    || true
}

# Extract the latest stats for each unique object name from the impstats file.
# The stats file grows over time; only the most recent entry per object is
# wanted. The last STATS_TAIL_LINES lines (default 500) are examined, which is
# meant to cover several collection cycles; raise it on hosts with many
# queues or actions.
# Args:    $1 - origin filter (e.g., "core.queue", "core.action")
# Returns: JSON lines on stdout, one per unique object name (latest occurrence
#          wins); status is always 0
get_latest_stats() {
  local origin_filter="${1:-}"

  if [[ ! -f "$STATS_FILE" || ! -r "$STATS_FILE" ]]; then
    # `|| true`: the logger's status must not become this function's status.
    debug_log "Stats file not readable: $STATS_FILE" || true
    return 0
  fi

  # group_by sorts by name and keeps input order inside each group, so the
  # last element of every group is the most recent record for that name.
  _rsyslog_stats_records \
    | jq -s -c --arg origin "$origin_filter" \
      'map(select(.origin == $origin)) | group_by(.name) | map(last) | .[]' \
      2>/dev/null \
    || true
}

# Get the total messages processed from the main queue stats.
# Returns: the "enqueued" counter of the most recent "main Q" record, or 0
#          when no such record (or no usable stats file) is available
get_total_messages() {
  local total=""
  total=$(_rsyslog_stats_records \
    | jq -s -r \
      'map(select(.origin == "core.queue" and .name == "main Q"))
       | last | .enqueued // 0' 2>/dev/null) || total=""
  # Only ever emit a plain integer.
  [[ "$total" =~ ^[0-9]+$ ]] || total=0
  printf '%s\n' "$total"
}

# Get rsyslog process RSS memory in bytes from /proc.
# Returns: RSS in bytes, or 0 if unavailable
get_process_memory() {
  local pids="" pid="" rss_kb=""
  pids=$(pidof rsyslogd 2>/dev/null) || { echo "0"; return 0; }
  # Use the first PID if multiple
  pid=${pids%% *}

  if [[ -r "/proc/$pid/status" ]]; then
    # The process can exit between the check and the read; treat a failed
    # read as "unavailable" instead of failing the function.
    rss_kb=$(awk '/^VmRSS:/ {print $2; exit}' "/proc/$pid/status" 2>/dev/null) \
      || rss_kb=""
  fi

  if [[ "$rss_kb" =~ ^[0-9]+$ ]]; then
    echo $((rss_kb * 1024))
  else
    echo "0"
  fi
}

# Get rsyslog open file descriptor count from /proc.
# Returns: number of open fds, or 0 if unavailable (no process, or the fd
#          directory cannot be listed by the current user)
get_open_fds() {
  local pids="" pid="" entry="" count=0
  pids=$(pidof rsyslogd 2>/dev/null) || { echo "0"; return 0; }
  # Use the first PID if multiple
  pid=${pids%% *}

  # Count directory entries with a glob instead of parsing `ls`. Entries are
  # symlinks whose targets (e.g. sockets) may not resolve, hence -L. When the
  # glob matches nothing the literal pattern fails -L and the count stays 0.
  for entry in "/proc/$pid/fd"/*; do
    if [[ -L "$entry" ]]; then
      count=$((count + 1))
    fi
  done
  echo "$count"
}

# NOTE(review): the rest of this region (generate_metrics and the start of run_http_server) is damaged in this copy: its here-documents were lost. The surviving text is kept verbatim on the next comment line; restore it from a pristine copy.
# ============================================================================ # METRIC GENERATION # ============================================================================ # Generate all Prometheus metrics # Returns: Prometheus text format metrics on stdout generate_metrics() { local script_start script_start=$(date +%s.%N 2>/dev/null || date +%s) local success=1 # Verify jq is available if ! command -v jq >/dev/null 2>&1; then cat <&2 return fi # ======================================================================== # Core Status # ======================================================================== local rsyslog_running=0 if is_rsyslog_running; then rsyslog_running=1 fi cat </dev/null || date +%s) script_duration=$(awk "BEGIN {printf \"%.3f\", $script_end - $script_start}" 2>/dev/null || echo "0") cat <&2 if !
command -v nc >/dev/null 2>&1; then echo "ERROR: netcat (nc) required for HTTP mode" >&2 exit 1 fi # Infinite loop accepting HTTP requests while true; do { read -r request # Check if request is for /metrics endpoint if [[ "$request" =~ ^GET\ /metrics ]]; then echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r" generate_metrics else # Serve HTML landing page for other requests echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r" cat < Rsyslog Metrics Exporter v1.0

Rsyslog Metrics Exporter v1.0

Metrics

Metric Categories

  • Core Status: rsyslog up/down, version info
  • Queue Metrics: depth, enqueued/dequeued, full events, disk usage
  • Action Metrics: processed/failed/suspended per action
  • Input Metrics: messages received per input module
  • Process Metrics: memory usage, open file descriptors
  • Exporter Health: runtime, last run timestamp, success flag
EOF fi } | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null done }
# NOTE(review): the line above is the surviving tail of run_http_server(),
# whose here-document was lost in this copy of the file. It is kept verbatim;
# restore the function from a pristine copy.

# ============================================================================
# TEXTFILE WRITER (with atomic rename)
# ============================================================================

# Write metrics to the textfile collector file atomically.
# Uses temp file + rename so a reader never sees a partially written file.
# Globals: OUTPUT_FILE (read), TEMP_FILE (written; empty again on return)
# Returns: 0 on success; 1 on any failure, in which case the temp file is
#          removed and any previous OUTPUT_FILE is left untouched
write_textfile() {
  local output_dir="" file_lines=""

  if [[ -z "${OUTPUT_FILE:-}" ]]; then
    echo "ERROR: No output file configured" >&2
    return 1
  fi

  output_dir="$(dirname -- "$OUTPUT_FILE")"
  if ! mkdir -p -- "$output_dir"; then
    echo "ERROR: Cannot create directory $output_dir" >&2
    return 1
  fi

  # Create temp file in SAME directory for atomic rename (same filesystem).
  # Failures are checked explicitly: the daemon loop calls this function as
  # `write_textfile || true`, which disables `set -e` inside it.
  if ! TEMP_FILE=$(mktemp "${output_dir}/.rsyslog_metrics.XXXXXX"); then
    TEMP_FILE=""
    echo "ERROR: Cannot create temp file in $output_dir" >&2
    return 1
  fi

  # Generate metrics to temp file
  if ! generate_metrics > "$TEMP_FILE" 2>/dev/null; then
    rm -f -- "$TEMP_FILE"
    TEMP_FILE=""
    echo "ERROR: Failed to generate metrics" >&2
    return 1
  fi

  # Validate: file must have content
  file_lines=$(wc -l < "$TEMP_FILE" 2>/dev/null) || file_lines=0
  file_lines=${file_lines//[[:space:]]/}   # some wc implementations pad the count
  [[ "$file_lines" =~ ^[0-9]+$ ]] || file_lines=0
  if (( file_lines < 5 )); then
    rm -f -- "$TEMP_FILE"
    TEMP_FILE=""
    echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
    return 1
  fi

  # Set permissions before the move, then rename atomically - no gap where
  # the file is missing. If either step fails, remove the temp file so that
  # repeated daemon cycles cannot accumulate stale temp files.
  if ! chmod 644 "$TEMP_FILE" || ! mv -f -- "$TEMP_FILE" "$OUTPUT_FILE"; then
    rm -f -- "$TEMP_FILE"
    TEMP_FILE=""
    echo "ERROR: Failed to install metrics file $OUTPUT_FILE" >&2
    return 1
  fi
  TEMP_FILE=""

  # `|| true`: a non-zero status from the logger must not be treated as a
  # write failure under `set -e`.
  debug_log "Metrics written to $OUTPUT_FILE ($file_lines lines)" || true
  return 0
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Main entry point - routes to the appropriate output mode.
# Args:    "$@" - command-line arguments, handed to parse_args
# Globals: HTTP_MODE, DAEMON_MODE, OUTPUT_FILE, COLLECTION_INTERVAL (read)
main() {
  parse_args "$@"

  trap cleanup EXIT

  if [[ "$HTTP_MODE" == true ]]; then
    # Run HTTP server (blocks until killed)
    run_http_server
  elif [[ -n "$OUTPUT_FILE" && "$DAEMON_MODE" == true ]]; then
    # Daemon mode: write textfile on a loop
    echo "Starting rsyslog metrics exporter (daemon, interval=${COLLECTION_INTERVAL}s)..." >&2
    echo "Writing to: $OUTPUT_FILE" >&2
    while true; do
      # A failed cycle is reported by write_textfile and retried next round.
      write_textfile || true
      sleep "$COLLECTION_INTERVAL"
    done
  elif [[ -n "$OUTPUT_FILE" ]]; then
    # Textfile collector mode: write once
    write_textfile
    echo "Metrics written to $OUTPUT_FILE" >&2
  else
    # Default: output to stdout
    generate_metrics
  fi
}

# Execute main function with all script arguments
main "$@"