#!/usr/bin/env bash
#
# Redis Sentinel Prometheus Metrics Exporter
#
# Prometheus textfile collector exporter for Redis Sentinel.
# Uses redis-cli to query Sentinel (PING, INFO sentinel, SENTINEL masters)
# and exports cluster health, master status, replica counts, quorum state,
# and failover statistics.
#
# Usage:
#   ./redis-sentinel-exporter.sh
#   ./redis-sentinel-exporter.sh --textfile
#   REDIS_HOST="sentinel.example.com" REDIS_PORT="26379" ./redis-sentinel-exporter.sh --textfile
#   ./redis-sentinel-exporter.sh --install
#
# Parameters:
#   --textfile    Write to textfile collector directory
#   --install     Create cron job for automatic collection
#   --help        Show usage
#
# Environment:
#   REDIS_HOST    Sentinel host (default: 127.0.0.1)
#   REDIS_PORT    Sentinel port (default: 26379)
#   REDIS_AUTH    Sentinel password (optional)
#   TEXTFILE_DIR  Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
#   CURL_TIMEOUT  Command timeout in seconds (default: 10)
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Version: 1.0
#
# Metrics Exported:
#   Core:
#     - redis_sentinel_up
#     - redis_sentinel_exporter_info{version}
#     - redis_sentinel_masters_total
#
#   Per-Master:
#     - redis_sentinel_master_status{name}
#     - redis_sentinel_master_slaves{name}
#     - redis_sentinel_master_sentinels{name}
#     - redis_sentinel_master_quorum{name}
#     - redis_sentinel_master_odown{name}
#     - redis_sentinel_master_sdown{name}
#
#   Cluster:
#     - redis_sentinel_failover_total
#
#   Exporter:
#     - redis_sentinel_exporter_duration_seconds
#     - redis_sentinel_exporter_last_run_timestamp

set -euo pipefail

# --- Configuration ---

readonly VERSION="1.0"

# Parameter expansion instead of $(basename ...): no subshell, and no
# command exit status hidden behind the readonly declaration.
SCRIPT_NAME="${0##*/}"
readonly SCRIPT_NAME

# Every setting below can be overridden from the environment.
REDIS_HOST="${REDIS_HOST:-127.0.0.1}"
REDIS_PORT="${REDIS_PORT:-26379}"
REDIS_AUTH="${REDIS_AUTH:-}"
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
# Despite the name, this bounds each redis-cli call (passed to timeout(1)).
CURL_TIMEOUT="${CURL_TIMEOUT:-10}"

# Switched to "true" by the --textfile command-line option.
TEXTFILE_MODE=false
# Accumulated Prometheus exposition text; appended to by add_metric*.
OUTPUT=""
# Collection start time in nanoseconds; set by main.
START_TIME=""

# --- Functions ---

# Print usage information and exit.
# NOTE(review): the original here-doc body was lost from the source; this
# text is rebuilt from the options main() parses and the environment
# variables the script reads. Confirm against the upstream script.
# Arguments: $1 - exit status (optional, default 0)
usage() {
  local status="${1:-0}"
  cat <<EOF
Usage: ${SCRIPT_NAME:-${0##*/}} [OPTIONS]

Redis Sentinel Prometheus metrics exporter.

Options:
  --textfile    Write to textfile collector directory
  --install     Create cron job for automatic collection
  --help, -h    Show usage

Environment:
  REDIS_HOST    Sentinel host (default: 127.0.0.1)
  REDIS_PORT    Sentinel port (default: 26379)
  REDIS_AUTH    Sentinel password (optional)
  TEXTFILE_DIR  Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
  CURL_TIMEOUT  Command timeout in seconds (default: 10)
EOF
  exit "$status"
}

# Exit with an error if a required external command is not on PATH.
# NOTE(review): the original command list was lost from the source; the list
# below is the set of external commands this script invokes.
check_dependencies() {
  local -a missing=()
  local cmd
  for cmd in redis-cli timeout awk date; do
    if ! command -v "$cmd" &>/dev/null; then
      missing+=("$cmd")
    fi
  done

  if [[ ${#missing[@]} -gt 0 ]]; then
    echo "ERROR: Missing required commands: ${missing[*]}" >&2
    echo "Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
    exit 1
  fi
}

# Exit with an error if the connection settings are unusable.
validate_config() {
  if [[ -z "$REDIS_HOST" ]]; then
    echo "ERROR: REDIS_HOST is empty" >&2
    exit 1
  fi

  if [[ -z "$REDIS_PORT" ]]; then
    echo "ERROR: REDIS_PORT is empty" >&2
    exit 1
  fi

  # 10# forces base 10 so a value such as "08" is not parsed as octal.
  if [[ ! "$REDIS_PORT" =~ ^[0-9]+$ ]] \
    || (( 10#$REDIS_PORT < 1 || 10#$REDIS_PORT > 65535 )); then
    echo "ERROR: REDIS_PORT must be a port number between 1 and 65535" >&2
    exit 1
  fi
}

# Run one redis-cli command against the configured Sentinel.
# Arguments: the command and its arguments, e.g. `redis_cmd SENTINEL masters`
# Outputs:   the raw reply on stdout; nothing if the command fails or times out
# Returns:   always 0 (best effort; callers test for empty output)
redis_cmd() {
  local -a args=(-h "$REDIS_HOST" -p "$REDIS_PORT" --no-auth-warning)

  if [[ -n "$REDIS_AUTH" ]]; then
    # Hand the password over through the environment rather than `-a`, so it
    # is not visible in the process list. Exported only when non-empty, and
    # local, so it is scoped to this call.
    # NOTE(review): needs a redis-cli that honours REDISCLI_AUTH.
    local -x REDISCLI_AUTH="$REDIS_AUTH"
  fi

  # stderr is discarded on purpose: an unreachable Sentinel is reported
  # through the redis_sentinel_up metric, not through error text.
  timeout "$CURL_TIMEOUT" redis-cli "${args[@]}" "$@" 2>/dev/null || true
}

# Append the HELP and TYPE lines that open a metric family.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text
_add_metric_header() {
  OUTPUT+="# HELP ${1} ${3}"$'\n'
  OUTPUT+="# TYPE ${1} ${2}"$'\n'
}

# Append a single sample line (no HELP/TYPE) to OUTPUT.
# Arguments: $1 - metric name, $2 - value, $3 - label list (optional)
add_metric_value() {
  local name="$1"
  local value="$2"
  local labels="${3:-}"

  if [[ -n "$labels" ]]; then
    OUTPUT+="${name}{${labels}} ${value}"$'\n'
  else
    OUTPUT+="${name} ${value}"$'\n'
  fi
}

# Append a complete single-sample metric (HELP, TYPE and value) to OUTPUT.
# Arguments: $1 - name, $2 - type, $3 - help, $4 - value, $5 - labels (optional)
add_metric() {
  local name="$1"
  local type="$2"
  local help="$3"
  local value="$4"
  local labels="${5:-}"

  _add_metric_header "$name" "$type" "$help"
  add_metric_value "$name" "$value" "$labels"
}

# Print $1 as a base-10 integer, or 0 when it is not made of digits only.
# Keeps malformed replies out of arithmetic and the exposition output.
_to_int() {
  if [[ "${1:-}" =~ ^[0-9]+$ ]]; then
    printf '%d' "$((10#$1))"
  else
    printf '0'
  fi
}

# Query Sentinel and append all Sentinel metrics to OUTPUT.
# Returns: 0 when Sentinel answered PING, 1 when it is unreachable
#          (redis_sentinel_up 0 has been recorded in that case).
collect_sentinel() {
  # Test connectivity with PING
  local ping_result
  ping_result=$(redis_cmd PING)

  if [[ "$ping_result" != *"PONG"* ]]; then
    add_metric "redis_sentinel_up" "gauge" "Redis Sentinel reachability (1=up, 0=down)" "0"
    return 1
  fi
  add_metric "redis_sentinel_up" "gauge" "Redis Sentinel reachability (1=up, 0=down)" "1"

  # INFO sentinel carries the global statistics.
  local info_output
  info_output=$(redis_cmd INFO sentinel)

  local sentinel_masters=""
  if [[ -n "$info_output" ]]; then
    sentinel_masters=$(awk -F: '/^sentinel_masters:/ { gsub(/\r/, ""); print $2 }' <<<"$info_output")
  fi
  add_metric "redis_sentinel_masters_total" "gauge" "Total number of monitored masters" "$(_to_int "$sentinel_masters")"

  # SENTINEL masters is read as alternating key / value lines, one run per
  # master, each run starting with the "name" key. Reading strict pairs means
  # a value that happens to equal a key name cannot be mistaken for a key.
  local masters_output
  masters_output=$(redis_cmd SENTINEL masters)

  # Parallel arrays, one slot per master; read by _emit_master_metrics.
  local -a master_names=() master_flags=() master_slaves=()
  local -a master_sentinels=() master_quorums=()
  local key value
  local idx=-1

  while IFS= read -r key && IFS= read -r value; do
    key="${key%$'\r'}"
    value="${value%$'\r'}"

    if [[ "$key" == "name" ]]; then
      idx=$((idx + 1))
      master_names[idx]="$value"
      master_flags[idx]=""
      master_slaves[idx]=0
      master_sentinels[idx]=0
      master_quorums[idx]=0
      continue
    fi

    # Ignore anything that precedes the first "name" key.
    (( idx >= 0 )) || continue

    case "$key" in
      flags) master_flags[idx]="$value" ;;
      num-slaves) master_slaves[idx]=$(_to_int "$value") ;;
      num-other-sentinels) master_sentinels[idx]=$(_to_int "$value") ;;
      quorum) master_quorums[idx]=$(_to_int "$value") ;;
      *) ;;
    esac
  done <<<"$masters_output"

  _emit_master_metrics

  # Sum any failovers=N field found on the masterN: lines of INFO sentinel.
  # NOTE(review): confirm the deployed Sentinel version reports such a field;
  # when it is absent this metric stays at 0.
  local failover_count=0
  local info_line
  while IFS= read -r info_line; do
    if [[ "$info_line" == master* && "$info_line" =~ failovers=([0-9]+) ]]; then
      failover_count=$(( failover_count + 10#${BASH_REMATCH[1]} ))
    fi
  done <<<"$info_output"

  add_metric "redis_sentinel_failover_total" "gauge" "Total number of failovers observed" "$failover_count"
  return 0
}

# Emit one gauge family with one sample per master.
# Reads the caller's master_labels array for the label of each sample.
# Arguments: $1 - metric name, $2 - help text, $3.. - one value per master
_emit_master_family() {
  local name="$1"
  local help="$2"
  shift 2

  local i=0
  local value
  _add_metric_header "$name" "gauge" "$help"
  for value in "$@"; do
    add_metric_value "$name" "$value" "${master_labels[i]}"
    i=$((i + 1))
  done
}

# Emit every per-master metric family.
# Reads the master_* arrays filled in by collect_sentinel (the caller's
# locals). All samples of a family are written together, under a single
# HELP/TYPE header, as the exposition format requires.
_emit_master_metrics() {
  local count="${#master_names[@]}"
  if (( count == 0 )); then
    return 0
  fi

  local -a master_labels=() status=() sentinels=() odown=() sdown=()
  local i flags
  for (( i = 0; i < count; i++ )); do
    flags="${master_flags[i]}"
    master_labels[i]="name=\"${master_names[i]}\""

    # Status: 1=ok, 0=down (either subjectively or objectively).
    status[i]=1
    odown[i]=0
    sdown[i]=0
    if [[ "$flags" == *"o_down"* ]]; then
      odown[i]=1
      status[i]=0
    fi
    if [[ "$flags" == *"s_down"* ]]; then
      sdown[i]=1
      status[i]=0
    fi

    # num-other-sentinels excludes the Sentinel being queried: add 1 for self.
    sentinels[i]=$(( master_sentinels[i] + 1 ))
  done

  _emit_master_family "redis_sentinel_master_status" "Master status (1=ok, 0=down)" "${status[@]}"
  _emit_master_family "redis_sentinel_master_slaves" "Number of replicas for master" "${master_slaves[@]}"
  _emit_master_family "redis_sentinel_master_sentinels" "Number of sentinels monitoring master" "${sentinels[@]}"
  _emit_master_family "redis_sentinel_master_quorum" "Quorum required for master" "${master_quorums[@]}"
  _emit_master_family "redis_sentinel_master_odown" "Master in objectively down state (1=yes, 0=no)" "${odown[@]}"
  _emit_master_family "redis_sentinel_master_sdown" "Master in subjectively down state (1=yes, 0=no)" "${sdown[@]}"
}

# Write OUTPUT to stdout, or to the textfile collector directory when
# TEXTFILE_MODE is true.
write_output() {
  if [[ "$TEXTFILE_MODE" != true ]]; then
    printf '%s' "$OUTPUT"
    return 0
  fi

  local output_file="${TEXTFILE_DIR}/redis_sentinel.prom"
  # Written next to the target so the final mv is a rename within one
  # directory and readers never see a half-written .prom file.
  local temp_file="${output_file}.$$"

  mkdir -p -- "$TEXTFILE_DIR"
  if ! printf '%s' "$OUTPUT" >"$temp_file"; then
    rm -f -- "$temp_file"
    echo "ERROR: Cannot write ${temp_file}" >&2
    return 1
  fi
  mv -f -- "$temp_file" "$output_file"
}

# Install a cron.d entry that runs this script in --textfile mode.
# Requires root.
install_cron() {
  if [[ $EUID -ne 0 ]]; then
    echo "ERROR: --install requires root" >&2
    exit 1
  fi

  local script_path
  script_path=$(readlink -f "$0")

  # NOTE(review): the original cron entry was lost from the source. This
  # reconstruction runs every minute and carries over host, port and output
  # directory so the job writes where the message below says it will.
  # REDIS_AUTH is deliberately left out because the file is world-readable.
  cat > /etc/cron.d/redis-sentinel-exporter <<EOF
# Redis Sentinel Prometheus exporter
* * * * * root REDIS_HOST="${REDIS_HOST}" REDIS_PORT="${REDIS_PORT}" TEXTFILE_DIR="${TEXTFILE_DIR}" ${script_path} --textfile >/dev/null
EOF

  chmod 644 /etc/cron.d/redis-sentinel-exporter
  echo "Installed cron job: /etc/cron.d/redis-sentinel-exporter"
  echo "Metrics will be written to: ${TEXTFILE_DIR}/redis_sentinel.prom"
}

# Print the current time in nanoseconds since the epoch.
# Falls back to whole seconds when date(1) does not expand %N.
_now_ns() {
  local ns
  ns=$(date +%s%N)
  if [[ ! "$ns" =~ ^[0-9]+$ ]]; then
    ns="$(date +%s)000000000"
  fi
  printf '%s' "$ns"
}

# Print the time between two nanosecond timestamps as seconds with two
# decimals (truncated), using shell arithmetic only.
# Arguments: $1 - start (ns), $2 - end (ns)
_elapsed_seconds() {
  local elapsed=$(( $2 - $1 ))
  if (( elapsed < 0 )); then
    elapsed=0
  fi
  printf '%d.%02d' "$(( elapsed / 1000000000 ))" "$(( (elapsed % 1000000000) / 10000000 ))"
}

# --- Main ---

# Parse options, collect metrics and write them out.
# Returns: 0 on success; the collect_sentinel status when Sentinel was
#          unreachable (the metrics, including redis_sentinel_up 0, are
#          still written first).
main() {
  # Parse arguments
  local arg
  for arg in "$@"; do
    case "$arg" in
      --textfile)
        TEXTFILE_MODE=true
        ;;
      --install)
        check_dependencies
        validate_config
        install_cron
        exit 0
        ;;
      --help|-h)
        usage
        ;;
      *)
        echo "Unknown option: $arg" >&2
        usage 1 >&2
        ;;
    esac
  done

  check_dependencies
  validate_config

  START_TIME=$(_now_ns)

  # Exporter info
  add_metric "redis_sentinel_exporter_info" "gauge" "Exporter version information" "1" "version=\"${VERSION}\""

  # Collect metrics. A down Sentinel must not abort the run under `set -e`:
  # the output still has to be written so redis_sentinel_up 0 gets published.
  local collect_status=0
  collect_sentinel || collect_status=$?

  # Exporter performance
  local end_time duration
  end_time=$(_now_ns)
  duration=$(_elapsed_seconds "$START_TIME" "$end_time")

  add_metric "redis_sentinel_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$duration"
  add_metric "redis_sentinel_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"

  write_output
  return "$collect_status"
}

main "$@"