#!/bin/bash
################################################
#### Redis Metrics Collector                ####
#### for Prometheus node_exporter textfile  ####
####                                        ####
#### Author:  Phil Connor                   ####
#### Contact: contact@mylinux.work          ####
#### Version: 1.0.0.20260309                ####
################################################
#
# Queries a Redis server through redis-cli and writes Prometheus metrics to
# $NODE_DIR/textfile_collector/redis_metrics.prom. Run with --help for the
# list of options and environment variables.
#
# NOTE(review): the copy of this script that was reviewed had three regions
# destroyed (the INFO, slowlog and metadata heredocs plus the function
# headers that followed them). Those regions are reconstructed below and are
# individually marked; confirm metric names against the last good version.

set -o pipefail

SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Default configuration
readonly DEFAULT_NODE_DIR="/var/lib/node_exporter"
readonly DEFAULT_COLLECTION_INTERVAL=60
readonly DEFAULT_TOP_N_KEYS=10

# Configuration variables (can be overridden by environment)
REDIS_HOST=${REDIS_HOST:-127.0.0.1}
REDIS_PORT=${REDIS_PORT:-6379}
REDIS_PASSWORD=${REDIS_PASSWORD:-}
REDIS_CLI=${REDIS_CLI:-}
NODE_DIR=${NODE_DIR:-$DEFAULT_NODE_DIR}
COLLECTION_INTERVAL=${COLLECTION_INTERVAL:-$DEFAULT_COLLECTION_INTERVAL}
TOP_N_KEYS=${TOP_N_KEYS:-$DEFAULT_TOP_N_KEYS}
DEBUG=${DEBUG:-}

# Runtime flags
RUN_MODE="once"

# Error tracking
ERRORS_TOTAL=0

#######################################
# ERR trap handler: report the failing line and exit with its status.
# Arguments: $1 - exit code, $2 - line number
#######################################
handle_error() {
  local exit_code=$1
  local line_number=$2
  echo "Error: $SCRIPT_NAME failed at line $line_number with exit code $exit_code" >&2
  exit "$exit_code"
}

# errtrace (set -E) is deliberately not enabled, so this only fires for
# failures at the top level of the script (in practice: main returning
# non-zero), not for commands inside functions.
trap 'handle_error $? $LINENO' ERR

#######################################
# Print a message to stderr when DEBUG is non-empty.
# Arguments: message words
#######################################
debug_echo() {
  if [[ -n "$DEBUG" ]]; then
    echo "[DEBUG] $*" >&2
  fi
}

#######################################
# Print usage and exit.
# Arguments: $1 - exit status (optional, default 0)
#######################################
show_help() {
  local status=${1:-0}
  cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]

Redis metrics collector for Prometheus node_exporter textfile directory.
Collects connected clients, memory usage, keyspace hit/miss ratios,
per-database key counts, slowlog analysis, big key detection,
persistence status, replication info, and uptime from Redis via redis-cli.

OPTIONS:
    --once              Run collection once and exit (default)
    --daemon            Run continuously at COLLECTION_INTERVAL
    --help, -h          Show this help message

ENVIRONMENT VARIABLES:
    REDIS_HOST              Redis server hostname (default: 127.0.0.1)
    REDIS_PORT              Redis server port (default: 6379)
    REDIS_PASSWORD          Redis AUTH password (default: empty)
    REDIS_CLI               Path to redis-cli binary (default: auto-detect)
    NODE_DIR                Node exporter textfile directory (default: $DEFAULT_NODE_DIR)
    COLLECTION_INTERVAL     Seconds between collections in daemon mode (default: $DEFAULT_COLLECTION_INTERVAL)
    TOP_N_KEYS              Number of largest keys to report (default: $DEFAULT_TOP_N_KEYS)
    DEBUG                   Enable debug output

EXAMPLES:
    $SCRIPT_NAME --once
    REDIS_PASSWORD=secret $SCRIPT_NAME --daemon
    REDIS_HOST=redis.example.com REDIS_PORT=6380 $SCRIPT_NAME

OUTPUT:
    Writes metrics to \$NODE_DIR/textfile_collector/redis_metrics.prom
EOF
  exit "$status"
}

#######################################
# Parse command-line options into RUN_MODE.
# An unknown option prints usage on stderr and exits with status 2
# (previously it exited 0, which hid typos from cron/systemd).
# Arguments: the script's positional parameters
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --once)
        RUN_MODE="once"
        ;;
      --daemon)
        RUN_MODE="daemon"
        ;;
      --help | -h)
        show_help
        ;;
      *)
        echo "Unknown option: $1" >&2
        show_help 2 >&2
        ;;
    esac
    shift
  done
}

#######################################
# Resolve REDIS_CLI: honour an explicit path, otherwise search PATH.
# Exits 1 when no executable redis-cli can be found.
#######################################
detect_redis_cli() {
  if [[ -n "$REDIS_CLI" ]]; then
    if [[ ! -x "$REDIS_CLI" ]]; then
      echo "Error: REDIS_CLI not found or not executable: $REDIS_CLI" >&2
      exit 1
    fi
    return
  fi

  REDIS_CLI=$(command -v redis-cli 2>/dev/null) || true
  if [[ -z "$REDIS_CLI" ]]; then
    echo "Error: redis-cli not found in PATH" >&2
    exit 1
  fi
}

#######################################
# Validate the environment before collecting: redis-cli must exist, the
# textfile directory must exist, and TOP_N_KEYS must be a non-negative
# integer because it is used in arithmetic and passed to `head -n`.
#######################################
validate_config() {
  detect_redis_cli

  local textfile_dir="${NODE_DIR}/textfile_collector"
  if [[ ! -d "$textfile_dir" ]]; then
    echo "Error: Textfile collector directory not found: $textfile_dir" >&2
    echo "Create it: sudo mkdir -p $textfile_dir" >&2
    exit 1
  fi

  if [[ ! "$TOP_N_KEYS" =~ ^[0-9]+$ ]]; then
    echo "Error: TOP_N_KEYS must be a non-negative integer: $TOP_N_KEYS" >&2
    exit 1
  fi
}

#######################################
# Run a redis-cli command against REDIS_HOST:REDIS_PORT.
# Arguments are forwarded one-to-one ("$@"), so keys containing spaces or
# glob characters reach redis-cli intact. redis-cli's stderr is discarded.
# Arguments: redis-cli options and/or the Redis command with its arguments
# Outputs:   redis-cli's stdout
# Returns:   redis-cli's exit status
#######################################
redis_cmd() {
  local -a args=(-h "$REDIS_HOST" -p "$REDIS_PORT")
  local -a shown=("${args[@]}")

  if [[ -n "$REDIS_PASSWORD" ]]; then
    # The password is still passed with -a, as before, so it remains visible
    # in the process list; only the debug log is redacted here.
    args+=(-a "$REDIS_PASSWORD" --no-auth-warning)
    shown+=(-a '***' --no-auth-warning)
  fi

  debug_echo "redis-cli ${shown[*]} $*"
  "$REDIS_CLI" "${args[@]}" "$@" 2>/dev/null
}

#######################################
# Run a redis-cli command whose reply is expected to be one integer.
# Arguments: same as redis_cmd
# Outputs:   the integer, or 0 when the command fails or the reply is not
#            a bare integer (optionally prefixed with "(integer) ")
#######################################
redis_int() {
  local reply
  reply=$(redis_cmd "$@") || reply=""
  reply=${reply%$'\r'}

  if [[ "$reply" =~ ^(\(integer\)\ )?([0-9]+)$ ]]; then
    printf '%s\n' "${BASH_REMATCH[2]}"
  else
    printf '0\n'
  fi
}

#######################################
# Emit metrics derived from the INFO command.
# Globals: ERRORS_TOTAL (incremented when INFO fails)
# Outputs: Prometheus text on stdout
# Returns: 1 when INFO cannot be fetched
#######################################
collect_info_metrics() {
  debug_echo "Collecting INFO metrics..."

  local info
  info=$(redis_cmd INFO) || {
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    return 1
  }

  # Index every "name:<digits>" line in one pass. Only the leading digit run
  # of the value is kept, which also drops the trailing CR that INFO emits.
  local -A fields=()
  local line
  while IFS= read -r line; do
    if [[ "$line" =~ ^([a-z0-9_]+):([0-9]+) ]]; then
      fields["${BASH_REMATCH[1]}"]=${BASH_REMATCH[2]}
    fi
  done <<< "$info"

  local connected_clients=${fields[connected_clients]:-0}
  local used_memory=${fields[used_memory]:-0}
  local keyspace_hits=${fields[keyspace_hits]:-0}
  local keyspace_misses=${fields[keyspace_misses]:-0}
  local rdb_last_save_time=${fields[rdb_last_save_time]:-0}
  local aof_enabled=${fields[aof_enabled]:-0}
  local connected_slaves=${fields[connected_slaves]:-0}
  local uptime_in_seconds=${fields[uptime_in_seconds]:-0}

  # NOTE(review): this heredoc was truncated in the reviewed copy; metric
  # names and HELP text are reconstructed from the variables above.
  cat << EOF
# HELP redis_connected_clients Number of connected clients.
# TYPE redis_connected_clients gauge
redis_connected_clients $connected_clients
# HELP redis_memory_used_bytes Memory allocated by Redis in bytes.
# TYPE redis_memory_used_bytes gauge
redis_memory_used_bytes $used_memory
# HELP redis_keyspace_hits_total Successful key lookups.
# TYPE redis_keyspace_hits_total counter
redis_keyspace_hits_total $keyspace_hits
# HELP redis_keyspace_misses_total Failed key lookups.
# TYPE redis_keyspace_misses_total counter
redis_keyspace_misses_total $keyspace_misses
# HELP redis_rdb_last_save_timestamp_seconds Unix time of the last successful RDB save.
# TYPE redis_rdb_last_save_timestamp_seconds gauge
redis_rdb_last_save_timestamp_seconds $rdb_last_save_time
# HELP redis_aof_enabled Whether AOF persistence is enabled (1) or not (0).
# TYPE redis_aof_enabled gauge
redis_aof_enabled $aof_enabled
# HELP redis_connected_slaves Number of connected replicas.
# TYPE redis_connected_slaves gauge
redis_connected_slaves $connected_slaves
# HELP redis_uptime_seconds Seconds since the Redis server started.
# TYPE redis_uptime_seconds gauge
redis_uptime_seconds $uptime_in_seconds

EOF
}

#######################################
# Emit redis_db_keys{db="N"} for every database listed by INFO keyspace.
# NOTE(review): the head of this function was lost in the reviewed copy;
# only the DBSIZE call and the redis_db_keys output line survived. How the
# database list was originally obtained must be confirmed.
# Globals: ERRORS_TOTAL (incremented when INFO keyspace fails)
# Outputs: Prometheus text on stdout
# Returns: 1 when INFO keyspace cannot be fetched
#######################################
collect_db_keys() {
  debug_echo "Collecting per-database key counts..."

  local keyspace
  keyspace=$(redis_cmd INFO keyspace) || {
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    return 1
  }

  # Gather the database numbers first so the DBSIZE calls below do not run
  # while a loop is still reading from the here-string.
  local -a dbs=()
  local line
  while IFS= read -r line; do
    if [[ "$line" =~ ^db([0-9]+): ]]; then
      dbs+=("${BASH_REMATCH[1]}")
    fi
  done <<< "$keyspace"

  echo "# HELP redis_db_keys Number of keys in each database."
  echo "# TYPE redis_db_keys gauge"

  local db dbsize
  for db in "${dbs[@]}"; do
    dbsize=$(redis_int -n "$db" DBSIZE)
    echo "redis_db_keys{db=\"$db\"} $dbsize"
  done
  echo ""
}

#######################################
# Emit the slowlog length and the duration of the newest slowlog entry.
# Outputs: Prometheus text on stdout
#######################################
collect_slowlog_metrics() {
  debug_echo "Collecting slowlog metrics..."

  local slowlog_len
  slowlog_len=$(redis_int SLOWLOG LEN)

  local slowlog_latest_duration=0
  if [[ "$slowlog_len" -gt 0 ]]; then
    local slowlog_entry
    slowlog_entry=$(redis_cmd SLOWLOG GET 1) || slowlog_entry=""

    if [[ -n "$slowlog_entry" ]]; then
      # When stdout is not a terminal redis-cli prints one reply element per
      # line: id, timestamp, duration (microseconds), then the command
      # arguments. The duration is therefore line 3; the previous code read
      # line 4, which is the first command argument.
      local -a entry_lines=()
      mapfile -t entry_lines <<< "$slowlog_entry"
      slowlog_latest_duration=${entry_lines[2]:-}
      slowlog_latest_duration=${slowlog_latest_duration//[^0-9]/}
      [[ -z "$slowlog_latest_duration" ]] && slowlog_latest_duration=0
    fi
  fi

  # NOTE(review): this heredoc was truncated in the reviewed copy; metric
  # names are reconstructed from the variables above.
  cat << EOF
# HELP redis_slowlog_length Number of entries in the slowlog.
# TYPE redis_slowlog_length gauge
redis_slowlog_length $slowlog_len
# HELP redis_slowlog_latest_duration_microseconds Execution time of the most recent slowlog entry.
# TYPE redis_slowlog_latest_duration_microseconds gauge
redis_slowlog_latest_duration_microseconds $slowlog_latest_duration

EOF
}

#######################################
# Sample random keys, measure them with MEMORY USAGE and report the
# TOP_N_KEYS largest as redis_big_key_bytes{key="..."}.
# NOTE(review): the head of this function (including the sample count) was
# lost in the reviewed copy; TOP_N_KEYS * 10 samples is an assumption.
# Outputs: Prometheus text on stdout (nothing when no key was measured)
#######################################
collect_big_keys() {
  debug_echo "Collecting big key samples..."

  local -A key_sizes=()
  local samples=$((TOP_N_KEYS * 10))
  local i key size

  for ((i = 0; i < samples; i++)); do
    # Stop sampling when Redis is unreachable or the database is empty;
    # further RANDOMKEY calls would only repeat the same outcome.
    key=$(redis_cmd RANDOMKEY) || break
    [[ -z "$key" || "$key" == "(nil)" ]] && break
    # The sort/head stage below is line-based; a key with an embedded
    # newline cannot be represented there.
    [[ "$key" == *$'\n'* ]] && continue

    size=$(redis_int MEMORY USAGE "$key")
    [[ "$size" -gt 0 ]] && key_sizes["$key"]=$size
  done

  if [[ ${#key_sizes[@]} -eq 0 ]]; then
    return 0
  fi

  echo "# HELP redis_big_key_bytes Memory usage in bytes of sampled large keys."
  echo "# TYPE redis_big_key_bytes gauge"

  local line safe_key
  local bs='\' dq='"'
  # Size goes first so that the key, which may itself contain spaces, is
  # simply "everything after the first space".
  for key in "${!key_sizes[@]}"; do
    printf '%s %s\n' "${key_sizes[$key]}" "$key"
  done | LC_ALL=C sort -k1,1nr | head -n "$TOP_N_KEYS" | while IFS= read -r line; do
    size=${line%% *}
    key=${line#* }
    # Escape backslash and double quote for the Prometheus label value.
    safe_key=${key//"$bs"/"$bs$bs"}
    safe_key=${safe_key//"$dq"/"$bs$dq"}
    echo "redis_big_key_bytes{key=\"$safe_key\"} $size"
  done
  echo ""
}

#######################################
# Emit metadata about the collection run itself.
# NOTE(review): this heredoc was truncated in the reviewed copy; the metrics
# below are reconstructed (ERRORS_TOTAL is the only surviving input).
# Outputs: Prometheus text on stdout
#######################################
collect_metadata() {
  cat << EOF
# HELP redis_collector_errors_total Errors encountered by the collector since it started.
# TYPE redis_collector_errors_total counter
redis_collector_errors_total $ERRORS_TOTAL
# HELP redis_collector_last_run_timestamp_seconds Unix time of the last collection run.
# TYPE redis_collector_last_run_timestamp_seconds gauge
redis_collector_last_run_timestamp_seconds $(date +%s)
EOF
}

#######################################
# Run every collector and atomically publish the result as
# $NODE_DIR/textfile_collector/redis_metrics.prom.
# Globals: ERRORS_TOTAL (incremented on write/publish failure)
# Returns: 1 when the metrics file cannot be written or moved into place
#######################################
collect_all() {
  local textfile_dir="${NODE_DIR}/textfile_collector"
  local output_file="${textfile_dir}/redis_metrics.prom"
  local temp_file

  # The temp file lives in the target directory so that mv is a rename on
  # the same filesystem, and its name does not end in ".prom".
  temp_file=$(mktemp "${output_file}.XXXXXX") || {
    echo "Error: cannot create temporary file in $textfile_dir" >&2
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    return 1
  }

  # stderr is intentionally NOT redirected here: redis_cmd already silences
  # redis-cli, and a blanket 2>/dev/null also swallowed every debug_echo
  # message issued by the collectors.
  {
    collect_info_metrics
    collect_db_keys
    collect_slowlog_metrics
    collect_big_keys
    collect_metadata
  } > "$temp_file"

  # mktemp creates the file 0600; the exporter usually runs as another user.
  if ! chmod 0644 "$temp_file" || ! mv -f -- "$temp_file" "$output_file"; then
    echo "Error: cannot write $output_file" >&2
    rm -f -- "$temp_file"
    ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
    return 1
  fi

  debug_echo "Collection complete. Wrote to $output_file (errors: $ERRORS_TOTAL)"
}

#######################################
# Entry point: parse options, validate, then collect once or loop forever.
# Arguments: the script's positional parameters
#######################################
main() {
  parse_args "$@"
  validate_config

  case "$RUN_MODE" in
    once)
      collect_all
      ;;
    daemon)
      echo "$SCRIPT_NAME running in daemon mode (interval: ${COLLECTION_INTERVAL}s)"
      while true; do
        collect_all
        # A failing sleep (e.g. invalid COLLECTION_INTERVAL) would otherwise
        # turn this loop into a busy loop hammering Redis.
        if ! sleep "$COLLECTION_INTERVAL"; then
          echo "Error: sleep failed for COLLECTION_INTERVAL=$COLLECTION_INTERVAL" >&2
          exit 1
        fi
      done
      ;;
  esac
}

main "$@"