#!/usr/bin/env bash
#
# Elasticsearch Prometheus Metrics Exporter
#
# Prometheus textfile collector exporter for Elasticsearch.
# Uses the Elasticsearch REST API to collect cluster health,
# node statistics, index counts, JVM memory, search/indexing
# throughput, circuit breaker state, and shard status.
#
# Usage:
#   ./elasticsearch-exporter.sh
#   ./elasticsearch-exporter.sh --textfile
#   ./elasticsearch-exporter.sh --install
#
# Parameters:
#   --textfile    Write to textfile collector directory
#   --install     Create cron job for automatic collection
#   --help        Show usage
#
# Environment:
#   ES_URL        Elasticsearch REST API URL (default: http://localhost:9200)
#   ES_USER       Username for basic auth (optional)
#   ES_PASS       Password for basic auth (optional)
#   TEXTFILE_DIR  Textfile collector directory
#                 (default: /var/lib/node_exporter/textfile_collector)
#   CURL_TIMEOUT  API request timeout in seconds (default: 10)
#
# Author:  Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Version: 1.0
#
# Metrics Exported:
#   Core:
#     - elasticsearch_up
#     - elasticsearch_exporter_info{version}
#
#   Cluster Health:
#     - elasticsearch_cluster_health{status}
#     - elasticsearch_cluster_nodes_total
#     - elasticsearch_cluster_data_nodes
#     - elasticsearch_cluster_shards_active
#     - elasticsearch_cluster_shards_relocating
#     - elasticsearch_cluster_shards_initializing
#     - elasticsearch_cluster_shards_unassigned
#     - elasticsearch_cluster_pending_tasks
#
#   Cluster Stats:
#     - elasticsearch_indices_total
#     - elasticsearch_documents_total
#     - elasticsearch_store_size_bytes
#
#   Node Stats:
#     - elasticsearch_jvm_heap_used_bytes{node}
#     - elasticsearch_jvm_heap_max_bytes{node}
#     - elasticsearch_search_query_total{node}
#     - elasticsearch_indexing_index_total{node}
#     - elasticsearch_circuit_breaker_tripped{node,breaker}
#
#   Exporter:
#     - elasticsearch_exporter_duration_seconds
#     - elasticsearch_exporter_last_run_timestamp

set -euo pipefail

# --- Configuration ---

readonly VERSION="1.0"
readonly SCRIPT_NAME="${0##*/}"

ES_URL="${ES_URL:-http://localhost:9200}"
ES_USER="${ES_USER:-}"
ES_PASS="${ES_PASS:-}"
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
CURL_TIMEOUT="${CURL_TIMEOUT:-10}"

TEXTFILE_MODE=false # set to true by --textfile
OUTPUT=""           # accumulated exposition-format text
START_TIME=""       # collection start, in nanoseconds since the epoch

# --- Functions ---

#######################################
# Print the help text and exit.
# NOTE(review): the help text was reconstructed from the file header;
# confirm the wording against the intended original.
# Arguments: $1 - exit status (optional, default 0)
# Outputs:   help text on stdout
#######################################
usage() {
  cat <<EOF
Usage: ${SCRIPT_NAME} [--textfile] [--install] [--help]

Elasticsearch Prometheus metrics exporter v${VERSION}

Options:
  --textfile    Write to textfile collector directory
  --install     Create cron job for automatic collection
  --help        Show usage

Environment:
  ES_URL        Elasticsearch REST API URL (default: http://localhost:9200)
  ES_USER       Username for basic auth (optional)
  ES_PASS       Password for basic auth (optional)
  TEXTFILE_DIR  Textfile collector directory
                (default: /var/lib/node_exporter/textfile_collector)
  CURL_TIMEOUT  API request timeout in seconds (default: 10)
EOF
  exit "${1:-0}"
}

#######################################
# Abort with an install hint when a required external command is missing.
# NOTE(review): the command list was reconstructed; curl and jq are the
# external tools this script invokes for collection.
# Outputs: error message on stderr
# Returns: exits 1 if anything is missing
#######################################
check_dependencies() {
  local missing=()
  local cmd
  for cmd in curl jq; do
    if ! command -v "$cmd" &>/dev/null; then
      missing+=("$cmd")
    fi
  done

  if [[ ${#missing[@]} -gt 0 ]]; then
    echo "ERROR: Missing required commands: ${missing[*]}" >&2
    echo "Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
    exit 1
  fi
}

#######################################
# Normalise and sanity-check the configuration.
# Globals: ES_URL (written), CURL_TIMEOUT (read)
# Returns: exits 1 when CURL_TIMEOUT is not a non-negative number
#######################################
validate_config() {
  # Strip trailing slash so "${ES_URL}${endpoint}" never contains "//".
  ES_URL="${ES_URL%/}"

  # A malformed timeout would make every curl call fail and be reported
  # as "Elasticsearch down"; fail loudly instead.
  if [[ ! "$CURL_TIMEOUT" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "ERROR: CURL_TIMEOUT must be a number of seconds, got: ${CURL_TIMEOUT}" >&2
    exit 1
  fi
}

#######################################
# Backslash-escape '\', '"' and newline in a string.
# The same three escapes are used for Prometheus label values and for
# double-quoted values in a curl config file.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
escape_quoted() {
  local value=$1
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  value=${value//$'\n'/\\n}
  printf '%s' "$value"
}

#######################################
# GET an Elasticsearch REST endpoint.
# Globals:   ES_URL, ES_USER, ES_PASS, CURL_TIMEOUT (read)
# Arguments: $1 - endpoint path beginning with "/"
# Outputs:   response body on stdout; nothing when the request fails
# Returns:   always 0 (callers test for an empty body)
#######################################
api_get() {
  local endpoint="$1"
  local curl_args=(-sf --max-time "$CURL_TIMEOUT")

  if [[ -n "$ES_USER" && -n "$ES_PASS" ]]; then
    # Credentials go through a config "file" on stdin rather than -u on
    # the command line, so they do not show up in the process list.
    curl "${curl_args[@]}" --config - "${ES_URL}${endpoint}" 2>/dev/null \
      <<<"user = \"$(escape_quoted "${ES_USER}:${ES_PASS}")\"" || true
  else
    curl "${curl_args[@]}" "${ES_URL}${endpoint}" 2>/dev/null </dev/null || true
  fi
}

#######################################
# Evaluate a jq filter that should yield a single number.
# Arguments: $1 - JSON document; remaining args are passed to jq
# Outputs:   the number, or 0 when jq fails or the result is not numeric
# Returns:   always 0, so a bad document never aborts the run under set -e
#######################################
json_number() {
  local json=$1
  shift
  local out
  out=$(jq "$@" <<<"$json" 2>/dev/null) || out=""
  if [[ "$out" =~ ^-?[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$ ]]; then
    printf '%s\n' "$out"
  else
    printf '0\n'
  fi
}

#######################################
# Evaluate a jq filter that should yield a string (raw output).
# Arguments: $1 - JSON document, $2 - fallback; remaining args go to jq
# Outputs:   the string, or the fallback when jq fails or yields nothing
# Returns:   always 0
#######################################
json_string() {
  local json=$1
  local fallback=$2
  shift 2
  local out
  out=$(jq -r "$@" <<<"$json" 2>/dev/null) || out=""
  printf '%s\n' "${out:-$fallback}"
}

#######################################
# Append one sample line to OUTPUT.
# Globals:   OUTPUT (appended)
# Arguments: $1 - metric name, $2 - value,
#            $3 - pre-formatted label list, e.g. node="a" (optional)
#######################################
add_metric_value() {
  local name="$1"
  local value="$2"
  local labels="${3:-}"

  if [[ -n "$labels" ]]; then
    OUTPUT+="${name}{${labels}} ${value}"$'\n'
  else
    OUTPUT+="${name} ${value}"$'\n'
  fi
}

#######################################
# Append a HELP line, a TYPE line and one sample to OUTPUT.
# Globals:   OUTPUT (appended)
# Arguments: $1 - metric name, $2 - metric type, $3 - help text,
#            $4 - value, $5 - pre-formatted label list (optional)
#######################################
add_metric() {
  local name="$1"
  local type="$2"
  local help="$3"
  local value="$4"
  local labels="${5:-}"

  OUTPUT+="# HELP ${name} ${help}"$'\n'
  OUTPUT+="# TYPE ${name} ${type}"$'\n'
  add_metric_value "$name" "$value" "$labels"
}

#######################################
# Collect /_cluster/health and emit elasticsearch_up plus the cluster
# health, node-count, shard-count and pending-task gauges.
# Returns: 0 when the API answered, 1 when it did not
#######################################
collect_cluster_health() {
  local health_json
  health_json=$(api_get "/_cluster/health")

  if [[ -z "$health_json" ]]; then
    add_metric "elasticsearch_up" "gauge" \
      "Elasticsearch reachability (1=up, 0=down)" "0"
    return 1
  fi

  add_metric "elasticsearch_up" "gauge" \
    "Elasticsearch reachability (1=up, 0=down)" "1"

  # Cluster health status (green=0, yellow=1, red=2); anything
  # unrecognised is reported as 2.
  local status status_value
  status=$(json_string "$health_json" "red" '.status // "red"')
  case "$status" in
    green) status_value=0 ;;
    yellow) status_value=1 ;;
    red) status_value=2 ;;
    *) status_value=2 ;;
  esac
  add_metric "elasticsearch_cluster_health" "gauge" \
    "Cluster health status (green=0, yellow=1, red=2)" \
    "$status_value" "status=\"$(escape_quoted "$status")\""

  # Node counts
  add_metric "elasticsearch_cluster_nodes_total" "gauge" \
    "Total number of cluster nodes" \
    "$(json_number "$health_json" '.number_of_nodes // 0')"
  add_metric "elasticsearch_cluster_data_nodes" "gauge" \
    "Number of data nodes" \
    "$(json_number "$health_json" '.number_of_data_nodes // 0')"

  # Shard counts
  add_metric "elasticsearch_cluster_shards_active" "gauge" \
    "Number of active shards" \
    "$(json_number "$health_json" '.active_shards // 0')"
  add_metric "elasticsearch_cluster_shards_relocating" "gauge" \
    "Number of relocating shards" \
    "$(json_number "$health_json" '.relocating_shards // 0')"
  add_metric "elasticsearch_cluster_shards_initializing" "gauge" \
    "Number of initializing shards" \
    "$(json_number "$health_json" '.initializing_shards // 0')"
  add_metric "elasticsearch_cluster_shards_unassigned" "gauge" \
    "Number of unassigned shards" \
    "$(json_number "$health_json" '.unassigned_shards // 0')"

  # Pending tasks
  add_metric "elasticsearch_cluster_pending_tasks" "gauge" \
    "Number of pending cluster tasks" \
    "$(json_number "$health_json" '.number_of_pending_tasks // 0')"

  return 0
}

#######################################
# Collect /_cluster/stats and emit index, document and store-size gauges.
# Emits nothing when the endpoint does not answer.
#######################################
collect_cluster_stats() {
  local stats_json
  stats_json=$(api_get "/_cluster/stats")
  if [[ -z "$stats_json" ]]; then
    return 0
  fi

  add_metric "elasticsearch_indices_total" "gauge" \
    "Total number of indices" \
    "$(json_number "$stats_json" '.indices.count // 0')"
  add_metric "elasticsearch_documents_total" "gauge" \
    "Total number of documents" \
    "$(json_number "$stats_json" '.indices.docs.count // 0')"
  add_metric "elasticsearch_store_size_bytes" "gauge" \
    "Total store size in bytes" \
    "$(json_number "$stats_json" '.indices.store.size_in_bytes // 0')"
}

#######################################
# Collect /_nodes/stats and emit the per-node JVM heap, search, indexing
# and circuit-breaker series.
# Emits nothing when the endpoint does not answer or lists no nodes.
#######################################
collect_node_stats() {
  local nodes_json
  nodes_json=$(api_get "/_nodes/stats")
  if [[ -z "$nodes_json" ]]; then
    return 0
  fi

  # Node IDs, one per line. Process substitution keeps a jq failure from
  # tripping set -e; it simply yields no IDs.
  local node_ids=()
  local id
  while IFS= read -r id; do
    if [[ -n "$id" ]]; then
      node_ids+=("$id")
    fi
  done < <(jq -r '.nodes | keys[]' <<<"$nodes_json" 2>/dev/null)

  if [[ ${#node_ids[@]} -eq 0 ]]; then
    return 0
  fi

  # Resolve each node's label once instead of once per metric family.
  # IDs are handed to jq with --arg rather than spliced into the filter.
  local node_labels=()
  local node_name
  for id in "${node_ids[@]}"; do
    node_name=$(json_string "$nodes_json" "$id" \
      --arg id "$id" '.nodes[$id].name // $id')
    node_labels+=("node=\"$(escape_quoted "$node_name")\"")
  done

  # name|help|jq path relative to the node object
  local families=(
    "elasticsearch_jvm_heap_used_bytes|JVM heap memory used per node|.jvm.mem.heap_used_in_bytes"
    "elasticsearch_jvm_heap_max_bytes|JVM heap memory max per node|.jvm.mem.heap_max_in_bytes"
    "elasticsearch_search_query_total|Total search queries per node|.indices.search.query_total"
    "elasticsearch_indexing_index_total|Total indexing operations per node|.indices.indexing.index_total"
  )

  local spec name help path i value
  for spec in "${families[@]}"; do
    IFS='|' read -r name help path <<<"$spec"
    OUTPUT+="# HELP ${name} ${help}"$'\n'
    OUTPUT+="# TYPE ${name} gauge"$'\n'
    for i in "${!node_ids[@]}"; do
      value=$(json_number "$nodes_json" \
        --arg id "${node_ids[$i]}" ".nodes[\$id]${path} // 0")
      add_metric_value "$name" "$value" "${node_labels[$i]}"
    done
  done

  # Circuit breaker trips per node per breaker type
  OUTPUT+="# HELP elasticsearch_circuit_breaker_tripped Circuit breaker trip count per node and breaker"$'\n'
  OUTPUT+="# TYPE elasticsearch_circuit_breaker_tripped gauge"$'\n'

  local breaker_name tripped
  for i in "${!node_ids[@]}"; do
    while IFS= read -r breaker_name; do
      if [[ -z "$breaker_name" ]]; then
        continue
      fi
      tripped=$(json_number "$nodes_json" \
        --arg id "${node_ids[$i]}" --arg breaker "$breaker_name" \
        '.nodes[$id].breakers[$breaker].tripped // 0')
      add_metric_value "elasticsearch_circuit_breaker_tripped" "$tripped" \
        "${node_labels[$i]},breaker=\"$(escape_quoted "$breaker_name")\""
    done < <(jq -r --arg id "${node_ids[$i]}" \
      '.nodes[$id].breakers // {} | keys[]' <<<"$nodes_json" 2>/dev/null)
  done
}

#######################################
# Emit OUTPUT: to ${TEXTFILE_DIR}/elasticsearch.prom in textfile mode
# (written to a temp file, then renamed into place), otherwise to stdout.
# Globals: TEXTFILE_MODE, TEXTFILE_DIR, OUTPUT (read)
# Returns: 1 when the textfile cannot be written
#######################################
write_output() {
  if [[ "$TEXTFILE_MODE" == true ]]; then
    local output_file="${TEXTFILE_DIR}/elasticsearch.prom"
    local temp_file="${output_file}.$$"

    mkdir -p -- "$TEXTFILE_DIR"
    if ! printf '%s' "$OUTPUT" >"$temp_file" ||
      ! mv -f -- "$temp_file" "$output_file"; then
      # Do not leave a partial temp file behind.
      rm -f -- "$temp_file"
      echo "ERROR: could not write ${output_file}" >&2
      return 1
    fi
  else
    printf '%s' "$OUTPUT"
  fi
}

#######################################
# Install /etc/cron.d/elasticsearch-exporter so the exporter runs in
# --textfile mode with the current ES_* / TEXTFILE_DIR settings.
# NOTE(review): the cron file body was reconstructed; the every-minute
# schedule and "2>/dev/null" redirect are assumptions to be confirmed.
# Returns: exits 1 when not run as root
#######################################
install_cron() {
  if [[ $EUID -ne 0 ]]; then
    echo "ERROR: --install requires root" >&2
    exit 1
  fi

  local script_path
  script_path=$(readlink -f "$0")

  local cron_file="/etc/cron.d/elasticsearch-exporter"

  # Real newlines, so no echo -e style expansion is needed and
  # backslashes in the password are written as-is.
  local env_vars="ES_URL=${ES_URL}"$'\n'
  if [[ -n "$ES_USER" ]]; then
    env_vars+="ES_USER=${ES_USER}"$'\n'
  fi
  if [[ -n "$ES_PASS" ]]; then
    env_vars+="ES_PASS=${ES_PASS}"$'\n'
  fi
  env_vars+="TEXTFILE_DIR=${TEXTFILE_DIR}"

  # The file is only readable by root when it carries a password.
  local cron_mode=644
  if [[ -n "$ES_PASS" ]]; then
    cron_mode=600
  fi

  # Recreate the file under a restrictive umask so the password is never
  # briefly world-readable, then relax the mode if that is safe.
  rm -f -- "$cron_file"
  (
    umask 077
    cat >"$cron_file" <<EOF
# Elasticsearch Prometheus exporter - installed by ${SCRIPT_NAME}
${env_vars}
* * * * * root ${script_path} --textfile 2>/dev/null
EOF
  )
  chmod "$cron_mode" "$cron_file"

  echo "Installed cron job: ${cron_file}"
  echo "Metrics will be written to: ${TEXTFILE_DIR}/elasticsearch.prom"
}

#######################################
# Current time in nanoseconds since the epoch.
# Falls back to whole seconds when `date` does not support %N.
# Outputs: integer nanoseconds on stdout
#######################################
now_ns() {
  local stamp
  stamp=$(date +%s%N 2>/dev/null) || stamp=""
  if [[ "$stamp" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$stamp"
  else
    printf '%s000000000\n' "$(date +%s)"
  fi
}

# --- Main ---

#######################################
# Parse options, collect all metrics and write them out.
# Arguments: command-line options (--textfile, --install, --help|-h)
#######################################
main() {
  # Parse arguments
  local arg
  for arg in "$@"; do
    case "$arg" in
      --textfile)
        TEXTFILE_MODE=true
        ;;
      --install)
        check_dependencies
        validate_config
        install_cron
        exit 0
        ;;
      --help | -h)
        usage
        ;;
      *)
        echo "Unknown option: $arg" >&2
        usage 1 >&2
        ;;
    esac
  done

  check_dependencies
  validate_config

  START_TIME=$(now_ns)

  # Exporter info
  add_metric "elasticsearch_exporter_info" "gauge" \
    "Exporter version information" "1" "version=\"${VERSION}\""

  # Collect metrics; cluster and node stats are skipped when the health
  # endpoint is unreachable.
  if collect_cluster_health; then
    collect_cluster_stats
    collect_node_stats
  fi

  # Exporter performance: elapsed seconds with two decimals, computed
  # with shell integer arithmetic.
  local end_time elapsed_ns duration
  end_time=$(now_ns)
  elapsed_ns=$((end_time - START_TIME))
  if ((elapsed_ns < 0)); then
    elapsed_ns=0
  fi
  printf -v duration '%d.%02d' \
    "$((elapsed_ns / 1000000000))" \
    "$(((elapsed_ns % 1000000000) / 10000000))"

  add_metric "elasticsearch_exporter_duration_seconds" "gauge" \
    "Time to generate all metrics" "$duration"
  add_metric "elasticsearch_exporter_last_run_timestamp" "gauge" \
    "Unix timestamp of last successful run" "$(date +%s)"

  write_output
}

main "$@"