#!/usr/bin/env bash
#
# Graylog Prometheus Metrics Exporter
#
# Prometheus textfile collector exporter for Graylog.
# Uses the Graylog REST API to collect node status, throughput,
# journal health, buffer usage, input/stream/index counts,
# and cluster leadership state.
#
# Requires: curl, jq
#
# Usage:
#   GRAYLOG_URL="http://graylog.example.com:9000/api" GRAYLOG_TOKEN="abc123" ./graylog-exporter.sh
#   GRAYLOG_URL="http://graylog.example.com:9000/api" GRAYLOG_TOKEN="abc123" ./graylog-exporter.sh --textfile
#   GRAYLOG_URL="http://graylog.example.com:9000/api" GRAYLOG_TOKEN="abc123" ./graylog-exporter.sh --install
#
# Parameters:
#   --textfile    Write to textfile collector directory
#   --install     Create cron job for automatic collection
#   --help        Show usage
#
# Environment:
#   GRAYLOG_URL     Graylog API base URL (required, e.g. http://localhost:9000/api)
#   GRAYLOG_TOKEN   API token (required)
#   TEXTFILE_DIR    Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
#   CURL_TIMEOUT    API request timeout in seconds (default: 10)
#   CRON_SCHEDULE   Cron schedule written by --install (default: "* * * * *")
#
# Author:   Phil Connor
# Contact:  contact@mylinux.work
# Website:  https://mylinux.work
# License:  MIT
# Version:  1.1
#
# Metrics Exported:
#   Core:
#     - graylog_up
#     - graylog_exporter_info{version}
#     - graylog_node_is_leader
#     - graylog_input_count
#
#   Throughput:
#     - graylog_throughput_input
#     - graylog_throughput_output
#
#   Journal:
#     - graylog_journal_size_bytes
#     - graylog_journal_uncommitted_entries
#     - graylog_journal_events_read_count
#     - graylog_journal_events_append_count
#
#   Buffers:
#     - graylog_buffer_input_usage
#     - graylog_buffer_process_usage
#     - graylog_buffer_output_usage
#
#   Counts:
#     - graylog_stream_count
#     - graylog_index_count
#     - graylog_sidecar_count
#     - graylog_sidecar_active_count
#     - graylog_content_pack_count
#
#   Exporter:
#     - graylog_exporter_duration_seconds
#     - graylog_exporter_last_run_timestamp

set -euo pipefail

# --- Configuration ---

readonly VERSION="1.1"
SCRIPT_NAME="$(basename -- "$0")"
readonly SCRIPT_NAME

GRAYLOG_URL="${GRAYLOG_URL:-}"
GRAYLOG_TOKEN="${GRAYLOG_TOKEN:-}"
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter/textfile_collector}"
CURL_TIMEOUT="${CURL_TIMEOUT:-10}"
CRON_SCHEDULE="${CRON_SCHEDULE:-* * * * *}"

TEXTFILE_MODE=false
OUTPUT=""
START_TIME=""

# --- Functions ---

#######################################
# Print the help text and exit.
# Arguments: $1 - exit status (optional, default 0)
# Outputs:   help text on stdout
#######################################
usage() {
  local status="${1:-0}"
  cat <<EOF
Usage: ${SCRIPT_NAME} [--textfile] [--install] [--help]

Prometheus textfile collector exporter for Graylog (version ${VERSION}).

Parameters:
  --textfile    Write to textfile collector directory
  --install     Create cron job for automatic collection
  --help        Show usage

Environment:
  GRAYLOG_URL     Graylog API base URL (required, e.g. http://localhost:9000/api)
  GRAYLOG_TOKEN   API token (required)
  TEXTFILE_DIR    Textfile collector directory (default: /var/lib/node_exporter/textfile_collector)
  CURL_TIMEOUT    API request timeout in seconds (default: 10)
  CRON_SCHEDULE   Cron schedule written by --install (default: "* * * * *")
EOF
  exit "$status"
}

#######################################
# Abort with a helpful message when a required external command is missing.
#######################################
check_dependencies() {
  local cmd
  local -a missing=()

  for cmd in curl jq; do
    if ! command -v "$cmd" &>/dev/null; then
      missing+=("$cmd")
    fi
  done

  if (( ${#missing[@]} > 0 )); then
    echo "ERROR: Missing required commands: ${missing[*]}" >&2
    echo "Install with: apt install ${missing[*]} OR dnf install ${missing[*]}" >&2
    exit 1
  fi
}

#######################################
# Validate required settings and normalise GRAYLOG_URL.
# Globals: GRAYLOG_URL (read/written), GRAYLOG_TOKEN, CURL_TIMEOUT (read)
#######################################
validate_config() {
  if [[ -z "$GRAYLOG_URL" ]]; then
    echo "ERROR: GRAYLOG_URL environment variable is required" >&2
    exit 1
  fi

  if [[ -z "$GRAYLOG_TOKEN" ]]; then
    echo "ERROR: GRAYLOG_TOKEN environment variable is required" >&2
    exit 1
  fi

  # A non-numeric timeout makes every curl call fail silently, which would be
  # indistinguishable from "Graylog is down".
  if [[ ! "$CURL_TIMEOUT" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "ERROR: CURL_TIMEOUT must be a number of seconds" >&2
    exit 1
  fi

  # Strip trailing slash
  GRAYLOG_URL="${GRAYLOG_URL%/}"
}

#######################################
# GET an API endpoint and print the response body.
# Arguments: $1 - endpoint path, appended to GRAYLOG_URL
# Outputs:   response body on stdout; nothing when the request fails
# Returns:   always 0 (callers test for empty output)
#######################################
api_get() {
  local endpoint="$1"
  local credentials="${GRAYLOG_TOKEN}:token"
  local escaped="" ch i

  # Escape backslashes and double quotes for curl's config-file syntax.
  for (( i = 0; i < ${#credentials}; i++ )); do
    ch="${credentials:i:1}"
    case "$ch" in
      '\' | '"') escaped+="\\${ch}" ;;
      *) escaped+="$ch" ;;
    esac
  done

  # Credentials go through stdin (--config -) rather than -u so the token
  # never appears in the process list.
  curl -sf --max-time "$CURL_TIMEOUT" \
    --config - \
    -H "Accept: application/json" \
    "${GRAYLOG_URL}${endpoint}" 2>/dev/null \
    <<<"user = \"${escaped}\"" || true
}

#######################################
# Evaluate a jq filter and print the result only if it is a plain number.
# Never fails: jq errors, invalid JSON and non-numeric results all yield 0,
# so one bad response cannot abort the run under `set -e`.
# Arguments: $1 - JSON document, $2 - jq filter
# Outputs:   a number on stdout
#######################################
json_num() {
  local json="$1"
  local filter="$2"
  local value

  value=$(jq -r "$filter" 2>/dev/null <<<"$json") || value=""

  if [[ "$value" =~ ^-?[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$ ]]; then
    printf '%s\n' "$value"
  else
    printf '0\n'
  fi
}

#######################################
# Append one sample line to OUTPUT.
# Arguments: $1 - metric name, $2 - value, $3 - label list (optional)
#######################################
add_metric_value() {
  local name="$1"
  local value="$2"
  local labels="${3:-}"

  if [[ -n "$labels" ]]; then
    OUTPUT+="${name}{${labels}} ${value}"$'\n'
  else
    OUTPUT+="${name} ${value}"$'\n'
  fi
}

#######################################
# Append HELP/TYPE headers plus one sample to OUTPUT.
# Arguments: $1 - name, $2 - type, $3 - help text, $4 - value,
#            $5 - label list (optional)
#######################################
add_metric() {
  local name="$1"
  local type="$2"
  local help="$3"
  local value="$4"
  local labels="${5:-}"

  OUTPUT+="# HELP ${name} ${help}"$'\n'
  OUTPUT+="# TYPE ${name} ${type}"$'\n'
  add_metric_value "$name" "$value" "$labels"
}

#######################################
# Emit graylog_up and graylog_node_is_leader from /cluster.
# Returns: 0 when the API answered, 1 when it did not
#######################################
collect_cluster() {
  local cluster_json is_leader leader_value=0

  cluster_json=$(api_get "/cluster")

  if [[ -z "$cluster_json" ]]; then
    add_metric "graylog_up" "gauge" "Graylog reachability (1=up, 0=down)" "0"
    return 1
  fi

  add_metric "graylog_up" "gauge" "Graylog reachability (1=up, 0=down)" "1"

  # Leader status is read from the first node in the response.
  # NOTE(review): this assumes the first entry is the local node - confirm.
  is_leader=$(jq -r '[.[]][0].is_leader // [.[]][0].is_master // false' \
    2>/dev/null <<<"$cluster_json") || is_leader="false"

  if [[ "$is_leader" == "true" ]]; then
    leader_value=1
  fi
  add_metric "graylog_node_is_leader" "gauge" \
    "Whether this node is the cluster leader (1=leader, 0=follower)" "$leader_value"

  return 0
}

#######################################
# Emit input/output throughput from /system/throughput.
#######################################
collect_throughput() {
  local throughput_json input_throughput output_throughput

  throughput_json=$(api_get "/system/throughput")
  if [[ -z "$throughput_json" ]]; then
    return 0
  fi

  # .throughput is either an object {input, output} or a single number.
  # The type is checked explicitly so a genuine 0 is never replaced by the
  # JSON text of the object.
  input_throughput=$(json_num "$throughput_json" \
    '.throughput | if type == "object" then (.input // 0) elif type == "number" then . else 0 end')
  output_throughput=$(json_num "$throughput_json" \
    '.throughput | if type == "object" then (.output // 0) else 0 end')

  add_metric "graylog_throughput_input" "gauge" "Messages received per second" "$input_throughput"
  add_metric "graylog_throughput_output" "gauge" "Messages written per second" "$output_throughput"
}

#######################################
# Emit the number of configured inputs from /system/inputs.
#######################################
collect_inputs() {
  local inputs_json

  inputs_json=$(api_get "/system/inputs")
  if [[ -z "$inputs_json" ]]; then
    return 0
  fi

  add_metric "graylog_input_count" "gauge" "Total number of configured inputs" \
    "$(json_num "$inputs_json" '.total // 0')"
}

#######################################
# Emit journal size and event rates from /system/journal.
#######################################
collect_journal() {
  local journal_json

  journal_json=$(api_get "/system/journal")
  if [[ -z "$journal_json" ]]; then
    return 0
  fi

  add_metric "graylog_journal_size_bytes" "gauge" "Journal size in bytes" \
    "$(json_num "$journal_json" '.journal_size // 0')"
  add_metric "graylog_journal_uncommitted_entries" "gauge" "Uncommitted journal entries" \
    "$(json_num "$journal_json" '.uncommitted_journal_entries // 0')"
  add_metric "graylog_journal_events_read_count" "gauge" "Journal events read per second" \
    "$(json_num "$journal_json" '.read_events_per_second // 0')"
  add_metric "graylog_journal_events_append_count" "gauge" "Journal events appended per second" \
    "$(json_num "$journal_json" '.append_events_per_second // 0')"
}

#######################################
# Emit input/process/output buffer utilization from /system/buffers.
#######################################
collect_buffers() {
  local buffers_json

  buffers_json=$(api_get "/system/buffers")
  if [[ -z "$buffers_json" ]]; then
    return 0
  fi

  add_metric "graylog_buffer_input_usage" "gauge" "Input buffer utilization percentage" \
    "$(json_num "$buffers_json" '.buffers.input.utilization_percent // 0')"
  add_metric "graylog_buffer_process_usage" "gauge" "Process buffer utilization percentage" \
    "$(json_num "$buffers_json" '.buffers.process.utilization_percent // 0')"
  add_metric "graylog_buffer_output_usage" "gauge" "Output buffer utilization percentage" \
    "$(json_num "$buffers_json" '.buffers.output.utilization_percent // 0')"
}

#######################################
# Emit the number of streams from /streams.
#######################################
collect_streams() {
  local streams_json

  streams_json=$(api_get "/streams")
  if [[ -z "$streams_json" ]]; then
    return 0
  fi

  add_metric "graylog_stream_count" "gauge" "Total number of streams" \
    "$(json_num "$streams_json" '.total // 0')"
}

#######################################
# Emit the number of indices from /system/indexer/indices.
#######################################
collect_indices() {
  local indices_json

  indices_json=$(api_get "/system/indexer/indices")
  if [[ -z "$indices_json" ]]; then
    return 0
  fi

  # Count .all.indices when the "all" key exists. Top-level keys are counted
  # only when it is absent, so an empty index set is reported as 0 rather
  # than as the number of wrapper keys.
  add_metric "graylog_index_count" "gauge" "Total number of indices" \
    "$(json_num "$indices_json" \
      'if type != "object" then 0 elif has("all") then (.all.indices | length) else (keys | length) end')"
}

#######################################
# Emit total and active Sidecar counts from /sidecars.
#######################################
collect_sidecars() {
  local sidecars_json

  sidecars_json=$(api_get "/sidecars")
  if [[ -z "$sidecars_json" ]]; then
    return 0
  fi

  add_metric "graylog_sidecar_count" "gauge" "Total number of registered Sidecars" \
    "$(json_num "$sidecars_json" '.pagination.total // 0')"
  # `[]?` tolerates a missing or null .sidecars field.
  # NOTE(review): status code 1 is treated as "active" - confirm against the
  # Sidecar API.
  add_metric "graylog_sidecar_active_count" "gauge" "Number of active Sidecars" \
    "$(json_num "$sidecars_json" \
      '[.sidecars[]? | select(.node_details.status.status == 1)] | length')"
}

#######################################
# Emit the number of content packs from /system/content_packs/latest.
#######################################
collect_content_packs() {
  local packs_json

  packs_json=$(api_get "/system/content_packs/latest")
  if [[ -z "$packs_json" ]]; then
    return 0
  fi

  add_metric "graylog_content_pack_count" "gauge" "Total number of content packs" \
    "$(json_num "$packs_json" '.total // 0')"
}

#######################################
# Write OUTPUT to stdout, or to graylog.prom in TEXTFILE_DIR in textfile mode.
# The file is written under a temporary name and renamed into place so a
# reader never sees a partial file.
#######################################
write_output() {
  if [[ "$TEXTFILE_MODE" == true ]]; then
    local output_file="${TEXTFILE_DIR}/graylog.prom"
    local temp_file="${output_file}.$$"

    mkdir -p -- "$TEXTFILE_DIR"
    # OUTPUT already ends with a newline; printf '%s' avoids adding another.
    if ! printf '%s' "$OUTPUT" >"$temp_file"; then
      rm -f -- "$temp_file"
      return 1
    fi
    if ! mv -f -- "$temp_file" "$output_file"; then
      rm -f -- "$temp_file"
      return 1
    fi
  else
    printf '%s' "$OUTPUT"
  fi
}

#######################################
# Install /etc/cron.d/graylog-exporter so metrics are collected periodically.
# Requires root. The file embeds GRAYLOG_TOKEN, so it is created mode 600.
#######################################
install_cron() {
  if [[ $EUID -ne 0 ]]; then
    echo "ERROR: --install requires root" >&2
    exit 1
  fi

  local script_path
  local cron_file="/etc/cron.d/graylog-exporter"
  script_path=$(readlink -f -- "$0")

  # NOTE(review): the schedule defaults to every minute; override it with
  # CRON_SCHEDULE if a different interval is wanted.
  (
    umask 077
    rm -f -- "$cron_file"
    cat >"$cron_file" <<EOF
# Graylog Prometheus exporter - installed by ${SCRIPT_NAME} --install
GRAYLOG_URL="${GRAYLOG_URL}"
GRAYLOG_TOKEN="${GRAYLOG_TOKEN}"
TEXTFILE_DIR="${TEXTFILE_DIR}"
CURL_TIMEOUT="${CURL_TIMEOUT}"
${CRON_SCHEDULE} root "${script_path}" --textfile >/dev/null 2>&1
EOF
  )

  chmod 600 "$cron_file"
  echo "Installed cron job: /etc/cron.d/graylog-exporter"
  echo "Metrics will be written to: ${TEXTFILE_DIR}/graylog.prom"
}

#######################################
# Print the current time in nanoseconds since the epoch.
# Falls back to whole-second resolution when `date` does not support %N.
#######################################
now_ns() {
  local stamp
  stamp=$(date +%s%N)
  if [[ ! "$stamp" =~ ^[0-9]+$ ]]; then
    stamp="$(date +%s)000000000"
  fi
  printf '%s\n' "$stamp"
}

# --- Main ---

#######################################
# Parse arguments, collect all metrics and write them out.
# Arguments: command-line options (--textfile, --install, --help)
#######################################
main() {
  local arg

  # Parse arguments
  for arg in "$@"; do
    case "$arg" in
      --textfile)
        TEXTFILE_MODE=true
        ;;
      --install)
        check_dependencies
        validate_config
        install_cron
        exit 0
        ;;
      --help | -h)
        usage 0
        ;;
      *)
        echo "Unknown option: $arg" >&2
        usage 1 >&2
        ;;
    esac
  done

  check_dependencies
  validate_config

  START_TIME=$(now_ns)

  # Exporter info
  add_metric "graylog_exporter_info" "gauge" "Exporter version information" "1" \
    "version=\"${VERSION}\""

  # The remaining collectors are skipped when the API is unreachable;
  # graylog_up=0 has already been recorded by collect_cluster in that case.
  if collect_cluster; then
    collect_inputs
    collect_throughput
    collect_journal
    collect_buffers
    collect_streams
    collect_indices
    collect_sidecars
    collect_content_packs
  fi

  # Exporter performance, truncated to two decimals with integer arithmetic.
  local end_time elapsed_ns duration
  end_time=$(now_ns)
  elapsed_ns=$(( end_time - START_TIME ))
  if (( elapsed_ns < 0 )); then
    elapsed_ns=0
  fi
  printf -v duration '%d.%02d' \
    "$(( elapsed_ns / 1000000000 ))" \
    "$(( (elapsed_ns % 1000000000) / 10000000 ))"

  add_metric "graylog_exporter_duration_seconds" "gauge" "Time to generate all metrics" "$duration"
  add_metric "graylog_exporter_last_run_timestamp" "gauge" "Unix timestamp of last successful run" "$(date +%s)"

  write_output
}

main "$@"