a1a17e81a1
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
684 lines
21 KiB
Bash
684 lines
21 KiB
Bash
#!/bin/bash
################################################################################
# Script Name: alertmanager-exporter.sh
# Version: 1.0
# Description: Prometheus exporter for Alertmanager operational overview.
#              Queries the Alertmanager API for active alerts, silences,
#              cluster health, and config status. Complements the built-in
#              /metrics endpoint with higher-level operational metrics.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
#   - curl
#   - jq
#   - Alertmanager running and accessible
#   - netcat (nc) for HTTP mode
#
# Usage:
#   # Output to stdout
#   ./alertmanager-exporter.sh
#
#   # HTTP server mode
#   ./alertmanager-exporter.sh --http -p 9094
#
#   # Textfile collector mode
#   ./alertmanager-exporter.sh --textfile
#
# Metrics Exported:
#   - alertmanager_overview_up - Exporter status (1=up, 0=down)
#   - alertmanager_overview_info - Alertmanager version info
#   - alertmanager_overview_alerts_active_total - Total active alerts
#   - alertmanager_overview_alerts_by_state - Alerts by state
#   - alertmanager_overview_alerts_by_severity - Alerts by severity
#   - alertmanager_overview_alerts_by_receiver - Alerts by receiver
#   - alertmanager_overview_alert_groups_total - Alert group count
#   - alertmanager_overview_silences_active - Active silences
#   - alertmanager_overview_silences_pending - Pending silences
#   - alertmanager_overview_silences_expired - Expired silences
#   - alertmanager_overview_silence_coverage_ratio - Silence coverage
#   - alertmanager_overview_cluster_peers - Peer count
#   - alertmanager_overview_cluster_peer_healthy - Per-peer health
#   - alertmanager_overview_config_hash - Config hash for drift detection
#   - alertmanager_overview_uptime_seconds - Uptime
#   - alertmanager_overview_last_config_reload_timestamp - Last reload
#   - alertmanager_overview_exporter_duration_seconds - Script duration
#   - alertmanager_overview_exporter_last_run_timestamp - Last run time
#
# Configuration:
#   Default HTTP port: 9094
#   Textfile directory: /var/lib/node_exporter
#   Alertmanager URL: http://localhost:9093
#
################################################################################

# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================

# Directory used for --textfile mode (node_exporter textfile collector).
TEXTFILE_DIR="/var/lib/node_exporter"
# Destination file for the metrics; empty means "print to stdout".
OUTPUT_FILE=""
# Set to true by --http to serve metrics over HTTP instead of printing them.
HTTP_MODE=false
# Listening port for HTTP mode.
HTTP_PORT=9094
# Base URL of the Alertmanager instance that is queried.
AM_URL="http://localhost:9093"

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Print the command-line help to stdout and terminate the script.
# Globals: HTTP_PORT, AM_URL (read; their current values appear in the text)
# Outputs: help text on stdout
# Returns: never returns; exits with status 0
show_usage() {
    cat <<EOF
Usage: $0 [OPTIONS]

Export Alertmanager operational overview as Prometheus metrics.

MODES:
    --textfile      Write to node_exporter textfile collector
    --http          Run HTTP server on port $HTTP_PORT

OPTIONS:
    -p, --port      HTTP port (default: 9094)
    -u, --url       Alertmanager URL (default: $AM_URL)
    -o, --output    Output file path
    -h, --help      Show this help and exit

EXAMPLES:
    $0 --textfile                              # Write to textfile collector
    $0 --http --port 9094                      # Run HTTP server
    $0 --url http://alertmanager:9093          # Custom Alertmanager URL
    $0 -o /tmp/alertmanager-overview.prom      # Write to custom file

EOF
    exit 0
}

# Parse command-line options into the configuration globals.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT, AM_URL (written)
# Arguments: the script's command-line arguments ("$@")
# Returns:   0 when all arguments were consumed; exits 1 on an unknown option
#            or when an option that needs a value is the last argument.
#            -h/--help calls show_usage, which exits 0.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/alertmanager-overview.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port|-u|--url|-o|--output)
                # Without this guard "shift 2" fails and shifts nothing when
                # the value is missing, so the loop would never terminate.
                if [[ $# -lt 2 ]]; then
                    echo "ERROR: option $1 requires an argument" >&2
                    exit 1
                fi
                case "$1" in
                    -p|--port)   HTTP_PORT="$2" ;;
                    # Drop one trailing slash so "${AM_URL}/api/..." never
                    # contains a double slash.
                    -u|--url)    AM_URL="${2%/}" ;;
                    -o|--output) OUTPUT_FILE="$2" ;;
                esac
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}

# Check that the external tools every mode depends on are installed.
# Outputs: one "ERROR: <tool> not found" line on stderr per missing tool
# Returns: 0 when curl and jq are both available, 1 otherwise
check_requirements() {
    local missing=0
    local tool

    for tool in curl jq; do
        if ! command -v "$tool" >/dev/null 2>&1; then
            echo "ERROR: $tool not found" >&2
            missing=1
        fi
    done

    return "$missing"
}

# Query an Alertmanager HTTP endpoint.
# Globals:   AM_URL (read)
# Arguments: $1 - endpoint path appended to AM_URL (e.g. /api/v2/alerts)
# Outputs:   response body on stdout; nothing when the request fails
# Returns:   curl's exit status (non-zero on connection errors, timeouts, or
#            HTTP error responses because of -f)
am_api() {
    local endpoint="$1"

    # -s: no progress meter, -f: fail on HTTP errors instead of printing the
    # error page; stderr is discarded so callers only ever see a body or nothing.
    curl -sf --connect-timeout 5 --max-time 10 "${AM_URL}${endpoint}" 2>/dev/null
}

# ============================================================================
# METRIC COLLECTION FUNCTIONS
# ============================================================================

# Fetch the alert list and count alerts per state.
# Globals written: ALERTS_JSON (raw alert array), ALERTS_TOTAL, ALERTS_ACTIVE,
#                  ALERTS_SUPPRESSED, ALERTS_UNPROCESSED
# Returns: 0 on success; 1 when the API gave no response or the response could
#          not be parsed, in which case the counters are 0 and ALERTS_JSON is "[]".
collect_alerts() {
    local alerts_json counts

    # Start from safe defaults so later numeric tests never see an empty value.
    ALERTS_JSON="[]"
    ALERTS_TOTAL=0
    ALERTS_ACTIVE=0
    ALERTS_SUPPRESSED=0
    ALERTS_UNPROCESSED=0

    alerts_json=$(am_api "/api/v2/alerts")
    if [ -z "$alerts_json" ]; then
        return 1
    fi

    # One jq pass yields "total active suppressed unprocessed".
    counts=$(printf '%s\n' "$alerts_json" | jq -r '
        def by_state(s): map(select(.status.state == s)) | length;
        if type != "array" then error("expected a JSON array") else . end
        | "\(length) \(by_state("active")) \(by_state("suppressed")) \(by_state("unprocessed"))"
    ' 2>/dev/null) || return 1

    ALERTS_JSON="$alerts_json"
    read -r ALERTS_TOTAL ALERTS_ACTIVE ALERTS_SUPPRESSED ALERTS_UNPROCESSED <<<"$counts"
}

# Print one sample line per severity for the alerts held in ALERTS_JSON.
# Globals: ALERTS_JSON (read; treated as "[]" when unset or empty)
# Outputs: alertmanager_overview_alerts_by_severity lines for critical, warning
#          and info (always, even when 0), plus severity="other" when at least
#          one alert has a different or missing severity label.
#          When ALERTS_JSON cannot be parsed, the three fixed severities are
#          printed with value 0.
collect_alerts_by_severity() {
    local lines severity

    lines=$(printf '%s\n' "${ALERTS_JSON:-[]}" | jq -r '
        . as $alerts
        | (("critical", "warning", "info") as $sev
            | ([$alerts[] | select(.labels.severity == $sev)] | length) as $n
            | "alertmanager_overview_alerts_by_severity{severity=\"\($sev)\"} \($n)"),
          (([$alerts[] | select(.labels.severity != "critical"
                                and .labels.severity != "warning"
                                and .labels.severity != "info")] | length) as $n
            | select($n > 0)
            | "alertmanager_overview_alerts_by_severity{severity=\"other\"} \($n)")
    ' 2>/dev/null)

    if [ -z "$lines" ]; then
        # jq failed: keep the series present rather than emitting nothing.
        for severity in critical warning info; do
            echo "alertmanager_overview_alerts_by_severity{severity=\"$severity\"} 0"
        done
        return
    fi

    printf '%s\n' "$lines"
}

# Print one sample line per receiver with the number of alerts routed to it.
# Globals: ALERTS_JSON (read; treated as "[]" when unset or empty)
# Outputs: alertmanager_overview_alerts_by_receiver lines, sorted by receiver
#          name; alerts without any receiver are counted under "unknown".
#          Prints nothing when ALERTS_JSON cannot be parsed.
collect_alerts_by_receiver() {
    printf '%s\n' "${ALERTS_JSON:-[]}" | jq -r '
        # Escape backslash, double quote and newline as required for label
        # values in the Prometheus text exposition format.
        def esc: split("\\") | join("\\\\")
               | split("\"") | join("\\\"")
               | split("\n") | join("\\n");
        [.[] | .receivers[]?.name // "unknown"]
        | group_by(.)
        | map({receiver: .[0], count: length})
        | .[]
        | "alertmanager_overview_alerts_by_receiver{receiver=\"\(.receiver | tostring | esc)\"} \(.count)"
    ' 2>/dev/null
}

# Print the number of alert groups reported by /api/v2/alerts/groups.
# Outputs: a single non-negative integer on stdout; "0" when the API gives no
#          response or the response cannot be parsed, so the caller always
#          gets a usable sample value.
collect_alert_groups() {
    local groups_json count

    groups_json=$(am_api "/api/v2/alerts/groups")
    if [ -z "$groups_json" ]; then
        echo "0"
        return
    fi

    count=$(printf '%s\n' "$groups_json" | jq 'length' 2>/dev/null)
    if [[ ! "$count" =~ ^[0-9]+$ ]]; then
        count=0
    fi

    echo "$count"
}

# Fetch the silence list and count silences per state.
# Globals written: SILENCES_ACTIVE, SILENCES_PENDING, SILENCES_EXPIRED
# Returns: 0 on success; 1 when the API gave no response or the response could
#          not be parsed, in which case all three counters are 0.
collect_silences() {
    local silences_json counts

    # Safe defaults so the metric lines always carry a numeric value.
    SILENCES_ACTIVE=0
    SILENCES_PENDING=0
    SILENCES_EXPIRED=0

    silences_json=$(am_api "/api/v2/silences")
    if [ -z "$silences_json" ]; then
        return 1
    fi

    # One jq pass yields "active pending expired".
    counts=$(printf '%s\n' "$silences_json" | jq -r '
        def by_state(s): map(select(.status.state == s)) | length;
        if type != "array" then error("expected a JSON array") else . end
        | "\(by_state("active")) \(by_state("pending")) \(by_state("expired"))"
    ' 2>/dev/null) || return 1

    read -r SILENCES_ACTIVE SILENCES_PENDING SILENCES_EXPIRED <<<"$counts"
}

# Compute the share of alerts that are suppressed.
# Globals: ALERTS_TOTAL, ALERTS_SUPPRESSED (read; unset or empty counts as 0)
# Outputs: suppressed / total with four decimals (e.g. 0.2500), or "0" when
#          there are no alerts or the total is not a positive integer
calculate_silence_coverage() {
    local total="${ALERTS_TOTAL:-0}"
    local suppressed="${ALERTS_SUPPRESSED:-0}"

    if [[ "$total" =~ ^[0-9]+$ ]] && [ "$total" -gt 0 ]; then
        # Values go in via -v so they are data, never part of the awk program.
        awk -v s="$suppressed" -v t="$total" 'BEGIN { printf "%.4f", s / t }'
    else
        echo "0"
    fi
}

# Collect version, cluster, uptime, config-hash and reload information.
# Globals written:
#   AM_VERSION          - .versionInfo.version, "unknown" when unavailable
#   CLUSTER_STATUS      - .cluster.status, "disabled" when the field is absent
#   CLUSTER_PEERS       - number of entries in .cluster.peers
#   CLUSTER_PEERS_JSON  - the .cluster.peers array (JSON), "[]" when absent
#   AM_UPTIME_SECONDS   - now minus the timestamp in .uptime, 0 when unparsable
#   CONFIG_HASH         - first 16 hex chars of SHA-256 of .config.original, "0" when absent
#   LAST_RELOAD         - alertmanager_config_last_reload_success_timestamp_seconds
#                         read from the /metrics endpoint, 0 when unavailable
# Returns: 1 when /api/v2/status gave no response (all globals hold their
#          defaults), 0 otherwise.
collect_cluster_status() {
    local status_json start_time start_epoch now_epoch config_text reload_ts
    local value

    # Defaults: returned as-is on failure, and kept for any single field that
    # cannot be extracted below.
    AM_VERSION="unknown"
    # shellcheck disable=SC2034 # reserved for future use
    CLUSTER_STATUS="unknown"
    CLUSTER_PEERS=0
    CLUSTER_PEERS_JSON="[]"
    AM_UPTIME_SECONDS=0
    CONFIG_HASH="0"
    LAST_RELOAD=0

    status_json=$(am_api "/api/v2/status")
    if [ -z "$status_json" ]; then
        return 1
    fi

    value=$(jq -r '.versionInfo.version // "unknown"' <<<"$status_json" 2>/dev/null)
    AM_VERSION="${value:-unknown}"

    # Cluster info
    value=$(jq -r '.cluster.status // "disabled"' <<<"$status_json" 2>/dev/null)
    # shellcheck disable=SC2034 # reserved for future use
    CLUSTER_STATUS="${value:-unknown}"

    # Peer details (kept for the per-peer health metrics)
    value=$(jq -c '.cluster.peers // []' <<<"$status_json" 2>/dev/null)
    CLUSTER_PEERS_JSON="${value:-[]}"
    value=$(jq 'length' <<<"$CLUSTER_PEERS_JSON" 2>/dev/null)
    if [[ "$value" =~ ^[0-9]+$ ]]; then
        CLUSTER_PEERS="$value"
    fi

    # Uptime: .uptime is used as the start timestamp and subtracted from now.
    start_time=$(jq -r '.uptime // empty' <<<"$status_json" 2>/dev/null)
    if [ -n "$start_time" ]; then
        start_epoch=$(date -d "$start_time" +%s 2>/dev/null || echo 0)
        now_epoch=$(date +%s)
        if [ "$start_epoch" -gt 0 ]; then
            AM_UPTIME_SECONDS=$((now_epoch - start_epoch))
        fi
    fi

    # Config hash: hash the original config text for drift detection.
    config_text=$(jq -r '.config.original // ""' <<<"$status_json" 2>/dev/null)
    if [ -n "$config_text" ]; then
        value=$(printf '%s\n' "$config_text" | sha256sum 2>/dev/null | cut -c1-16)
        CONFIG_HASH="${value:-0}"
    fi

    # Last config reload is not part of /api/v2/status; take it from the
    # built-in /metrics endpoint. am_api applies the same timeouts as every
    # other request so an unresponsive endpoint cannot stall the run.
    reload_ts=$(am_api "/metrics" |
        awk '$1 == "alertmanager_config_last_reload_success_timestamp_seconds" { print $2; exit }')
    LAST_RELOAD="${reload_ts:-0}"
}

# Print one alertmanager_overview_cluster_peer_healthy sample per cluster peer.
# Globals: CLUSTER_PEERS (read), CLUSTER_PEERS_JSON (read)
# Outputs: one line per entry of CLUSTER_PEERS_JSON, labelled with the peer's
#          .address ("unknown" when absent). The value is always 1: being
#          listed as a peer is the only signal used here.
#          Prints nothing when there are no peers.
output_peer_metrics() {
    # Treat unset, empty or non-numeric peer counts the same as zero peers.
    if [[ ! "${CLUSTER_PEERS:-0}" =~ ^[1-9][0-9]*$ ]] || [ -z "${CLUSTER_PEERS_JSON:-}" ]; then
        return
    fi

    printf '%s\n' "$CLUSTER_PEERS_JSON" | jq -r '
        .[]
        | "alertmanager_overview_cluster_peer_healthy{peer=\"\(.address // "unknown")\"} 1"
    ' 2>/dev/null
}

# Fetch Alertmanager's built-in /metrics output for the notification metrics.
# Globals written: NOTIFICATION_METRICS (raw exposition text; empty on failure
#                  so output from an earlier run is never reused)
# Returns: 0 when a non-empty response was stored, 1 otherwise
collect_notification_metrics() {
    # Goes through am_api so the request uses the same timeouts as all others.
    NOTIFICATION_METRICS=$(am_api "/metrics")

    [ -n "$NOTIFICATION_METRICS" ]
}

# Re-export alertmanager_notifications_total samples under the overview prefix.
# Globals: NOTIFICATION_METRICS (read; raw text from the /metrics endpoint)
# Outputs: every labelled alertmanager_notifications_total sample line, renamed
#          to alertmanager_overview_notification_rate; nothing when
#          NOTIFICATION_METRICS is empty or has no such lines
output_notification_rates() {
    if [ -z "${NOTIFICATION_METRICS:-}" ]; then
        return
    fi

    # Only labelled samples (name followed by "{") are selected, so HELP/TYPE
    # comments and other metric families are skipped.
    printf '%s\n' "$NOTIFICATION_METRICS" |
        sed -n 's/^alertmanager_notifications_total{/alertmanager_overview_notification_rate{/p'
}

# Re-export alertmanager_notifications_failed_total samples under the overview prefix.
# Globals: NOTIFICATION_METRICS (read; raw text from the /metrics endpoint)
# Outputs: every labelled alertmanager_notifications_failed_total sample line,
#          renamed to alertmanager_overview_notification_failures; nothing when
#          NOTIFICATION_METRICS is empty or has no such lines
output_notification_failures() {
    if [ -z "${NOTIFICATION_METRICS:-}" ]; then
        return
    fi

    # Only labelled samples (name followed by "{") are selected.
    printf '%s\n' "$NOTIFICATION_METRICS" |
        sed -n 's/^alertmanager_notifications_failed_total{/alertmanager_overview_notification_failures{/p'
}

# Re-export the notification latency sums under the overview prefix.
# Globals: NOTIFICATION_METRICS (read; raw text from the /metrics endpoint)
# Outputs: every labelled alertmanager_notification_latency_seconds_sum sample
#          line, renamed to alertmanager_overview_notification_latency_seconds;
#          nothing when NOTIFICATION_METRICS is empty or has no such lines
# Note: only the _sum series is forwarded. No average is computed here; divide
#       by the matching _count series at query time if an average is wanted.
output_notification_latency() {
    if [ -z "${NOTIFICATION_METRICS:-}" ]; then
        return
    fi

    printf '%s\n' "$NOTIFICATION_METRICS" |
        sed -n 's/^alertmanager_notification_latency_seconds_sum{/alertmanager_overview_notification_latency_seconds{/p'
}

# ============================================================================
# METRIC OUTPUT
# ============================================================================

# Print one metric family: HELP line, TYPE line, sample lines, blank separator.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text,
#            $4... - pre-rendered sample lines (an argument may itself hold
#                    several newline-separated lines); may be omitted
_emit_metric_family() {
    local name="$1" type="$2" help="$3"
    shift 3

    printf '# HELP %s %s\n# TYPE %s %s\n' "$name" "$help" "$name" "$type"
    if [ "$#" -gt 0 ]; then
        printf '%s\n' "$@"
    fi
    echo ""
}

# Collect everything and print the complete metrics page to stdout.
# When Alertmanager does not answer /api/v2/status, only
# alertmanager_overview_up (0) and the two exporter self-metrics are printed.
# Globals: reads the values set by collect_alerts, collect_silences,
#          collect_cluster_status and collect_notification_metrics; a value
#          that is unset or empty is printed as 0 (or "unknown" for the
#          version label) so every sample line stays well-formed.
# Outputs: metrics in the Prometheus text exposition format
# Returns: 0
generate_metrics() {
    local script_start script_end
    local am_up=1
    local severity_lines receiver_lines peer_lines group_count coverage
    local notif_rates notif_failures notif_latency

    script_start=$(date +%s)

    # Check if Alertmanager is reachable
    if ! am_api "/api/v2/status" >/dev/null 2>&1; then
        am_up=0
    fi

    _emit_metric_family alertmanager_overview_up gauge \
        "Alertmanager exporter status (1=up, 0=down)" \
        "alertmanager_overview_up $am_up"

    if [ "$am_up" -eq 1 ]; then
        # Collect data from API
        collect_alerts
        collect_silences
        collect_cluster_status
        collect_notification_metrics

        # Version info
        _emit_metric_family alertmanager_overview_info gauge \
            "Alertmanager version and exporter version" \
            "alertmanager_overview_info{version=\"${AM_VERSION:-unknown}\",exporter_version=\"1.0\"} 1"

        # Active alerts total
        _emit_metric_family alertmanager_overview_alerts_active_total gauge \
            "Total active alerts" \
            "alertmanager_overview_alerts_active_total ${ALERTS_TOTAL:-0}"

        # Alerts by state
        _emit_metric_family alertmanager_overview_alerts_by_state gauge \
            "Active alerts by state" \
            "alertmanager_overview_alerts_by_state{state=\"active\"} ${ALERTS_ACTIVE:-0}" \
            "alertmanager_overview_alerts_by_state{state=\"suppressed\"} ${ALERTS_SUPPRESSED:-0}" \
            "alertmanager_overview_alerts_by_state{state=\"unprocessed\"} ${ALERTS_UNPROCESSED:-0}"

        # Alerts by severity (header is printed even when there are no samples)
        severity_lines=$(collect_alerts_by_severity)
        _emit_metric_family alertmanager_overview_alerts_by_severity gauge \
            "Active alerts by severity label" \
            ${severity_lines:+"$severity_lines"}

        # Alerts by receiver (family is skipped entirely when empty)
        receiver_lines=$(collect_alerts_by_receiver)
        if [ -n "$receiver_lines" ]; then
            _emit_metric_family alertmanager_overview_alerts_by_receiver gauge \
                "Active alerts by receiver" \
                "$receiver_lines"
        fi

        # Alert groups
        group_count=$(collect_alert_groups)
        _emit_metric_family alertmanager_overview_alert_groups_total gauge \
            "Total alert group count" \
            "alertmanager_overview_alert_groups_total ${group_count:-0}"

        # Alert resolution rate and silence coverage are the same ratio
        # (suppressed / total), so both come from the shared helper.
        coverage=$(calculate_silence_coverage)
        _emit_metric_family alertmanager_overview_alert_resolution_rate gauge \
            "Ratio of suppressed to total alerts" \
            "alertmanager_overview_alert_resolution_rate ${coverage:-0}"

        # Silences
        _emit_metric_family alertmanager_overview_silences_active gauge \
            "Number of active silences" \
            "alertmanager_overview_silences_active ${SILENCES_ACTIVE:-0}"
        _emit_metric_family alertmanager_overview_silences_pending gauge \
            "Number of pending silences" \
            "alertmanager_overview_silences_pending ${SILENCES_PENDING:-0}"
        _emit_metric_family alertmanager_overview_silences_expired gauge \
            "Number of expired silences" \
            "alertmanager_overview_silences_expired ${SILENCES_EXPIRED:-0}"

        # Silence coverage
        _emit_metric_family alertmanager_overview_silence_coverage_ratio gauge \
            "Ratio of silenced alerts to total alerts" \
            "alertmanager_overview_silence_coverage_ratio ${coverage:-0}"

        # Cluster health
        _emit_metric_family alertmanager_overview_cluster_peers gauge \
            "Number of cluster peers" \
            "alertmanager_overview_cluster_peers ${CLUSTER_PEERS:-0}"

        # Per-peer health
        peer_lines=$(output_peer_metrics)
        if [ -n "$peer_lines" ]; then
            _emit_metric_family alertmanager_overview_cluster_peer_healthy gauge \
                "Peer health status (1=healthy)" \
                "$peer_lines"
        fi

        # Config hash
        _emit_metric_family alertmanager_overview_config_hash gauge \
            "Config hash for drift detection (first 16 chars of SHA256)" \
            "alertmanager_overview_config_hash{hash=\"${CONFIG_HASH:-0}\"} 1"

        # Notification rate per receiver
        notif_rates=$(output_notification_rates)
        if [ -n "$notif_rates" ]; then
            _emit_metric_family alertmanager_overview_notification_rate counter \
                "Total notifications sent per integration" \
                "$notif_rates"
        fi

        # Notification failures per receiver
        notif_failures=$(output_notification_failures)
        if [ -n "$notif_failures" ]; then
            _emit_metric_family alertmanager_overview_notification_failures counter \
                "Total notification failures per integration" \
                "$notif_failures"
        fi

        # Notification latency per receiver
        notif_latency=$(output_notification_latency)
        if [ -n "$notif_latency" ]; then
            _emit_metric_family alertmanager_overview_notification_latency_seconds counter \
                "Notification latency sum per integration" \
                "$notif_latency"
        fi

        # Uptime
        _emit_metric_family alertmanager_overview_uptime_seconds gauge \
            "Alertmanager uptime in seconds" \
            "alertmanager_overview_uptime_seconds ${AM_UPTIME_SECONDS:-0}"

        # Last config reload
        _emit_metric_family alertmanager_overview_last_config_reload_timestamp gauge \
            "Unix timestamp of last successful config reload" \
            "alertmanager_overview_last_config_reload_timestamp ${LAST_RELOAD:-0}"
    fi

    # Exporter runtime: printed whether Alertmanager is up or down, with the
    # same HELP text in both cases.
    script_end=$(date +%s)
    _emit_metric_family alertmanager_overview_exporter_duration_seconds gauge \
        "Time to generate all metrics" \
        "alertmanager_overview_exporter_duration_seconds $((script_end - script_start))"
    _emit_metric_family alertmanager_overview_exporter_last_run_timestamp gauge \
        "Unix timestamp of last run" \
        "alertmanager_overview_exporter_last_run_timestamp $script_end"
}

# ============================================================================
# HTTP SERVER MODE
# ============================================================================

# Serve the metrics over HTTP with netcat, one connection at a time, forever.
# "GET /metrics..." returns a fresh generate_metrics page; any other request
# returns a small HTML index page.
# Globals: HTTP_PORT, AM_URL (read)
# Outputs: startup messages and errors on stderr
# Returns: does not return in normal operation; exits 1 when nc is missing or
#          the temporary FIFO cannot be created
run_http_server() {
    local fifo_dir fifo request

    echo "Starting alertmanager-overview exporter on port $HTTP_PORT..." >&2
    echo "Alertmanager URL: $AM_URL" >&2

    if ! command -v nc >/dev/null 2>&1; then
        echo "ERROR: netcat (nc) required for HTTP mode" >&2
        exit 1
    fi

    # The handler must read the client's request from nc's stdout and send its
    # response to nc's stdin. A plain "handler | nc" pipe only covers the second
    # direction (the handler would read the script's own stdin), so the
    # response travels back through a FIFO instead.
    if ! fifo_dir=$(mktemp -d); then
        echo "ERROR: cannot create temporary directory" >&2
        exit 1
    fi
    fifo="$fifo_dir/response"
    if ! mkfifo "$fifo"; then
        rm -rf -- "$fifo_dir"
        echo "ERROR: cannot create FIFO $fifo" >&2
        exit 1
    fi
    # shellcheck disable=SC2064 # expand now: the local is out of scope at exit
    trap "rm -rf -- '$fifo_dir'" EXIT

    while true; do
        nc -l -p "$HTTP_PORT" -q 1 <"$fifo" 2>/dev/null | {
            read -r request
            if [[ "$request" =~ ^GET\ /metrics ]]; then
                printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
                generate_metrics
            else
                printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
                cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Alertmanager Overview Exporter</title></head>
<body>
<h1>Alertmanager Overview Exporter v1.0</h1>
<p>Alertmanager URL: $AM_URL</p>
<p><a href="/metrics">Metrics</a></p>
</body>
</html>
EOF
            fi
        } >"$fifo"

        # If nc itself failed (e.g. the port is already in use), back off
        # briefly instead of spinning in a tight loop.
        if [ "${PIPESTATUS[0]}" -ne 0 ]; then
            sleep 1
        fi
    done
}

# ============================================================================
# MAIN EXECUTION
# ============================================================================

# Entry point: parse options, check prerequisites, then run the selected mode.
#   --http             -> run_http_server
#   OUTPUT_FILE set    -> write metrics to a temp file in the target directory
#                         and rename it over OUTPUT_FILE, so readers never see
#                         a partially written file
#   otherwise          -> print metrics to stdout
# Arguments: the script's command-line arguments ("$@")
# Returns:   exits 1 when prerequisites are missing, the output location cannot
#            be prepared, or the generated file has fewer than 5 lines (the
#            previous file is then left in place)
main() {
    local output_dir temp_file file_lines

    parse_args "$@"

    if ! check_requirements; then
        exit 1
    fi

    if [ "$HTTP_MODE" = true ]; then
        run_http_server
        return
    fi

    if [ -z "$OUTPUT_FILE" ]; then
        generate_metrics
        return
    fi

    output_dir="$(dirname -- "$OUTPUT_FILE")"
    if ! mkdir -p -- "$output_dir"; then
        echo "ERROR: Cannot create output directory $output_dir" >&2
        exit 1
    fi

    # Temp file lives next to the target so the final mv is a same-filesystem rename.
    if ! temp_file=$(mktemp "${output_dir}/.alertmanager_overview.XXXXXX"); then
        echo "ERROR: Cannot create temporary file in $output_dir" >&2
        exit 1
    fi

    if ! generate_metrics > "$temp_file" 2>/dev/null; then
        rm -f -- "$temp_file"
        echo "ERROR: Failed to generate metrics" >&2
        exit 1
    fi

    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)

    if [ "$file_lines" -lt 5 ]; then
        rm -f -- "$temp_file"
        echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
        exit 1
    fi

    chmod 644 "$temp_file"
    if ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
        rm -f -- "$temp_file"
        echo "ERROR: Cannot write $OUTPUT_FILE" >&2
        exit 1
    fi

    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
}

# Run the exporter with the arguments given on the command line.
main "$@"