Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
This commit is contained in:
@@ -0,0 +1,683 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: alertmanager-exporter.sh
|
||||
# Version: 1.0
|
||||
# Description: Prometheus exporter for Alertmanager operational overview.
|
||||
# Queries the Alertmanager API for active alerts, silences,
|
||||
# cluster health, and config status. Complements the built-in
|
||||
# /metrics endpoint with higher-level operational metrics.
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
#
|
||||
# Prerequisites:
|
||||
# - curl
|
||||
# - jq
|
||||
# - Alertmanager running and accessible
|
||||
# - netcat (nc) for HTTP mode
|
||||
#
|
||||
# Usage:
|
||||
# # Output to stdout
|
||||
# ./alertmanager-exporter.sh
|
||||
#
|
||||
# # HTTP server mode
|
||||
# ./alertmanager-exporter.sh --http -p 9094
|
||||
#
|
||||
# # Textfile collector mode
|
||||
# ./alertmanager-exporter.sh --textfile
|
||||
#
|
||||
# Metrics Exported:
|
||||
# - alertmanager_overview_up - Exporter status (1=up, 0=down)
|
||||
# - alertmanager_overview_info - Alertmanager version info
|
||||
# - alertmanager_overview_alerts_active_total - Total active alerts
|
||||
# - alertmanager_overview_alerts_by_state - Alerts by state
|
||||
# - alertmanager_overview_alerts_by_severity - Alerts by severity
|
||||
# - alertmanager_overview_alerts_by_receiver - Alerts by receiver
|
||||
# - alertmanager_overview_alert_groups_total - Alert group count
|
||||
# - alertmanager_overview_silences_active - Active silences
|
||||
# - alertmanager_overview_silences_pending - Pending silences
|
||||
# - alertmanager_overview_silences_expired - Expired silences
|
||||
# - alertmanager_overview_silence_coverage_ratio - Silence coverage
|
||||
# - alertmanager_overview_cluster_peers - Peer count
|
||||
# - alertmanager_overview_cluster_peer_healthy - Per-peer health
|
||||
# - alertmanager_overview_config_hash - Config hash for drift detection
|
||||
# - alertmanager_overview_uptime_seconds - Uptime
|
||||
# - alertmanager_overview_last_config_reload_timestamp - Last reload
|
||||
# - alertmanager_overview_exporter_duration_seconds - Script duration
|
||||
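# - alertmanager_overview_exporter_last_run_timestamp - Last run time
# - alertmanager_overview_alert_resolution_rate - Suppressed/total alert ratio
# - alertmanager_overview_notification_rate - Notifications sent per integration
# - alertmanager_overview_notification_failures - Notification failures per integration
# - alertmanager_overview_notification_latency_seconds - Notification latency sum per integration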
|
||||
#
|
||||
# Configuration:
|
||||
# Default HTTP port: 9094
|
||||
# Textfile directory: /var/lib/node_exporter
|
||||
# Alertmanager URL: http://localhost:9093
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# ============================================================================
|
||||
# CONFIGURATION VARIABLES
|
||||
# ============================================================================
|
||||
|
||||
# Base URL of the Alertmanager instance to query (changed by -u/--url).
AM_URL="http://localhost:9093"

# HTTP server mode stays off unless --http is given; -p/--port sets the port.
HTTP_MODE=false
HTTP_PORT=9094

# Directory used to build the output path when --textfile is given.
TEXTFILE_DIR="/var/lib/node_exporter"

# Destination file for the metrics; empty means "print to stdout".
OUTPUT_FILE=""
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Print the command-line help text to stdout and terminate the script
# with exit status 0.
# Globals: HTTP_PORT, AM_URL (read; shown in the help text)
show_usage() {
    local self="$0"

    printf '%s\n' \
        "Usage: $self [OPTIONS]" \
        "" \
        "Export Alertmanager operational overview as Prometheus metrics." \
        "" \
        "MODES:" \
        "--textfile Write to node_exporter textfile collector" \
        "--http Run HTTP server on port $HTTP_PORT" \
        "" \
        "OPTIONS:" \
        "-p, --port HTTP port (default: 9094)" \
        "-u, --url Alertmanager URL (default: $AM_URL)" \
        "-o, --output Output file path" \
        "" \
        "EXAMPLES:" \
        "$self --textfile # Write to textfile collector" \
        "$self --http --port 9094 # Run HTTP server" \
        "$self --url http://alertmanager:9093 # Custom Alertmanager URL" \
        "$self -o /tmp/alertmanager-overview.prom # Write to custom file" \
        ""

    exit 0
}
|
||||
|
||||
# Parse command-line options into the global configuration variables.
# Globals:
#   TEXTFILE_DIR (read)
#   OUTPUT_FILE, HTTP_MODE, HTTP_PORT, AM_URL (written)
# Args: the script's command-line arguments ("$@")
# Exits: 0 via show_usage for -h/--help; 1 on an unknown option or when an
#        option that takes a value is given without one.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/alertmanager-overview.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port|-u|--url|-o|--output)
                # Without this check "shift 2" fails when the value is
                # missing, nothing is shifted, and the loop never ends.
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires an argument" >&2
                    exit 1
                fi
                case "$1" in
                    -p|--port)   HTTP_PORT="$2" ;;
                    -u|--url)    AM_URL="$2" ;;
                    -o|--output) OUTPUT_FILE="$2" ;;
                esac
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
# Verify that the external tools the exporter relies on are installed.
# Outputs: one "ERROR: <tool> not found" line on stderr per missing tool
# Returns: 0 when both curl and jq are available, 1 otherwise
check_requirements() {
    local tool status=0

    for tool in curl jq; do
        command -v "$tool" >/dev/null 2>&1 && continue
        echo "ERROR: $tool not found" >&2
        status=1
    done

    return "$status"
}
|
||||
|
||||
# Fetch one Alertmanager API endpoint.
# Globals: AM_URL (read)
# Args: $1 - endpoint path appended to AM_URL (e.g., /api/v2/alerts)
# Outputs: the response body on stdout; curl's stderr is discarded
# Returns: curl's exit status
am_api() {
    local -a curl_opts=(-sf --connect-timeout 5 --max-time 10)
    local target="${AM_URL}${1}"

    curl "${curl_opts[@]}" "$target" 2>/dev/null
}
|
||||
|
||||
# ============================================================================
|
||||
# METRIC COLLECTION FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Fetch the alert list and count alerts by state.
# Globals (written):
#   ALERTS_JSON        - raw alert array, or "[]" when unavailable
#   ALERTS_TOTAL       - number of alerts returned
#   ALERTS_ACTIVE      - alerts whose .status.state is "active"
#   ALERTS_SUPPRESSED  - alerts whose .status.state is "suppressed"
#   ALERTS_UNPROCESSED - alerts whose .status.state is "unprocessed"
# Returns: 0 on success; 1 when the request fails or the body is not a JSON
#          array (all counters are then 0 and ALERTS_JSON is "[]").
collect_alerts() {
    local alerts_json counts

    # Start from safe defaults so every failure path leaves usable values.
    ALERTS_TOTAL=0
    ALERTS_ACTIVE=0
    ALERTS_SUPPRESSED=0
    ALERTS_UNPROCESSED=0
    ALERTS_JSON="[]"

    alerts_json=$(am_api "/api/v2/alerts")
    if [ -z "$alerts_json" ]; then
        return 1
    fi

    # One jq pass validates the payload and yields all four counters,
    # tab-separated. A malformed body makes jq fail, which is treated like
    # a failed request rather than leaving the counters empty.
    counts=$(printf '%s\n' "$alerts_json" | jq -r '
        if type != "array" then error("expected a JSON array") else . end
        | [ length,
            (map(select(.status.state == "active")) | length),
            (map(select(.status.state == "suppressed")) | length),
            (map(select(.status.state == "unprocessed")) | length)
          ]
        | @tsv' 2>/dev/null) || return 1

    read -r ALERTS_TOTAL ALERTS_ACTIVE ALERTS_SUPPRESSED ALERTS_UNPROCESSED \
        <<<"$counts"
    ALERTS_JSON="$alerts_json"
}
|
||||
|
||||
# Print alert counts grouped by the "severity" label.
# Globals: ALERTS_JSON (read; treated as an empty array when unset or empty)
# Outputs: one sample line each for critical, warning and info (always
#          printed, 0 when jq yields nothing), plus an "other" line only
#          when some alert has a different or missing severity.
collect_alerts_by_severity() {
    local alerts="${ALERTS_JSON:-[]}"
    local severity count

    for severity in critical warning info; do
        count=$(printf '%s\n' "$alerts" | jq --arg sev "$severity" \
            '[.[] | select(.labels.severity == $sev)] | length')
        echo "alertmanager_overview_alerts_by_severity{severity=\"$severity\"} ${count:-0}"
    done

    # Count alerts with no severity or other severity values
    count=$(printf '%s\n' "$alerts" | jq \
        '[.[] | select(.labels.severity != "critical" and .labels.severity != "warning" and .labels.severity != "info")] | length')
    # Default to 0 so an empty jq result cannot break the integer test.
    count="${count:-0}"
    if [ "$count" -gt 0 ]; then
        echo "alertmanager_overview_alerts_by_severity{severity=\"other\"} $count"
    fi
}
|
||||
|
||||
# Print one sample line per receiver name with the number of alerts routed
# to it. An alert that lists no receivers is counted under "unknown".
# Globals: ALERTS_JSON (read)
# Outputs: alertmanager_overview_alerts_by_receiver{receiver="..."} <count>
#          lines in jq's group_by order; jq errors are discarded.
collect_alerts_by_receiver() {
    local per_receiver='
        map(.receivers[]?.name // "unknown")
        | group_by(.)[]
        | "alertmanager_overview_alerts_by_receiver{receiver=\"\(.[0])\"} \(length)"
    '

    jq -r "$per_receiver" <<<"$ALERTS_JSON" 2>/dev/null
}
|
||||
|
||||
# Print the number of alert groups reported by /api/v2/alerts/groups.
# Outputs: the array length on stdout, or "0" when the request returns
#          nothing.
collect_alert_groups() {
    local payload
    payload=$(am_api "/api/v2/alerts/groups")

    if [ -n "$payload" ]; then
        echo "$payload" | jq 'length'
    else
        echo "0"
    fi
}
|
||||
|
||||
# Fetch all silences and count them by state.
# Globals (written): SILENCES_ACTIVE, SILENCES_PENDING, SILENCES_EXPIRED
# Returns: 1 (with all three counters set to 0) when the request returns
#          nothing; otherwise the status of the last jq call.
collect_silences() {
    local payload
    local by_state='map(select(.status.state == $state)) | length'

    payload=$(am_api "/api/v2/silences")

    if [ -z "$payload" ]; then
        SILENCES_ACTIVE=0
        SILENCES_PENDING=0
        SILENCES_EXPIRED=0
        return 1
    fi

    SILENCES_ACTIVE=$(jq --arg state active "$by_state" <<<"$payload")
    SILENCES_PENDING=$(jq --arg state pending "$by_state" <<<"$payload")
    SILENCES_EXPIRED=$(jq --arg state expired "$by_state" <<<"$payload")
}
|
||||
|
||||
# Calculate silence coverage ratio (suppressed alerts / total alerts).
# Globals: ALERTS_TOTAL, ALERTS_SUPPRESSED (read; empty or unset counts as 0)
# Outputs: the ratio with four decimals (no trailing newline), or "0" when
#          there are no alerts or ALERTS_TOTAL is not a positive integer.
calculate_silence_coverage() {
    local total="${ALERTS_TOTAL:-0}"
    local suppressed="${ALERTS_SUPPRESSED:-0}"

    # stderr is silenced so a non-numeric total simply selects the "0" branch.
    if [ "$total" -gt 0 ] 2>/dev/null; then
        # Values go in through -v instead of being pasted into the awk
        # program, so an odd value cannot turn into an awk syntax error.
        awk -v s="$suppressed" -v t="$total" 'BEGIN { printf "%.4f", s / t }'
    else
        echo "0"
    fi
}
|
||||
|
||||
# Collect version, cluster, uptime, config and reload data for Alertmanager.
# Globals (read):    AM_URL
# Globals (written): AM_VERSION, CLUSTER_STATUS, CLUSTER_PEERS,
#                    CLUSTER_PEERS_JSON, AM_UPTIME_SECONDS, CONFIG_HASH,
#                    LAST_RELOAD
# Returns: 1 when /api/v2/status returns nothing (all globals then hold
#          their "unknown"/zero defaults), 0 otherwise.
collect_cluster_status() {
    local status_json
    status_json=$(am_api "/api/v2/status")

    if [ -z "$status_json" ]; then
        CLUSTER_PEERS=0
        CLUSTER_PEERS_JSON="[]"
        CLUSTER_STATUS="unknown"
        AM_VERSION="unknown"
        AM_UPTIME_SECONDS=0
        CONFIG_HASH="0"
        LAST_RELOAD=0
        return 1
    fi

    AM_VERSION=$(echo "$status_json" | jq -r '.versionInfo.version // "unknown"')

    # Cluster info
    # shellcheck disable=SC2034 # reserved for future use
    CLUSTER_STATUS=$(echo "$status_json" | jq -r '.cluster.status // "disabled"')
    CLUSTER_PEERS=$(echo "$status_json" | jq '.cluster.peers // [] | length')

    # Peer details (for per-peer health metrics)
    CLUSTER_PEERS_JSON=$(echo "$status_json" | jq '.cluster.peers // []')

    # Uptime: the .uptime field is converted to an epoch with "date -d";
    # if that conversion fails the uptime is reported as 0.
    local start_time start_epoch now_epoch
    AM_UPTIME_SECONDS=0
    start_time=$(echo "$status_json" | jq -r '.uptime // empty' 2>/dev/null)
    if [ -n "$start_time" ]; then
        start_epoch=$(date -d "$start_time" +%s 2>/dev/null || echo 0)
        now_epoch=$(date +%s)
        if [ "$start_epoch" -gt 0 ]; then
            AM_UPTIME_SECONDS=$((now_epoch - start_epoch))
        fi
    fi

    # Config hash: first 16 hex characters of the SHA-256 of the original
    # configuration text, "0" when the text is absent.
    local config_json
    config_json=$(echo "$status_json" | jq -r '.config.original // ""')
    if [ -n "$config_json" ]; then
        CONFIG_HASH=$(echo "$config_json" | sha256sum | awk '{print $1}' | head -c 16)
    else
        CONFIG_HASH="0"
    fi

    # Last config reload is not part of /api/v2/status, so it is read from
    # the built-in /metrics endpoint. The same timeouts as am_api are used
    # so a stalled endpoint cannot hang the exporter.
    local reload_ts
    reload_ts=$(curl -sf --connect-timeout 5 --max-time 10 "${AM_URL}/metrics" 2>/dev/null \
        | awk '/^alertmanager_config_last_reload_success_timestamp_seconds/ { print $2; exit }')
    LAST_RELOAD=${reload_ts:-0}
}
|
||||
|
||||
# Print one alertmanager_overview_cluster_peer_healthy sample per cluster
# peer, labelled with the peer's address ("unknown" when it has none).
# Every listed peer is reported with the value 1.
# Globals: CLUSTER_PEERS, CLUSTER_PEERS_JSON (read)
# Outputs: nothing when the peer count is 0 or the peer JSON is empty.
output_peer_metrics() {
    { [ "$CLUSTER_PEERS" -eq 0 ] || [ -z "$CLUSTER_PEERS_JSON" ]; } && return

    local per_peer='
        .[]
        | (.address // "unknown") as $peer
        | "alertmanager_overview_cluster_peer_healthy{peer=\"\($peer)\"} 1"
    '

    jq -r "$per_peer" <<<"$CLUSTER_PEERS_JSON" 2>/dev/null
}
|
||||
|
||||
# Fetch the raw text of Alertmanager's built-in /metrics endpoint.
# Globals: AM_URL (read); NOTIFICATION_METRICS (written)
# Returns: 0 when a non-empty body was fetched, 1 otherwise. On failure
#          NOTIFICATION_METRICS is left empty so callers never reuse a
#          stale payload.
collect_notification_metrics() {
    # Same timeouts as am_api: a stalled endpoint must not hang the exporter.
    NOTIFICATION_METRICS=$(curl -sf --connect-timeout 5 --max-time 10 \
        "${AM_URL}/metrics" 2>/dev/null)

    if [ -z "$NOTIFICATION_METRICS" ]; then
        return 1
    fi
}
|
||||
|
||||
# Re-export the labelled alertmanager_notifications_total samples under the
# name alertmanager_overview_notification_rate.
# Globals: NOTIFICATION_METRICS (read)
# Outputs: the renamed sample lines; nothing when no metrics were collected.
output_notification_rates() {
    [ -n "$NOTIFICATION_METRICS" ] || return 0

    # Only lines that start with the labelled source metric are printed.
    sed -n 's/^alertmanager_notifications_total{/alertmanager_overview_notification_rate{/p' \
        <<<"$NOTIFICATION_METRICS" 2>/dev/null
}
|
||||
|
||||
# Re-export the labelled alertmanager_notifications_failed_total samples
# under the name alertmanager_overview_notification_failures.
# Globals: NOTIFICATION_METRICS (read)
# Outputs: the renamed sample lines; nothing when no metrics were collected.
output_notification_failures() {
    [ -n "$NOTIFICATION_METRICS" ] || return 0

    # Only lines that start with the labelled source metric are printed.
    sed -n 's/^alertmanager_notifications_failed_total{/alertmanager_overview_notification_failures{/p' \
        <<<"$NOTIFICATION_METRICS" 2>/dev/null
}
|
||||
|
||||
# Re-export the labelled alertmanager_notification_latency_seconds_sum
# samples under the name alertmanager_overview_notification_latency_seconds.
# Only the _sum series is passed through; no average is computed here.
# Globals: NOTIFICATION_METRICS (read)
# Outputs: the renamed sample lines; nothing when no metrics were collected.
output_notification_latency() {
    [ -n "$NOTIFICATION_METRICS" ] || return 0

    # Only lines that start with the labelled source metric are printed.
    sed -n 's/^alertmanager_notification_latency_seconds_sum{/alertmanager_overview_notification_latency_seconds{/p' \
        <<<"$NOTIFICATION_METRICS" 2>/dev/null
}
|
||||
|
||||
# ============================================================================
|
||||
# METRIC OUTPUT
|
||||
# ============================================================================
|
||||
|
||||
# Print the "# HELP" and "# TYPE" lines of one metric family.
# Args: $1 - metric name, $2 - metric type, $3 - help text
_am_emit_header() {
    printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$3" "$1" "$2"
}

# Print a complete metric family that has a single unlabelled sample.
# Args: $1 - metric name, $2 - metric type, $3 - help text, $4 - value
_am_emit_value() {
    _am_emit_header "$1" "$2" "$3"
    printf '%s %s\n' "$1" "$4"
}

# Print the exporter's own duration and last-run metrics.
# Args: $1 - epoch second at which generation started
#       $2 - help text for the last-run timestamp metric
_am_emit_runtime() {
    local started="$1" finished
    finished=$(date +%s)

    _am_emit_value alertmanager_overview_exporter_duration_seconds gauge \
        "Time to generate all metrics" "$((finished - started))"
    echo ""
    _am_emit_value alertmanager_overview_exporter_last_run_timestamp gauge \
        "$2" "$finished"
}

# Print the full set of exporter metrics to stdout.
# Probes /api/v2/status first. When Alertmanager is unreachable only
# alertmanager_overview_up (0) and the exporter's own runtime metrics are
# printed. Otherwise the collect_* functions are run and every metric
# family is printed, each followed by a blank line.
# Values that a collector left empty are printed as 0 ("unknown" for the
# version label) so that no sample line is emitted without a value.
# Globals: read after being set by collect_alerts, collect_silences,
#          collect_cluster_status and collect_notification_metrics.
generate_metrics() {
    local script_start
    script_start=$(date +%s)

    # Check if Alertmanager is reachable
    local am_up=1
    if ! am_api "/api/v2/status" >/dev/null 2>&1; then
        am_up=0
    fi

    _am_emit_value alertmanager_overview_up gauge \
        "Alertmanager exporter status (1=up, 0=down)" "$am_up"

    # If Alertmanager is down, output minimal metrics and stop
    if [ "$am_up" -eq 0 ]; then
        echo ""
        _am_emit_runtime "$script_start" "Unix timestamp of last run"
        return
    fi

    # Collect data from API. Failures are tolerated: each collector sets
    # its own defaults, and empty values are defaulted again below.
    collect_alerts
    collect_silences
    collect_cluster_status
    collect_notification_metrics

    echo ""

    # Version info
    _am_emit_header alertmanager_overview_info gauge \
        "Alertmanager version and exporter version"
    echo "alertmanager_overview_info{version=\"${AM_VERSION:-unknown}\",exporter_version=\"1.0\"} 1"
    echo ""

    # Active alerts total
    _am_emit_value alertmanager_overview_alerts_active_total gauge \
        "Total active alerts" "${ALERTS_TOTAL:-0}"
    echo ""

    # Alerts by state
    _am_emit_header alertmanager_overview_alerts_by_state gauge \
        "Active alerts by state"
    echo "alertmanager_overview_alerts_by_state{state=\"active\"} ${ALERTS_ACTIVE:-0}"
    echo "alertmanager_overview_alerts_by_state{state=\"suppressed\"} ${ALERTS_SUPPRESSED:-0}"
    echo "alertmanager_overview_alerts_by_state{state=\"unprocessed\"} ${ALERTS_UNPROCESSED:-0}"
    echo ""

    # Alerts by severity
    _am_emit_header alertmanager_overview_alerts_by_severity gauge \
        "Active alerts by severity label"
    collect_alerts_by_severity
    echo ""

    # Alerts by receiver (family omitted when there are no samples)
    local receiver_lines
    receiver_lines=$(collect_alerts_by_receiver)
    if [ -n "$receiver_lines" ]; then
        _am_emit_header alertmanager_overview_alerts_by_receiver gauge \
            "Active alerts by receiver"
        printf '%s\n' "$receiver_lines"
        echo ""
    fi

    # Alert groups
    local group_count
    group_count=$(collect_alert_groups)
    _am_emit_value alertmanager_overview_alert_groups_total gauge \
        "Total alert group count" "${group_count:-0}"
    echo ""

    # The resolution rate and the silence coverage are the same
    # suppressed/total ratio, so it is computed once and used for both.
    local silence_coverage
    silence_coverage=$(calculate_silence_coverage)
    silence_coverage="${silence_coverage:-0}"

    # Alert resolution rate
    _am_emit_value alertmanager_overview_alert_resolution_rate gauge \
        "Ratio of suppressed to total alerts" "$silence_coverage"
    echo ""

    # Silences
    _am_emit_value alertmanager_overview_silences_active gauge \
        "Number of active silences" "${SILENCES_ACTIVE:-0}"
    echo ""
    _am_emit_value alertmanager_overview_silences_pending gauge \
        "Number of pending silences" "${SILENCES_PENDING:-0}"
    echo ""
    _am_emit_value alertmanager_overview_silences_expired gauge \
        "Number of expired silences" "${SILENCES_EXPIRED:-0}"
    echo ""

    # Silence coverage
    _am_emit_value alertmanager_overview_silence_coverage_ratio gauge \
        "Ratio of silenced alerts to total alerts" "$silence_coverage"
    echo ""

    # Cluster health
    _am_emit_value alertmanager_overview_cluster_peers gauge \
        "Number of cluster peers" "${CLUSTER_PEERS:-0}"
    echo ""

    # Per-peer health (family omitted when there are no samples)
    local peer_lines
    peer_lines=$(output_peer_metrics)
    if [ -n "$peer_lines" ]; then
        _am_emit_header alertmanager_overview_cluster_peer_healthy gauge \
            "Peer health status (1=healthy)"
        printf '%s\n' "$peer_lines"
        echo ""
    fi

    # Config hash
    _am_emit_header alertmanager_overview_config_hash gauge \
        "Config hash for drift detection (first 16 chars of SHA256)"
    echo "alertmanager_overview_config_hash{hash=\"${CONFIG_HASH:-0}\"} 1"
    echo ""

    # Notification rate per receiver
    local notif_rates
    notif_rates=$(output_notification_rates)
    if [ -n "$notif_rates" ]; then
        _am_emit_header alertmanager_overview_notification_rate counter \
            "Total notifications sent per integration"
        printf '%s\n' "$notif_rates"
        echo ""
    fi

    # Notification failures per receiver
    local notif_failures
    notif_failures=$(output_notification_failures)
    if [ -n "$notif_failures" ]; then
        _am_emit_header alertmanager_overview_notification_failures counter \
            "Total notification failures per integration"
        printf '%s\n' "$notif_failures"
        echo ""
    fi

    # Notification latency per receiver
    local notif_latency
    notif_latency=$(output_notification_latency)
    if [ -n "$notif_latency" ]; then
        _am_emit_header alertmanager_overview_notification_latency_seconds counter \
            "Notification latency sum per integration"
        printf '%s\n' "$notif_latency"
        echo ""
    fi

    # Uptime
    _am_emit_value alertmanager_overview_uptime_seconds gauge \
        "Alertmanager uptime in seconds" "${AM_UPTIME_SECONDS:-0}"
    echo ""

    # Last config reload
    _am_emit_value alertmanager_overview_last_config_reload_timestamp gauge \
        "Unix timestamp of last successful config reload" "${LAST_RELOAD:-0}"
    echo ""

    # Exporter runtime
    _am_emit_runtime "$script_start" "Unix timestamp of last successful run"
    echo ""
}
|
||||
|
||||
# ============================================================================
|
||||
# HTTP SERVER MODE
|
||||
# ============================================================================
|
||||
|
||||
# Serve metrics over HTTP with netcat, handling one connection per loop
# iteration, until the script is terminated.
# A request line starting with "GET /metrics" gets freshly generated
# metrics; any other request gets a small HTML index page.
# Globals: HTTP_PORT, AM_URL (read)
# Exits: 1 when nc is missing or the request FIFO cannot be created.
run_http_server() {
    echo "Starting alertmanager-overview exporter on port $HTTP_PORT..." >&2
    echo "Alertmanager URL: $AM_URL" >&2

    if ! command -v nc >/dev/null 2>&1; then
        echo "ERROR: netcat (nc) required for HTTP mode" >&2
        exit 1
    fi

    # nc's stdout (the client's request) is fed back to the handler through
    # a FIFO. A plain "handler | nc" pipeline only connects the handler's
    # output to nc, so the handler would read the script's own stdin and
    # never see the request line.
    local fifo_dir request_fifo request
    if ! fifo_dir=$(mktemp -d); then
        echo "ERROR: Cannot create temporary directory" >&2
        exit 1
    fi
    request_fifo="$fifo_dir/request"
    if ! mkfifo "$request_fifo"; then
        rm -rf -- "$fifo_dir"
        echo "ERROR: Cannot create request FIFO" >&2
        exit 1
    fi
    # The path is expanded now because the local variable no longer exists
    # when the EXIT trap runs.
    # shellcheck disable=SC2064
    trap "rm -rf -- '$fifo_dir'" EXIT
    trap 'exit 1' INT TERM

    while true; do
        {
            IFS= read -r request
            if [[ "$request" =~ ^GET\ /metrics ]]; then
                printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r\n'
                generate_metrics
            else
                printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n'
                cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Alertmanager Overview Exporter</title></head>
<body>
<h1>Alertmanager Overview Exporter v1.0</h1>
<p>Alertmanager URL: $AM_URL</p>
<p><a href="/metrics">Metrics</a></p>
</body>
</html>
EOF
            fi
        } < "$request_fifo" \
            | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null > "$request_fifo" \
            || sleep 1  # back off when nc fails (e.g. port in use) instead of spinning
    done
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN EXECUTION
|
||||
# ============================================================================
|
||||
|
||||
# Entry point: parse options, verify prerequisites, then run in one of
# three modes:
#   - HTTP server mode when HTTP_MODE is true
#   - file mode when OUTPUT_FILE is set: metrics are generated into a
#     temporary file in the destination directory and moved into place, so
#     the previous file is kept when anything fails
#   - stdout mode otherwise
# Args: the script's command-line arguments ("$@")
# Exits: 1 when a prerequisite is missing or the output file cannot be
#        produced.
main() {
    parse_args "$@"

    if ! check_requirements; then
        exit 1
    fi

    if [ "$HTTP_MODE" = true ]; then
        run_http_server
    elif [ -n "$OUTPUT_FILE" ]; then
        local output_dir temp_file file_lines

        output_dir="$(dirname "$OUTPUT_FILE")"
        if ! mkdir -p "$output_dir"; then
            echo "ERROR: Cannot create output directory $output_dir" >&2
            exit 1
        fi

        # The temp file is created next to the destination and later moved
        # over it with mv.
        if ! temp_file=$(mktemp "${output_dir}/.alertmanager_overview.XXXXXX"); then
            echo "ERROR: Cannot create temporary file in $output_dir" >&2
            exit 1
        fi

        if ! generate_metrics > "$temp_file" 2>/dev/null; then
            rm -f "$temp_file"
            echo "ERROR: Failed to generate metrics" >&2
            exit 1
        fi

        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)

        # Sanity check: refuse to replace the previous file with a stub.
        if [ "$file_lines" -lt 5 ]; then
            rm -f "$temp_file"
            echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
            exit 1
        fi

        chmod 644 "$temp_file"
        if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
            rm -f "$temp_file"
            echo "ERROR: Cannot write $OUTPUT_FILE" >&2
            exit 1
        fi

        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
    else
        generate_metrics
    fi
}
|
||||
|
||||
# Script entry point: pass all command-line arguments through to main.
main "$@"
|
||||
Reference in New Issue
Block a user