Files
linux-scripts/proxmox-metrics.sh
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

258 lines
8.4 KiB
Bash

#!/bin/bash
################################################################################
# Script Name: proxmox-metrics.sh
# Version: 1.0
# Description: Prometheus exporter for Proxmox VE — cluster health, per-node
# CPU/memory/load, VM and container resource usage, storage pools,
# backup job status, HA state, replication lag, and task queue depth
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
# - curl, jq
# - Proxmox API token with PVEAuditor role
# - netcat (nc) for HTTP mode
#
# Usage:
# ./proxmox-metrics.sh
# ./proxmox-metrics.sh --http -p 9221
# ./proxmox-metrics.sh --textfile
#
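# Configuration:
# Default HTTP port: 9221
# Textfile directory: /var/lib/node_exporter
# Environment variables:
#   PVE_HOST          Proxmox API host (default: localhost)
#   PVE_PORT          Proxmox API port (default: 8006)
#   PVE_TOKEN_ID      API token ID, e.g. user@realm!token (required)
#   PVE_TOKEN_SECRET  API token secret (required)
#   PVE_VERIFY_SSL    "false" disables TLS certificate checks (default: false)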
#
################################################################################
# Run-mode settings; parse_args overwrites HTTP_MODE, HTTP_PORT and
# OUTPUT_FILE when the matching command-line flags are given.
HTTP_MODE=false
HTTP_PORT=9221
OUTPUT_FILE=""
# Directory used to build the output path for --textfile.
TEXTFILE_DIR="/var/lib/node_exporter"
# Proxmox API connection settings: a non-empty value already present in the
# environment is kept, otherwise the default on the right is assigned.
: "${PVE_HOST:=localhost}"
: "${PVE_PORT:=8006}"
: "${PVE_TOKEN_ID:=}"
: "${PVE_TOKEN_SECRET:=}"
: "${PVE_VERIFY_SSL:=false}"
# Print the command-line help on stdout and terminate the script with
# status 0. Reads HTTP_PORT so the help shows the currently selected port.
show_usage() {
  local self="$0"
  printf '%s\n' \
    "Usage: ${self} [OPTIONS]" \
    'Export Proxmox VE statistics as Prometheus metrics.' \
    'MODES:' \
    '--textfile Write to node_exporter textfile collector' \
    "--http Run HTTP server on port ${HTTP_PORT}" \
    'OPTIONS:' \
    '-p, --port HTTP port (default: 9221)' \
    '-o, --output Output file path' \
    'EXAMPLES:' \
    "${self} --textfile" \
    "${self} --http --port 9221" \
    'PVE_TOKEN_ID="user@realm!token" PVE_TOKEN_SECRET="secret" '"${self}"' --textfile'
  exit 0
}
# Parse command-line options into the global run-mode settings.
#
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Arguments: the script's command-line arguments
# Outputs:   error messages on stderr
# Returns:   0 on success; exits 1 on an unknown option, a missing option
#            value or an invalid port; -h/--help exits via show_usage.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        ;;
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/proxmox.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p|--port)
        # Without this guard "shift 2" fails when the value is missing,
        # nothing is shifted, and the loop would spin forever.
        if [[ $# -lt 2 ]]; then
          echo "ERROR: $1 requires a port number" >&2
          exit 1
        fi
        # At most 5 digits keeps the arithmetic below from overflowing;
        # 10# forces base 10 so a leading zero is not read as octal.
        if [[ ! "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
          echo "ERROR: Invalid port: $2" >&2
          exit 1
        fi
        HTTP_PORT=$(( 10#$2 ))
        shift 2
        ;;
      -o|--output)
        if [[ $# -lt 2 || -z "$2" ]]; then
          echo "ERROR: $1 requires a file path" >&2
          exit 1
        fi
        OUTPUT_FILE="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}
# Perform a GET request against the Proxmox API and print the raw response
# body on stdout.
#
# Globals:   PVE_HOST, PVE_PORT, PVE_TOKEN_ID, PVE_TOKEN_SECRET,
#            PVE_VERIFY_SSL (read); optional PVE_CONNECT_TIMEOUT (seconds,
#            default 5) and PVE_TIMEOUT (seconds, default 30)
# Arguments: $1 - API path below /api2/json, e.g. "/version"
# Outputs:   response body on stdout; curl diagnostics are discarded
# Returns:   curl's exit status
pve_api() {
  local endpoint="$1"
  # Bounded timeouts: without them an unresponsive API blocks the exporter
  # (and whatever scheduled it) indefinitely.
  local -a curl_opts=(
    -s
    --connect-timeout "${PVE_CONNECT_TIMEOUT:-5}"
    --max-time "${PVE_TIMEOUT:-30}"
  )
  # Only the exact value "false" turns certificate verification off.
  if [ "$PVE_VERIFY_SSL" = "false" ]; then
    curl_opts+=(-k)
  fi
  # NOTE(review): the token is passed on curl's command line and may be
  # visible to other local users in the process list.
  curl "${curl_opts[@]}" \
    -H "Authorization: PVEAPIToken=${PVE_TOKEN_ID}=${PVE_TOKEN_SECRET}" \
    "https://${PVE_HOST}:${PVE_PORT}/api2/json${endpoint}" 2>/dev/null
}
# Verify that the exporter can talk to Proxmox: required tools present,
# API token configured, and /version answering with a version field.
#
# Globals:   PVE_TOKEN_ID, PVE_TOKEN_SECRET (read)
# Outputs:   one "ERROR: ..." line on stderr for the first failed check
# Returns:   0 when all checks pass, 1 otherwise
check_proxmox() {
  local tool
  for tool in curl jq; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "ERROR: curl and jq are required" >&2
      return 1
    fi
  done

  if [[ -z "$PVE_TOKEN_ID" || -z "$PVE_TOKEN_SECRET" ]]; then
    echo "ERROR: PVE_TOKEN_ID and PVE_TOKEN_SECRET required" >&2
    return 1
  fi

  # jq -e exits non-zero when .data.version is missing or null.
  pve_api "/version" | jq -e '.data.version' >/dev/null 2>&1 && return 0

  echo "ERROR: Cannot connect to Proxmox API" >&2
  return 1
}
# Print all exporter metrics on stdout in Prometheus text exposition format:
# pve_up, cluster summary, per-node gauges, per-storage gauges and the
# exporter's own runtime.
#
# Globals:   none read directly; calls check_proxmox and pve_api
# Arguments: none
# Outputs:   metric text on stdout
# Returns:   0, also when check_proxmox fails (then only "pve_up 0" is
#            printed)
generate_metrics() {
  local script_start
  script_start=$(date +%s)

  if ! check_proxmox; then
    printf '%s\n' \
      '# HELP pve_up Proxmox exporter status' \
      '# TYPE pve_up gauge' \
      'pve_up 0'
    return 0
  fi

  printf '%s\n' \
    '# HELP pve_up Proxmox exporter status' \
    '# TYPE pve_up gauge' \
    'pve_up 1' \
    ''

  # --- Cluster status -------------------------------------------------------
  local cluster_status cluster_name quorum nodes_total nodes_online
  cluster_status=$(pve_api "/cluster/status")
  # The fallback sits outside the select pipeline so it also applies when no
  # type=="cluster" entry exists at all (standalone node).
  cluster_name=$(jq -r '([.data[]? | select(.type=="cluster")] | .[0].name) // "unknown"' <<<"$cluster_status")
  quorum=$(jq -r '([.data[]? | select(.type=="cluster")] | .[0].quorate) // 0' <<<"$cluster_status")
  nodes_total=$(jq '[.data[]? | select(.type=="node")] | length' <<<"$cluster_status")
  nodes_online=$(jq '[.data[]? | select(.type=="node" and .online==1)] | length' <<<"$cluster_status")
  # Never print a sample without a value: one malformed line can make a
  # consumer reject the whole exposition.
  [[ -n "$cluster_name" ]] || cluster_name="unknown"
  [[ "$quorum" =~ ^[0-9]+$ ]] || quorum=0
  [[ "$nodes_total" =~ ^[0-9]+$ ]] || nodes_total=0
  [[ "$nodes_online" =~ ^[0-9]+$ ]] || nodes_online=0

  printf '%s\n' \
    '# HELP pve_cluster_info Cluster information' \
    '# TYPE pve_cluster_info gauge' \
    "pve_cluster_info{name=\"${cluster_name}\"} 1" \
    '# HELP pve_cluster_quorum Cluster quorum status' \
    '# TYPE pve_cluster_quorum gauge' \
    "pve_cluster_quorum ${quorum}" \
    '# HELP pve_cluster_nodes_total Total nodes in cluster' \
    '# TYPE pve_cluster_nodes_total gauge' \
    "pve_cluster_nodes_total ${nodes_total}" \
    '# HELP pve_cluster_nodes_online Online nodes' \
    '# TYPE pve_cluster_nodes_online gauge' \
    "pve_cluster_nodes_online ${nodes_online}"

  # --- Node metrics ---------------------------------------------------------
  local nodes_data spec metric field help
  nodes_data=$(pve_api "/nodes")
  # Each entry: metric name | JSON field of a /nodes item | HELP text
  local -a node_gauges=(
    'pve_node_cpu_usage_ratio|cpu|CPU usage ratio per node'
    'pve_node_memory_total_bytes|maxmem|Total memory per node'
    'pve_node_memory_used_bytes|mem|Used memory per node'
    'pve_node_uptime_seconds|uptime|Uptime per node'
  )
  for spec in "${node_gauges[@]}"; do
    IFS='|' read -r metric field help <<<"$spec"
    printf '# HELP %s %s\n# TYPE %s gauge\n' "$metric" "$help" "$metric"
    jq -r --arg m "$metric" --arg f "$field" \
      '.data[]? | "\($m){node=\"\(.node)\"} \(.[$f] // 0)"' <<<"$nodes_data"
    echo ""
  done

  # --- Storage metrics ------------------------------------------------------
  # Collect one compact JSON object per usable storage first, so the samples
  # of each metric family can be printed as one contiguous group.
  local storage_rows="" node node_storage rows
  while IFS= read -r node; do
    [[ -n "$node" ]] || continue
    node_storage=$(pve_api "/nodes/$node/storage")
    # (.used // 0) keeps a missing "used" from raising a jq error that would
    # drop the remaining storages of this node.
    rows=$(jq -c --arg node "$node" '
      .data[]?
      | select((.total // 0) > 0)
      | {node: $node,
         storage: .storage,
         type: (.type // "unknown"),
         total: .total,
         used: (.used // 0),
         ratio: ((.used // 0) / .total)}' <<<"$node_storage" 2>/dev/null)
    [[ -n "$rows" ]] && storage_rows+="${rows}"$'\n'
  done < <(jq -r '.data[]? | .node // empty' <<<"$nodes_data" 2>/dev/null)

  # Each entry: metric name | field of the collected row | HELP text
  local -a storage_gauges=(
    'pve_storage_total_bytes|total|Total storage per pool'
    'pve_storage_used_bytes|used|Used storage per pool'
    'pve_storage_usage_ratio|ratio|Storage usage ratio per pool'
  )
  for spec in "${storage_gauges[@]}"; do
    IFS='|' read -r metric field help <<<"$spec"
    printf '# HELP %s %s\n# TYPE %s gauge\n' "$metric" "$help" "$metric"
    jq -r --arg m "$metric" --arg f "$field" \
      '"\($m){node=\"\(.node)\",storage=\"\(.storage)\",type=\"\(.type)\"} \(.[$f])"' \
      <<<"$storage_rows" 2>/dev/null
  done
  echo ""

  # --- Exporter runtime -----------------------------------------------------
  local script_end script_duration
  script_end=$(date +%s)
  script_duration=$((script_end - script_start))
  printf '%s\n' \
    '# HELP pve_exporter_duration_seconds Time to generate all metrics' \
    '# TYPE pve_exporter_duration_seconds gauge' \
    "pve_exporter_duration_seconds ${script_duration}" \
    '# HELP pve_exporter_last_run_timestamp Unix timestamp of last successful run' \
    '# TYPE pve_exporter_last_run_timestamp gauge' \
    "pve_exporter_last_run_timestamp ${script_end}"
}
# Answer one HTTP request: read the request line from stdin and write the
# complete response to stdout. "GET /metrics..." gets the metrics, anything
# else gets a small HTML landing page.
_pve_http_respond() {
  local request="" header
  # Blocks until the client sends its request line. No timeout here: the
  # handler is started before a client connects, so a timeout would fire
  # while the server is merely idle.
  if read -r request; then
    # Consume the remaining request headers up to the blank line so the
    # connection is not closed with unread data pending.
    while IFS= read -r -t 2 header; do
      header=${header%$'\r'}
      [[ -n "$header" ]] || break
    done
  fi
  if [[ "$request" =~ ^GET\ /metrics ]]; then
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r\n'
    generate_metrics </dev/null
  else
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n'
    echo "<html><head><title>Proxmox Exporter</title></head><body><h1>Proxmox VE Prometheus Exporter</h1><p><a href=\"/metrics\">Metrics</a></p></body></html>"
  fi
}

# Serve metrics over HTTP on HTTP_PORT, one connection at a time, forever.
#
# netcat's stdout (the client's request) is piped into the handler, and the
# handler's output travels back to netcat's stdin through a FIFO. That way
# the response is generated after the request has arrived, from the request
# the client actually sent.
#
# Globals:   HTTP_PORT (read)
# Outputs:   status and error messages on stderr
# Returns:   does not return; exits 1 when nc or the FIFO is unavailable
run_http_server() {
  echo "Starting Proxmox exporter on port $HTTP_PORT..." >&2
  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  local fifo_dir fifo
  if ! fifo_dir=$(mktemp -d); then
    echo "ERROR: Cannot create temporary directory for HTTP mode" >&2
    exit 1
  fi
  fifo="$fifo_dir/response"
  # The path is expanded now (shell-quoted) because fifo_dir is a local
  # variable and may be out of scope when the EXIT trap runs.
  # shellcheck disable=SC2064
  trap "rm -rf -- $(printf '%q' "$fifo_dir")" EXIT
  if ! mkfifo "$fifo"; then
    echo "ERROR: Cannot create FIFO for HTTP mode" >&2
    exit 1
  fi

  while true; do
    nc -l -p "$HTTP_PORT" -q 1 <"$fifo" 2>/dev/null | _pve_http_respond >"$fifo"
    # Back off briefly when nc itself failed (e.g. port already in use)
    # instead of spinning at full speed.
    if (( PIPESTATUS[0] != 0 )); then
      sleep 1
    fi
  done
}
# Entry point: parse the command line, then run in exactly one mode.
#   HTTP mode      - HTTP_MODE is "true": hand over to run_http_server.
#   stdout mode    - no OUTPUT_FILE: print the metrics.
#   textfile mode  - write the metrics to a temporary file next to
#                    OUTPUT_FILE and rename it into place, so a reader never
#                    sees a half-written file. Output shorter than 5 lines is
#                    discarded and the previous file is left untouched.
#
# Globals:   HTTP_MODE, OUTPUT_FILE (read, after parse_args has set them)
# Arguments: the script's command-line arguments
# Returns:   status of the selected mode; exits 1 when textfile output
#            could not be produced or is too short
main() {
  parse_args "$@"

  if [ "$HTTP_MODE" = true ]; then
    run_http_server
    return
  fi

  if [ -z "$OUTPUT_FILE" ]; then
    generate_metrics
    return
  fi

  local target_dir staging line_count
  target_dir="$(dirname "$OUTPUT_FILE")"
  mkdir -p "$target_dir"
  staging=$(mktemp "${target_dir}/.proxmox_metrics.XXXXXX")

  generate_metrics > "$staging" 2>/dev/null || {
    rm -f "$staging"
    echo "ERROR: Failed to generate metrics" >&2
    exit 1
  }

  line_count=$(wc -l < "$staging" 2>/dev/null || echo 0)
  if [ "$line_count" -lt 5 ]; then
    rm -f "$staging"
    echo "ERROR: Metrics file too small ($line_count lines), keeping previous" >&2
    exit 1
  fi

  chmod 644 "$staging"
  mv -f "$staging" "$OUTPUT_FILE"
  echo "Metrics written to $OUTPUT_FILE ($line_count lines)" >&2
}
# Script entry: run main with all command-line arguments passed through.
main "$@"