a1a17e81a1
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
490 lines
19 KiB
Bash
Executable File
490 lines
19 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
################################################
|
|
#### Podman Container Prometheus Exporter ####
|
|
#### for node_exporter textfile collector ####
|
|
#### ####
|
|
#### Author: Phil Connor ####
|
|
#### Contact: contact@mylinux.work ####
|
|
#### Version: 1.0.0.20260327 ####
|
|
################################################
|
|
|
|
# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Base name of this script, used in help text and log messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Built-in defaults (immutable).
declare -r DEFAULT_NODE_DIR="/var/lib/node_exporter"
declare -r DEFAULT_COLLECTION_INTERVAL=60
declare -r DEFAULT_PODMAN_CMD="podman"
declare -r DEFAULT_HTTP_PORT=9101

# Effective settings: an environment value wins when it is set and non-empty,
# otherwise the default above is used.
: "${NODE_DIR:=$DEFAULT_NODE_DIR}"
: "${COLLECTION_INTERVAL:=$DEFAULT_COLLECTION_INTERVAL}"
: "${PODMAN_CMD:=$DEFAULT_PODMAN_CMD}"
: "${HTTP_PORT:=$DEFAULT_HTTP_PORT}"
: "${DEBUG:=}"

# Mode selected on the command line: once, daemon or http.
RUN_MODE="once"

# PID of the background collector started in http mode (empty when none).
HTTP_PID=""
|
|
|
|
# Print a failure report to stderr and terminate with the failing status.
#   $1 - exit status of the command that failed
#   $2 - line number where the failure was detected
handle_error() {
  local -r status=$1 lineno=$2
  printf '%s\n' "Error: $SCRIPT_NAME failed at line $lineno with exit code $status" >&2
  exit "$status"
}

# Hand the status and line number to handle_error whenever the ERR trap fires.
trap 'handle_error $? $LINENO' ERR
|
|
|
|
# Exit-time housekeeping: stop the background process recorded in HTTP_PID
# (if any is still alive) and delete leftover temporary metric files.
# Every step is best-effort; failures are deliberately ignored.
cleanup() {
  if [[ -n "$HTTP_PID" ]]; then
    # kill -0 only probes whether the process still exists.
    if kill -0 "$HTTP_PID" 2>/dev/null; then
      kill "$HTTP_PID" 2>/dev/null || true
    fi
  fi

  # Temporary files share the final file name plus a ".<suffix>" tail.
  local stale_prefix="${NODE_DIR}/textfile_collector/podman_containers.prom."
  rm -f "${stale_prefix}"* 2>/dev/null || true
}

# Run cleanup on every exit path of the script.
trap cleanup EXIT
|
|
|
|
# Write a "[DEBUG] ..." line to stderr, but only when DEBUG is non-empty.
# All arguments are joined with spaces to form the message.
debug_echo() {
  [[ -n "$DEBUG" ]] || return 0
  printf '%s\n' "[DEBUG] $*" >&2
}
|
|
|
|
# Print the usage text to stdout and terminate the script.
#   $1 - optional exit status (default 0). Callers reporting a usage error
#        can pass a non-zero value; existing no-argument calls still exit 0.
# Reads SCRIPT_NAME and the DEFAULT_* constants to fill in the text.
show_help() {
  local -r status="${1:-0}"

  cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]

Podman container metrics collector for Prometheus node_exporter textfile directory.

Collects per-container resource usage (CPU, memory, network, block I/O), state,
restart counts, exit codes, and uptime via podman stats/inspect and writes them
as Prometheus metrics. Supports both rootless and rootful Podman.

OPTIONS:
--once Run collection once and exit (default)
--daemon Run continuously at COLLECTION_INTERVAL
--http Serve metrics over HTTP on HTTP_PORT
--help, -h Show this help message

ENVIRONMENT VARIABLES:
NODE_DIR Node exporter textfile directory (default: $DEFAULT_NODE_DIR)
COLLECTION_INTERVAL Seconds between collections in daemon mode (default: $DEFAULT_COLLECTION_INTERVAL)
PODMAN_CMD Podman binary path or command (default: $DEFAULT_PODMAN_CMD)
HTTP_PORT Port for --http mode (default: $DEFAULT_HTTP_PORT)
DEBUG Enable debug output

EXAMPLES:
$SCRIPT_NAME --once
$SCRIPT_NAME --daemon
COLLECTION_INTERVAL=30 $SCRIPT_NAME --daemon
$SCRIPT_NAME --http
PODMAN_CMD=/usr/bin/podman HTTP_PORT=9102 $SCRIPT_NAME --http

OUTPUT:
Writes metrics to \$NODE_DIR/textfile_collector/podman_containers.prom

EOF

  exit "$status"
}
|
|
|
|
# Parse command-line arguments. Each mode flag overwrites RUN_MODE, so the
# last one given wins. --help prints usage and exits 0; anything unrecognised
# is a usage error and exits 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --once)
      RUN_MODE="once"
      ;;
    --daemon)
      RUN_MODE="daemon"
      ;;
    --http)
      RUN_MODE="http"
      ;;
    --help|-h)
      show_help
      ;;
    *)
      echo "Unknown option: $1" >&2
      # show_help terminates the shell it runs in with status 0. Run it in a
      # subshell, with the usage text sent to stderr, so that this error path
      # can exit non-zero instead of reporting success.
      ( show_help ) >&2 || true
      exit 2
      ;;
  esac
  shift
done
|
|
|
|
# Rootless detection based on the effective user ID: prints "true" for any
# non-root user and "false" for root.
detect_rootless() {
  local answer="false"
  (( EUID == 0 )) || answer="true"
  echo "$answer"
}

# Evaluated once at startup and then frozen.
ROOTLESS=$(detect_rootless)
readonly ROOTLESS
|
|
|
|
# Validate the runtime configuration before any collection starts.
# Exits with status 1 and a message on stderr when:
#   - PODMAN_CMD or jq cannot be found,
#   - COLLECTION_INTERVAL is not a positive number (daemon and http modes,
#     where it is passed to sleep),
#   - HTTP_PORT is not an integer in 1..65535 (http mode),
#   - the textfile collector directory is missing or not writable
#     (once and daemon modes; http mode writes to its own temp directory).
# Globals read: PODMAN_CMD, RUN_MODE, COLLECTION_INTERVAL, HTTP_PORT, NODE_DIR
validate_config() {
  local -r number_re='^[0-9]+([.][0-9]+)?$'

  if ! command -v "$PODMAN_CMD" &>/dev/null; then
    echo "Error: $PODMAN_CMD is not installed or not in PATH" >&2
    exit 1
  fi

  if ! command -v jq &>/dev/null; then
    echo "Error: jq is not installed or not in PATH" >&2
    exit 1
  fi

  if [[ "$RUN_MODE" == "daemon" || "$RUN_MODE" == "http" ]]; then
    # Must look like a number and contain a non-zero digit: an interval of 0
    # would turn the collection loop into a busy loop.
    if [[ ! "$COLLECTION_INTERVAL" =~ $number_re || ! "$COLLECTION_INTERVAL" =~ [1-9] ]]; then
      echo "Error: COLLECTION_INTERVAL must be a positive number of seconds: $COLLECTION_INTERVAL" >&2
      exit 1
    fi
  fi

  if [[ "$RUN_MODE" == "http" ]]; then
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    if [[ ! "$HTTP_PORT" =~ ^[0-9]{1,5}$ ]] \
      || (( 10#$HTTP_PORT < 1 || 10#$HTTP_PORT > 65535 )); then
      echo "Error: HTTP_PORT must be an integer between 1 and 65535: $HTTP_PORT" >&2
      exit 1
    fi
  else
    local textfile_dir="${NODE_DIR}/textfile_collector"
    if [[ ! -d "$textfile_dir" ]]; then
      echo "Error: Textfile collector directory not found: $textfile_dir" >&2
      echo "Create it: sudo mkdir -p $textfile_dir" >&2
      exit 1
    fi
    if [[ ! -w "$textfile_dir" ]]; then
      echo "Error: Textfile collector directory is not writable: $textfile_dir" >&2
      exit 1
    fi
  fi
}
|
|
|
|
# Convert a human-readable size ("1.23GiB", "456.7 MB", "100kB", "512") to a
# whole number of bytes printed on stdout.
#   $1 - size string; digits and dots form the number, the remaining
#        non-blank characters form the unit
# Units are matched case-insensitively: B, kB/MB/GB/TB (powers of 1000) and
# KiB/MiB/GiB/TiB (powers of 1024). A missing or unknown unit is treated as
# bytes. Input without any digits (e.g. "" or "--") yields 0.
parse_bytes() {
  local raw="${1:-}"
  local num unit factor

  # Split into numeric part and unit; whitespace between them is dropped so
  # "1.5 GiB" is read the same as "1.5GiB".
  num=${raw//[^0-9.]/}
  unit=${raw//[0-9.[:space:]]/}

  if [[ -z "$num" ]]; then
    echo "0"
    return
  fi

  case "$unit" in
    [kK][bB])      factor=1000 ;;
    [kK][iI][bB])  factor=1024 ;;
    [mM][bB])      factor=1000000 ;;
    [mM][iI][bB])  factor=1048576 ;;
    [gG][bB])      factor=1000000000 ;;
    [gG][iI][bB])  factor=1073741824 ;;
    [tT][bB])      factor=1000000000000 ;;
    [tT][iI][bB])  factor=1099511627776 ;;
    *)             factor=1 ;;
  esac

  # Values go in through -v rather than being pasted into the awk program,
  # so a malformed number cannot change the program text.
  awk -v n="$num" -v f="$factor" 'BEGIN { printf "%.0f", n * f }'
}
|
|
|
|
# Escape a string for use inside a double-quoted Prometheus label value:
# backslash, double quote and newline are backslash-escaped.
_podman_escape_label() {
  local value=$1
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  value=${value//$'\n'/\\n}
  printf '%s' "$value"
}

# Print the "# HELP" / "# TYPE ... gauge" header pair for one metric.
#   $1 - metric name, $2 - help text
_podman_emit_header() {
  printf '# HELP %s %s\n# TYPE %s gauge\n' "$1" "$2" "$1"
}

# Collect metrics for all containers and publish them atomically.
#
# Runs "$PODMAN_CMD ps -a", "$PODMAN_CMD stats --no-stream" and one
# "$PODMAN_CMD inspect" per container, renders the results in the Prometheus
# text format into a temporary file next to the target, then renames it to
# ${NODE_DIR}/textfile_collector/podman_containers.prom.
#
# Globals read: NODE_DIR, PODMAN_CMD, ROOTLESS (and DEBUG via debug_echo)
# Helpers used: debug_echo, parse_bytes, jq
# A failed or unparsable "ps", or a failed "inspect", is reported through
# podman_exporter_success=0 rather than by aborting the run.
collect_all() {
  local output_dir="${NODE_DIR}/textfile_collector"
  local output_file="${output_dir}/podman_containers.prom"
  local temp_file
  temp_file=$(mktemp "${output_file}.XXXXXX")

  local start_time
  start_time=$(date +%s%N)
  # Keep digits only so the value is safe for shell arithmetic even where
  # date does not expand %N.
  start_time=${start_time//[^0-9]/}
  local success=1
  local -r number_re='^-?[0-9]+([.][0-9]+)?$'

  debug_echo "Starting collection (rootless=$ROOTLESS)..."

  # stderr is intentionally NOT redirected for this group, so debug_echo
  # messages issued inside it stay visible.
  {
    # Container list with state information.
    local ps_json
    ps_json=$($PODMAN_CMD ps -a --format json 2>/dev/null) || {
      debug_echo "Failed to run $PODMAN_CMD ps"
      success=0
      ps_json="[]"
    }
    # Normalise to a JSON array so every later jq call has valid input.
    ps_json=$(jq -c 'if type == "array" then . else [] end' <<<"$ps_json" 2>/dev/null) || {
      debug_echo "Could not parse $PODMAN_CMD ps output as JSON"
      success=0
      ps_json="[]"
    }
    [[ -n "$ps_json" ]] || ps_json="[]"

    local container_count
    container_count=$(jq 'length' <<<"$ps_json")

    if (( container_count == 0 )); then
      debug_echo "No containers found"
    fi

    # Resource statistics (podman only reports running containers here).
    local stats_json="[]"
    if (( container_count > 0 )); then
      stats_json=$($PODMAN_CMD stats --no-stream --format json 2>/dev/null) || {
        debug_echo "Failed to run $PODMAN_CMD stats"
        stats_json="[]"
      }
      stats_json=$(jq -c 'if type == "array" then . else [] end' <<<"$stats_json" 2>/dev/null) \
        || stats_json="[]"
      [[ -n "$stats_json" ]] || stats_json="[]"
    fi

    # Per-container metric headers.
    _podman_emit_header podman_container_cpu_percent "CPU usage percentage of the container."
    _podman_emit_header podman_container_memory_usage_bytes "Memory usage of the container in bytes."
    _podman_emit_header podman_container_memory_limit_bytes "Memory limit of the container in bytes."
    _podman_emit_header podman_container_network_rx_bytes "Total network bytes received by the container."
    _podman_emit_header podman_container_network_tx_bytes "Total network bytes transmitted by the container."
    _podman_emit_header podman_container_block_read_bytes "Total block bytes read by the container."
    _podman_emit_header podman_container_block_write_bytes "Total block bytes written by the container."
    _podman_emit_header podman_container_pids "Number of PIDs in the container."
    _podman_emit_header podman_container_running "Whether the container is running (1=running, 0=stopped)."
    _podman_emit_header podman_container_restart_count "Number of container restarts."
    _podman_emit_header podman_container_uptime_seconds "Seconds since the container started."
    _podman_emit_header podman_container_exit_code "Last exit code of the container."
    _podman_emit_header podman_container_info "Container metadata (always 1)."

    local now
    now=$(date +%s)

    local i
    for (( i = 0; i < container_count; i++ )); do
      local container_json container_id container_name image state
      container_json=$(jq -c ".[$i]" <<<"$ps_json")
      container_id=$(jq -r '.Id // .ID // ""' <<<"$container_json")
      # Names may be an array or a plain string; resolve it in a single jq
      # program (raw string output cannot be fed to a second jq as JSON).
      container_name=$(jq -r '(.Names // .Name // "")
        | if type == "array" then (.[0] // "") else . end' <<<"$container_json")
      image=$(jq -r '.Image // ""' <<<"$container_json")
      state=$(jq -r '(.State // "unknown") | tostring | ascii_downcase' <<<"$container_json")

      if [[ -z "$container_name" ]]; then
        continue
      fi

      debug_echo "Processing container: $container_name (state=$state)"

      # Inspect data supplies restart count, exit code, start time and pod.
      local inspect_json
      inspect_json=$($PODMAN_CMD inspect "${container_id:-$container_name}" 2>/dev/null) || {
        debug_echo "Failed to inspect container: $container_name"
        success=0
        continue
      }

      local restart_count exit_code started_at pod_name
      restart_count=$(jq -r '.[0].RestartCount // 0' <<<"$inspect_json")
      exit_code=$(jq -r '.[0].State.ExitCode // 0' <<<"$inspect_json")
      started_at=$(jq -r '.[0].State.StartedAt // ""' <<<"$inspect_json")
      pod_name=$(jq -r '.[0].Pod // ""' <<<"$inspect_json")
      [[ "$restart_count" =~ $number_re ]] || restart_count=0
      [[ "$exit_code" =~ $number_re ]] || exit_code=0

      # A 64-hex-digit pod value is an ID; try to replace it with the pod
      # name. The inspect result is accepted as an object or a one-element
      # array.
      if [[ "$pod_name" =~ ^[a-f0-9]{64}$ ]]; then
        local resolved_pod
        resolved_pod=$($PODMAN_CMD pod inspect "$pod_name" 2>/dev/null \
          | jq -r '(if type == "array" then .[0] else . end) | .Name // ""' 2>/dev/null) \
          || resolved_pod=""
        if [[ -n "$resolved_pod" ]]; then
          pod_name="$resolved_pod"
        fi
      fi

      # Label set shared by every metric of this container.
      local labels
      labels="name=\"$(_podman_escape_label "$container_name")\""
      labels+=",image=\"$(_podman_escape_label "$image")\""
      labels+=",pod=\"$(_podman_escape_label "$pod_name")\""

      local running=0
      if [[ "$state" == "running" ]]; then
        running=1
      fi
      echo "podman_container_running{${labels},rootless=\"${ROOTLESS}\"} ${running}"

      echo "podman_container_restart_count{${labels}} ${restart_count}"
      echo "podman_container_exit_code{${labels}} ${exit_code}"

      # Uptime is only reported for running containers with a real start
      # time; the all-zero timestamp means "never started".
      local uptime=0
      if [[ -n "$started_at" && "$started_at" != "0001-01-01T00:00:00Z" ]] && (( running == 1 )); then
        local started_epoch
        started_epoch=$(date -d "$started_at" +%s 2>/dev/null) || started_epoch=0
        if [[ "$started_epoch" =~ ^[0-9]+$ ]] && (( started_epoch > 0 )); then
          uptime=$(( now - started_epoch ))
          if (( uptime < 0 )); then
            uptime=0
          fi
        fi
      fi
      echo "podman_container_uptime_seconds{${labels}} ${uptime}"

      local short_id="${container_id:0:12}"
      echo "podman_container_info{${labels},id=\"${short_id}\",status=\"$(_podman_escape_label "$state")\",rootless=\"${ROOTLESS}\"} 1"

      # Find this container's stats entry. Both capitalised and lowercase
      # keys are accepted, and IDs match by prefix so a shortened ID in the
      # stats output still pairs with the full ID from ps.
      local stats_entry
      stats_entry=$(jq -c --arg name "$container_name" --arg id "$container_id" '
        [ .[]
          | ((.ContainerID // .ID // .id // "") | tostring) as $sid
          | select(
              ((.Name // .name // "") == $name)
              or ($sid != "" and $id != ""
                  and (($id | startswith($sid)) or ($sid | startswith($id))))
            )
        ] | first // empty' <<<"$stats_json" 2>/dev/null) || stats_entry=""

      if [[ -n "$stats_entry" ]]; then
        # CPU percent, without the % sign; anything non-numeric becomes 0.
        local cpu_percent
        cpu_percent=$(jq -r '.CPU // .cpu_percent // "0"' <<<"$stats_entry")
        cpu_percent=${cpu_percent//%/}
        [[ "$cpu_percent" =~ $number_re ]] || cpu_percent=0
        echo "podman_container_cpu_percent{${labels}} ${cpu_percent}"

        # Memory: either one "usage / limit" string or separate fields.
        local mem_raw mem_usage_raw mem_limit_raw mem_usage mem_limit
        mem_raw=$(jq -r '.MemUsage // .mem_usage // "0"' <<<"$stats_entry")
        if [[ "$mem_raw" == *" / "* ]]; then
          mem_usage_raw=${mem_raw%% /*}
          mem_limit_raw=${mem_raw##*/ }
        else
          mem_usage_raw=$mem_raw
          mem_limit_raw=$(jq -r '.MemLimit // .mem_limit // "0"' <<<"$stats_entry")
        fi
        mem_usage=$(parse_bytes "$mem_usage_raw")
        mem_limit=$(parse_bytes "$mem_limit_raw")
        echo "podman_container_memory_usage_bytes{${labels}} ${mem_usage}"
        echo "podman_container_memory_limit_bytes{${labels}} ${mem_limit}"

        # Network I/O: separate fields first, then a combined "rx / tx".
        local net_input_raw net_output_raw net_io net_rx net_tx
        net_input_raw=$(jq -r '.NetInput // .net_input // "0"' <<<"$stats_entry")
        net_output_raw=$(jq -r '.NetOutput // .net_output // "0"' <<<"$stats_entry")
        if [[ "$net_input_raw" == "0" ]]; then
          net_io=$(jq -r '.NetIO // .net_io // "" | tostring' <<<"$stats_entry")
          if [[ "$net_io" == *" / "* ]]; then
            net_input_raw=${net_io%% /*}
            net_output_raw=${net_io##*/ }
          fi
        fi
        net_rx=$(parse_bytes "$net_input_raw")
        net_tx=$(parse_bytes "$net_output_raw")
        echo "podman_container_network_rx_bytes{${labels}} ${net_rx}"
        echo "podman_container_network_tx_bytes{${labels}} ${net_tx}"

        # Block I/O: same two layouts as network I/O.
        local block_input_raw block_output_raw block_io block_read block_write
        block_input_raw=$(jq -r '.BlockInput // .block_input // "0"' <<<"$stats_entry")
        block_output_raw=$(jq -r '.BlockOutput // .block_output // "0"' <<<"$stats_entry")
        if [[ "$block_input_raw" == "0" ]]; then
          block_io=$(jq -r '.BlockIO // .block_io // "" | tostring' <<<"$stats_entry")
          if [[ "$block_io" == *" / "* ]]; then
            block_input_raw=${block_io%% /*}
            block_output_raw=${block_io##*/ }
          fi
        fi
        block_read=$(parse_bytes "$block_input_raw")
        block_write=$(parse_bytes "$block_output_raw")
        echo "podman_container_block_read_bytes{${labels}} ${block_read}"
        echo "podman_container_block_write_bytes{${labels}} ${block_write}"

        local pids
        pids=$(jq -r '.PIDs // .pids // 0' <<<"$stats_entry")
        [[ "$pids" =~ $number_re ]] || pids=0
        echo "podman_container_pids{${labels}} ${pids}"
      else
        # No stats entry (container not running): emit zeroed metrics so
        # every series exists for every container.
        local metric
        for metric in cpu_percent memory_usage_bytes memory_limit_bytes \
          network_rx_bytes network_tx_bytes block_read_bytes \
          block_write_bytes pids; do
          echo "podman_container_${metric}{${labels}} 0"
        done
      fi
    done

    # Aggregate: number of containers per (lower-cased) state. A missing
    # State counts as "unknown".
    echo ""
    _podman_emit_header podman_containers_total "Total number of containers by state."
    jq -r '
      [ .[] | (.State // "unknown") | tostring | ascii_downcase ]
      | group_by(.)
      | .[]
      | "podman_containers_total{state=\"\(.[0])\"} \(length)"
    ' <<<"$ps_json" 2>/dev/null || success=0

    # Exporter self-metrics.
    local end_time duration
    end_time=$(date +%s%N)
    end_time=${end_time//[^0-9]/}
    # Subtract in integer arithmetic first; the raw nanosecond stamps are
    # too large to be represented exactly as awk floating-point numbers.
    duration=$(awk -v ns="$(( end_time - start_time ))" \
      'BEGIN { printf "%.4f", ns / 1000000000 }')

    echo ""
    _podman_emit_header podman_exporter_duration_seconds "Time taken to collect metrics."
    echo "podman_exporter_duration_seconds ${duration}"
    echo ""
    _podman_emit_header podman_exporter_last_run_timestamp "Unix timestamp of last collection."
    echo "podman_exporter_last_run_timestamp $(date +%s)"
    echo ""
    _podman_emit_header podman_exporter_success "Whether the last collection succeeded (1=success, 0=failure)."
    echo "podman_exporter_success ${success}"
  } > "$temp_file"

  # mktemp creates the file readable by its owner only; open it up so a
  # node_exporter running as another user can read the published metrics.
  chmod 644 "$temp_file"
  # Rename within the same directory so readers never see a partial file.
  mv "$temp_file" "$output_file"

  debug_echo "Collection complete. Wrote to $output_file"
}
|
|
|
|
# Serve metrics over HTTP using socat.
#
# Creates a private temporary directory, points NODE_DIR at it, starts a
# background loop that runs collect_all every COLLECTION_INTERVAL seconds,
# and answers every TCP connection on HTTP_PORT with the latest metrics file
# (or a 503 while no file exists yet). Does not return; the listener is
# restarted if socat exits with an error.
#
# Globals read:    SCRIPT_NAME, HTTP_PORT, COLLECTION_INTERVAL
# Globals written: NODE_DIR (temp directory), HTTP_PID (collector PID)
# Exits with status 1 when socat is not installed.
serve_http() {
  if ! command -v socat &>/dev/null; then
    echo "Error: socat is required for --http mode but is not installed" >&2
    exit 1
  fi

  echo "$SCRIPT_NAME serving metrics on http://0.0.0.0:${HTTP_PORT}/metrics (interval: ${COLLECTION_INTERVAL}s)"

  # http mode keeps its metrics in a private temp dir instead of the
  # node_exporter textfile directory.
  local http_dir
  http_dir=$(mktemp -d)
  NODE_DIR="$http_dir"
  mkdir -p "${http_dir}/textfile_collector"

  # cleanup only removes leftover temp files, so extend the EXIT trap to
  # delete the whole temp directory as well. The path is expanded (and
  # shell-quoted) now because http_dir is local to this function.
  # shellcheck disable=SC2064
  trap "cleanup; rm -rf -- $(printf '%q' "$http_dir")" EXIT

  # Background collection loop.
  (
    while true; do
      collect_all
      sleep "$COLLECTION_INTERVAL"
    done
  ) &
  HTTP_PID=$!

  # Serve requests with socat: "fork" runs the SYSTEM command once per
  # connection. The metrics file is streamed with cat so its trailing
  # newline is kept, and the body is delimited by closing the connection
  # instead of a Content-Length header.
  while true; do
    socat "TCP-LISTEN:${HTTP_PORT},reuseaddr,fork" SYSTEM:"
      metrics_file=\"${http_dir}/textfile_collector/podman_containers.prom\"
      if [ -f \"\$metrics_file\" ]; then
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nConnection: close\r\n\r\n'
        cat \"\$metrics_file\"
      else
        printf 'HTTP/1.1 503 Service Unavailable\r\nContent-Type: text/plain\r\nConnection: close\r\n\r\nMetrics not yet available\n'
      fi
    " 2>/dev/null || {
      debug_echo "socat exited, restarting listener..."
      sleep 1
    }
  done
}
|
|
|
|
# Entry point: validate the configuration, then dispatch on RUN_MODE.
#   once   - a single collection
#   daemon - announce the mode, then collect forever, sleeping
#            COLLECTION_INTERVAL seconds between runs
#   http   - hand over to serve_http
# Any other RUN_MODE value does nothing after validation.
main() {
  validate_config

  if [[ "$RUN_MODE" == "once" ]]; then
    collect_all
  elif [[ "$RUN_MODE" == "daemon" ]]; then
    printf '%s\n' "$SCRIPT_NAME running in daemon mode (interval: ${COLLECTION_INTERVAL}s)"
    while :; do
      collect_all
      sleep "$COLLECTION_INTERVAL"
    done
  elif [[ "$RUN_MODE" == "http" ]]; then
    serve_http
  fi
}
|
|
|
|
main
|