Files
linux-scripts/podman-container-exporter.sh
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

490 lines
19 KiB
Bash
Executable File

#!/bin/bash
################################################
#### Podman Container Prometheus Exporter ####
#### for node_exporter textfile collector ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### Version: 1.0.0.20260327 ####
################################################
# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure anywhere inside a pipeline.
set -euo pipefail

# Base name of this script, used in usage text and messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Built-in defaults; each one can be overridden through the environment
# variable of the same name without the DEFAULT_ prefix.
DEFAULT_NODE_DIR="/var/lib/node_exporter"
DEFAULT_COLLECTION_INTERVAL=60
DEFAULT_PODMAN_CMD="podman"
DEFAULT_HTTP_PORT=9101
readonly DEFAULT_NODE_DIR DEFAULT_COLLECTION_INTERVAL DEFAULT_PODMAN_CMD DEFAULT_HTTP_PORT

# Effective settings: keep a non-empty value from the environment, otherwise
# fall back to the default above.
: "${NODE_DIR:=${DEFAULT_NODE_DIR}}"
: "${COLLECTION_INTERVAL:=${DEFAULT_COLLECTION_INTERVAL}}"
: "${PODMAN_CMD:=${DEFAULT_PODMAN_CMD}}"
: "${HTTP_PORT:=${DEFAULT_HTTP_PORT}}"
# Any non-empty value enables debug output; define it as empty when unset so
# later reads are safe under `set -u`.
: "${DEBUG:=}"

# Selected run mode; the command-line flags may change it.
RUN_MODE="once"

# PID of the background collection loop started in HTTP mode (empty until
# then); read by the exit handler.
HTTP_PID=""
# ERR trap handler: report where the script failed, then exit with the same
# status as the failing command.
#   $1 - exit status of the failed command
#   $2 - line number at which the trap fired
handle_error() {
  local status=$1 lineno=$2
  printf 'Error: %s failed at line %s with exit code %s\n' \
    "$SCRIPT_NAME" "$lineno" "$status" >&2
  exit "$status"
}
# Single quotes defer expansion of $? and $LINENO until the trap fires.
trap 'handle_error "$?" "$LINENO"' ERR
# EXIT trap handler: stop the background collector (if one was started and is
# still alive) and delete leftover temporary metric files.
cleanup() {
  if [[ -n "$HTTP_PID" ]]; then
    # kill -0 only probes whether the process exists.
    if kill -0 "$HTTP_PID" 2>/dev/null; then
      kill "$HTTP_PID" 2>/dev/null || true
    fi
  fi

  # Temp files are named podman_containers.prom.<suffix>; the published
  # podman_containers.prom itself does not match this glob.
  local collector_dir="${NODE_DIR}/textfile_collector"
  rm -f "${collector_dir}/podman_containers.prom."* 2>/dev/null || true
}
trap cleanup EXIT
# Write a "[DEBUG] ..." line to stderr when DEBUG is non-empty; otherwise do
# nothing. All arguments are joined with spaces into one message.
debug_echo() {
  [[ -z "$DEBUG" ]] && return 0
  printf '[DEBUG] %s\n' "$*" >&2
}
# Print the usage text to stdout and terminate the script with status 0.
# The heredoc delimiter is unquoted, so the script name and the DEFAULT_*
# values are expanded; the escaped \$NODE_DIR is printed literally.
show_help() {
  local prog="$SCRIPT_NAME"
  cat <<EOF
Usage: ${prog} [OPTIONS]
Podman container metrics collector for Prometheus node_exporter textfile directory.
Collects per-container resource usage (CPU, memory, network, block I/O), state,
restart counts, exit codes, and uptime via podman stats/inspect and writes them
as Prometheus metrics. Supports both rootless and rootful Podman.
OPTIONS:
--once Run collection once and exit (default)
--daemon Run continuously at COLLECTION_INTERVAL
--http Serve metrics over HTTP on HTTP_PORT
--help, -h Show this help message
ENVIRONMENT VARIABLES:
NODE_DIR Node exporter textfile directory (default: ${DEFAULT_NODE_DIR})
COLLECTION_INTERVAL Seconds between collections in daemon mode (default: ${DEFAULT_COLLECTION_INTERVAL})
PODMAN_CMD Podman binary path or command (default: ${DEFAULT_PODMAN_CMD})
HTTP_PORT Port for --http mode (default: ${DEFAULT_HTTP_PORT})
DEBUG Enable debug output
EXAMPLES:
${prog} --once
${prog} --daemon
COLLECTION_INTERVAL=30 ${prog} --daemon
${prog} --http
PODMAN_CMD=/usr/bin/podman HTTP_PORT=9102 ${prog} --http
OUTPUT:
Writes metrics to \$NODE_DIR/textfile_collector/podman_containers.prom
EOF
  exit 0
}
# Parse command-line arguments. Mode flags simply overwrite RUN_MODE, so the
# last one given wins. --help prints usage and exits 0. Any unrecognised
# argument is a usage error: the message and the usage text go to stderr and
# the script exits with status 2.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --once) RUN_MODE="once" ;;
    --daemon) RUN_MODE="daemon" ;;
    --http) RUN_MODE="http" ;;
    --help | -h) show_help ;;
    *)
      echo "Unknown option: $1" >&2
      # show_help ends with `exit 0`; running it in a subshell confines that
      # exit so a bad invocation can still fail with a non-zero status.
      (show_help) >&2 || true
      exit 2
      ;;
  esac
  shift
done
# Print "true" when the script runs as a non-root user (effective UID other
# than 0) and "false" when it runs as root. The result is used as the value
# of the rootless label on emitted metrics.
detect_rootless() {
  if ((EUID == 0)); then
    printf '%s\n' "false"
  else
    printf '%s\n' "true"
  fi
}

# Evaluated once at start-up and frozen for the rest of the run.
ROOTLESS=$(detect_rootless)
readonly ROOTLESS
# Check the prerequisites before any collection starts; exits with status 1
# and a message on stderr when one is missing:
#   * the Podman command ($PODMAN_CMD) and jq must be resolvable;
#   * unless running in http mode, ${NODE_DIR}/textfile_collector must already
#     exist (http mode creates its own directory later).
validate_config() {
  local tool
  for tool in "$PODMAN_CMD" jq; do
    if ! command -v "$tool" &>/dev/null; then
      echo "Error: $tool is not installed or not in PATH" >&2
      exit 1
    fi
  done

  # http mode does not use the configured textfile directory.
  [[ "$RUN_MODE" == "http" ]] && return 0

  local collector_dir="${NODE_DIR}/textfile_collector"
  if [[ -d "$collector_dir" ]]; then
    return 0
  fi
  echo "Error: Textfile collector directory not found: $collector_dir" >&2
  echo "Create it: sudo mkdir -p $collector_dir" >&2
  exit 1
}
# Convert a human-readable size such as "1.23GiB", "456.7MB", "100kB" or
# "1.5 GiB" into a whole number of bytes, printed on stdout without a
# trailing newline.
#
#   $1 - the size string
#
# Rules:
#   * the digits and dots in the input form the number; everything else,
#     minus whitespace, forms the unit;
#   * decimal units (kB, MB, GB, TB, PB) use powers of 1000 and binary units
#     (KiB, MiB, GiB, TiB, PiB) powers of 1024; matching ignores letter case;
#   * an empty, non-numeric or malformed number (e.g. "", "--", ".") yields 0;
#   * a missing or unrecognised unit is treated as plain bytes.
parse_bytes() {
  local raw="${1:-}"
  local num unit multiplier
  local numeric_re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'

  num=${raw//[^0-9.]/}
  unit=${raw//[0-9.]/}
  # Drop whitespace so "1.5 GiB" is recognised the same as "1.5GiB".
  unit=${unit//[[:space:]]/}

  # Reject anything that is not a single well-formed decimal number instead
  # of handing it to awk.
  if [[ ! "$num" =~ $numeric_re ]]; then
    echo "0"
    return
  fi

  case "$unit" in
    [kK][bB]) multiplier=1000 ;;
    [kK][iI][bB]) multiplier=1024 ;;
    [mM][bB]) multiplier=1000000 ;;
    [mM][iI][bB]) multiplier=1048576 ;;
    [gG][bB]) multiplier=1000000000 ;;
    [gG][iI][bB]) multiplier=1073741824 ;;
    [tT][bB]) multiplier=1000000000000 ;;
    [tT][iI][bB]) multiplier=1099511627776 ;;
    [pP][bB]) multiplier=1000000000000000 ;;
    [pP][iI][bB]) multiplier=1125899906842624 ;;
    *) multiplier=1 ;;
  esac

  # Values are passed with -v rather than spliced into the awk program text.
  awk -v n="$num" -v m="$multiplier" 'BEGIN { printf "%.0f", n * m }'
}
# Collect metrics for every container and publish them for the node_exporter
# textfile collector.
#
# Globals read: NODE_DIR, PODMAN_CMD, ROOTLESS
# Helpers used: debug_echo, parse_bytes
# Output:       ${NODE_DIR}/textfile_collector/podman_containers.prom. The
#               metrics are written to a mktemp file in the same directory and
#               renamed over the target once complete.
#
# A failing `ps` or `inspect` call does not abort the run; it sets
# podman_exporter_success to 0 instead. A failing `stats` call only results in
# zeroed resource metrics.
#
# NOTE(review): the JSON key names produced by `podman ps` / `podman stats`
# appear to differ between Podman releases (CamelCase vs snake_case, full vs
# short IDs), so several spellings are accepted below — confirm against the
# Podman version actually deployed.
collect_all() {
  local output_dir="${NODE_DIR}/textfile_collector"
  local output_file="${output_dir}/podman_containers.prom"
  local temp_file
  temp_file=$(mktemp "${output_file}.XXXXXX")
  local start_time
  start_time=$(date +%s%N)
  local success=1
  # Non-negative decimal number; keeps unparseable values out of the samples.
  local number_re='^[0-9]+(\.[0-9]+)?$'

  debug_echo "Starting collection (rootless=$ROOTLESS)..."

  # stderr is deliberately NOT redirected for this group so that debug_echo
  # output and tool errors stay visible; only stdout goes to the temp file.
  {
    # Container list with state information.
    local ps_json
    ps_json=$($PODMAN_CMD ps -a --format json 2>/dev/null) || {
      debug_echo "Failed to run $PODMAN_CMD ps"
      success=0
      ps_json="[]"
    }
    # Treat completely empty output as an empty list.
    if [[ -z "${ps_json//[[:space:]]/}" ]]; then
      ps_json="[]"
    fi

    local container_count
    container_count=$(jq 'length' <<<"$ps_json")
    if [[ "$container_count" -eq 0 ]]; then
      debug_echo "No containers found"
    fi

    # Resource statistics; podman only reports these for running containers.
    local stats_json="[]"
    if [[ "$container_count" -gt 0 ]]; then
      stats_json=$($PODMAN_CMD stats --no-stream --format json 2>/dev/null) || {
        debug_echo "Failed to run $PODMAN_CMD stats"
        stats_json="[]"
      }
      if [[ -z "${stats_json//[[:space:]]/}" ]]; then
        stats_json="[]"
      fi
    fi

    # HELP/TYPE headers for all per-container metrics ("name|help text").
    local -a metric_help=(
      "podman_container_cpu_percent|CPU usage percentage of the container."
      "podman_container_memory_usage_bytes|Memory usage of the container in bytes."
      "podman_container_memory_limit_bytes|Memory limit of the container in bytes."
      "podman_container_network_rx_bytes|Total network bytes received by the container."
      "podman_container_network_tx_bytes|Total network bytes transmitted by the container."
      "podman_container_block_read_bytes|Total block bytes read by the container."
      "podman_container_block_write_bytes|Total block bytes written by the container."
      "podman_container_pids|Number of PIDs in the container."
      "podman_container_running|Whether the container is running (1=running, 0=stopped)."
      "podman_container_restart_count|Number of container restarts."
      "podman_container_uptime_seconds|Seconds since the container started."
      "podman_container_exit_code|Last exit code of the container."
      "podman_container_info|Container metadata (always 1)."
    )
    local header
    for header in "${metric_help[@]}"; do
      printf '# HELP %s %s\n# TYPE %s gauge\n' \
        "${header%%|*}" "${header#*|}" "${header%%|*}"
    done

    local now
    now=$(date +%s)

    local i
    for ((i = 0; i < container_count; i++)); do
      local container_id container_name image state
      container_id=$(jq -r --argjson i "$i" '.[$i] | (.Id // .ID // "")' <<<"$ps_json")
      # Names may be an array or a plain string; resolve both inside one jq
      # call. Feeding a raw string into a second jq would fail to parse.
      container_name=$(jq -r --argjson i "$i" \
        '.[$i] | (.Names // .Name // "") | if type == "array" then (.[0] // "") else . end' \
        <<<"$ps_json")
      image=$(jq -r --argjson i "$i" '.[$i].Image // ""' <<<"$ps_json")
      state=$(jq -r --argjson i "$i" '.[$i].State // "unknown"' <<<"$ps_json" |
        tr '[:upper:]' '[:lower:]')

      if [[ -z "$container_name" ]]; then
        continue
      fi
      debug_echo "Processing container: $container_name (state=$state)"

      # Inspect data supplies restart count, exit code, start time and pod.
      local inspect_json
      inspect_json=$($PODMAN_CMD inspect "$container_id" 2>/dev/null) || {
        debug_echo "Failed to inspect container: $container_name"
        success=0
        continue
      }

      local restart_count exit_code started_at pod_name
      restart_count=$(jq -r '.[0].RestartCount // 0' <<<"$inspect_json")
      exit_code=$(jq -r '.[0].State.ExitCode // 0' <<<"$inspect_json")
      started_at=$(jq -r '.[0].State.StartedAt // ""' <<<"$inspect_json")
      pod_name=$(jq -r '.[0].Pod // ""' <<<"$inspect_json")

      # A 64-character hex value is a pod ID; try to turn it into the pod
      # name. The inspect output is accepted as a single object or an array.
      if [[ -n "$pod_name" ]] && [[ "$pod_name" =~ ^[a-f0-9]{64}$ ]]; then
        local resolved_pod
        resolved_pod=$($PODMAN_CMD pod inspect "$pod_name" 2>/dev/null |
          jq -r '(if type == "array" then .[0] else . end) | .Name // ""') || resolved_pod=""
        if [[ -n "$resolved_pod" ]]; then
          pod_name="$resolved_pod"
        fi
      fi

      # Label set shared by all samples of this container; pod is empty when
      # the container is not in a pod.
      local labels="name=\"${container_name}\",image=\"${image}\",pod=\"${pod_name}\""

      # Running state.
      local running=0
      if [[ "$state" == "running" ]]; then
        running=1
      fi
      echo "podman_container_running{${labels},rootless=\"${ROOTLESS}\"} ${running}"

      # Restart count and exit code.
      echo "podman_container_restart_count{${labels}} ${restart_count}"
      echo "podman_container_exit_code{${labels}} ${exit_code}"

      # Uptime: only for running containers with a usable start time.
      local uptime=0
      if [[ -n "$started_at" ]] && [[ "$started_at" != "0001-01-01T00:00:00Z" ]] &&
        [[ "$running" -eq 1 ]]; then
        local started_epoch
        started_epoch=$(date -d "$started_at" +%s 2>/dev/null) || started_epoch=0
        if [[ "$started_epoch" -gt 0 ]]; then
          uptime=$((now - started_epoch))
          if [[ "$uptime" -lt 0 ]]; then
            uptime=0
          fi
        fi
      fi
      echo "podman_container_uptime_seconds{${labels}} ${uptime}"

      # Info metric (always 1).
      local short_id="${container_id:0:12}"
      echo "podman_container_info{${labels},id=\"${short_id}\",status=\"${state}\",rootless=\"${ROOTLESS}\"} 1"

      # Find this container's stats entry: match on name, or on an ID that is
      # equal to or a prefix of the full container ID, under either key style.
      local stats_entry
      stats_entry=$(jq -c --arg name "$container_name" --arg id "$container_id" '
        (if type == "array" then . else [] end)
        | map(select(
            ((.Name // .name // "") == $name)
            or ((.ContainerID // .ID // .id // "" | tostring) as $sid
                | ($sid | length) > 0 and ($id | startswith($sid)))
          ))
        | first // empty' <<<"$stats_json")

      if [[ -n "$stats_entry" ]]; then
        # CPU percent: strip the % sign, fall back to 0 if not numeric.
        local cpu_percent
        cpu_percent=$(jq -r '.CPU // .CPUPerc // .cpu_percent // "0"' <<<"$stats_entry")
        cpu_percent=${cpu_percent//%/}
        [[ "$cpu_percent" =~ $number_re ]] || cpu_percent=0
        echo "podman_container_cpu_percent{${labels}} ${cpu_percent}"

        # Memory: "<usage> / <limit>" in one field, or usage and limit in
        # separate fields.
        local mem_raw mem_usage_raw mem_limit_raw mem_usage mem_limit
        mem_raw=$(jq -r '.MemUsage // .mem_usage // "0"' <<<"$stats_entry")
        mem_usage_raw=${mem_raw%% /*}
        mem_limit_raw=${mem_raw##*/ }
        if [[ "$mem_usage_raw" == "$mem_limit_raw" ]]; then
          mem_limit_raw=$(jq -r '.MemLimit // .mem_limit // "0"' <<<"$stats_entry")
        fi
        mem_usage=$(parse_bytes "$mem_usage_raw")
        mem_limit=$(parse_bytes "$mem_limit_raw")
        echo "podman_container_memory_usage_bytes{${labels}} ${mem_usage}"
        echo "podman_container_memory_limit_bytes{${labels}} ${mem_limit}"

        # Network I/O: separate fields first, then the combined "<rx> / <tx>".
        local net_input_raw net_output_raw net_rx net_tx
        net_input_raw=$(jq -r '.NetInput // .net_input // "0"' <<<"$stats_entry")
        net_input_raw=${net_input_raw%% /*}
        net_output_raw=$(jq -r '.NetOutput // .net_output // "0"' <<<"$stats_entry")
        net_output_raw=${net_output_raw##*/ }
        if [[ "$net_input_raw" == "0" ]]; then
          local net_io
          net_io=$(jq -r '.NetIO // .net_io // ""' <<<"$stats_entry")
          if [[ -n "$net_io" ]] && [[ "$net_io" != "null" ]]; then
            net_input_raw=${net_io%% /*}
            net_output_raw=${net_io##*/ }
          fi
        fi
        net_rx=$(parse_bytes "$net_input_raw")
        net_tx=$(parse_bytes "$net_output_raw")
        echo "podman_container_network_rx_bytes{${labels}} ${net_rx}"
        echo "podman_container_network_tx_bytes{${labels}} ${net_tx}"

        # Block I/O: same layout as network I/O.
        local block_input_raw block_output_raw block_read block_write
        block_input_raw=$(jq -r '.BlockInput // .block_input // "0"' <<<"$stats_entry")
        block_input_raw=${block_input_raw%% /*}
        block_output_raw=$(jq -r '.BlockOutput // .block_output // "0"' <<<"$stats_entry")
        block_output_raw=${block_output_raw##*/ }
        if [[ "$block_input_raw" == "0" ]]; then
          local block_io
          block_io=$(jq -r '.BlockIO // .block_io // ""' <<<"$stats_entry")
          if [[ -n "$block_io" ]] && [[ "$block_io" != "null" ]]; then
            block_input_raw=${block_io%% /*}
            block_output_raw=${block_io##*/ }
          fi
        fi
        block_read=$(parse_bytes "$block_input_raw")
        block_write=$(parse_bytes "$block_output_raw")
        echo "podman_container_block_read_bytes{${labels}} ${block_read}"
        echo "podman_container_block_write_bytes{${labels}} ${block_write}"

        # PIDs: fall back to 0 if not numeric.
        local pids
        pids=$(jq -r '.PIDs // .PIDS // .pids // 0' <<<"$stats_entry")
        [[ "$pids" =~ $number_re ]] || pids=0
        echo "podman_container_pids{${labels}} ${pids}"
      else
        # No stats entry (container not running): emit zeroed resource metrics.
        local metric
        for metric in cpu_percent memory_usage_bytes memory_limit_bytes \
          network_rx_bytes network_tx_bytes block_read_bytes block_write_bytes pids; do
          echo "podman_container_${metric}{${labels}} 0"
        done
      fi
    done

    # Aggregate: number of containers per state. The fallback is applied per
    # element, so an empty container list produces no samples at all.
    echo ""
    echo "# HELP podman_containers_total Total number of containers by state."
    echo "# TYPE podman_containers_total gauge"
    local state_name state_total
    while IFS=$'\t' read -r state_name state_total; do
      if [[ -n "$state_name" ]]; then
        echo "podman_containers_total{state=\"${state_name}\"} ${state_total}"
      fi
    done < <(jq -r '
      (if type == "array" then . else [] end)
      | map(.State // "unknown" | tostring | ascii_downcase)
      | group_by(.)
      | map("\(.[0])\t\(length)")
      | .[]' <<<"$ps_json")

    # Exporter metadata.
    local end_time duration
    end_time=$(date +%s%N)
    # Subtract as integers in the shell, then let awk do the decimal division.
    duration=$(awk -v ns="$((end_time - start_time))" 'BEGIN { printf "%.4f", ns / 1000000000 }')
    echo ""
    echo "# HELP podman_exporter_duration_seconds Time taken to collect metrics."
    echo "# TYPE podman_exporter_duration_seconds gauge"
    echo "podman_exporter_duration_seconds ${duration}"
    echo ""
    echo "# HELP podman_exporter_last_run_timestamp Unix timestamp of last collection."
    echo "# TYPE podman_exporter_last_run_timestamp gauge"
    echo "podman_exporter_last_run_timestamp $(date +%s)"
    echo ""
    echo "# HELP podman_exporter_success Whether the last collection succeeded (1=success, 0=failure)."
    echo "# TYPE podman_exporter_success gauge"
    echo "podman_exporter_success ${success}"
  } >"$temp_file"

  # mktemp creates the file readable by its owner only; open it up so that a
  # collector running under a different account can read the published file.
  chmod 0644 "$temp_file"
  mv "$temp_file" "$output_file"
  debug_echo "Collection complete. Wrote to $output_file"
}
# Answer one HTTP request on stdin/stdout. socat runs this once per accepted
# connection, in a fresh bash that receives the function and the metrics file
# path (PODMAN_EXPORTER_METRICS_FILE) through the environment.
_podman_exporter_http_handler() {
  local metrics_file="${PODMAN_EXPORTER_METRICS_FILE:-}"
  local line body length

  # Read the request head up to the blank line before replying, so the reply
  # is not sent while request data is still unread. Each read gives up after
  # 5 seconds.
  while IFS= read -r -t 5 line; do
    line=${line%$'\r'}
    [[ -n "$line" ]] || break
  done

  if [[ -n "$metrics_file" && -f "$metrics_file" ]]; then
    # Read the file once so that the length and the body come from the same
    # snapshot. The appended "." protects trailing newlines from being
    # stripped by the command substitution.
    body=$(
      cat -- "$metrics_file"
      printf '.'
    )
    body=${body%.}
    length=$(($(printf '%s' "$body" | wc -c)))
    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s' \
      "$length" "$body"
  else
    printf 'HTTP/1.1 503 Service Unavailable\r\nContent-Type: text/plain\r\nConnection: close\r\n\r\nMetrics not yet available\n'
  fi
}

# Serve metrics over HTTP using socat.
#
# Metrics are collected by a background loop into a private temporary
# directory (NODE_DIR is repointed at it) every COLLECTION_INTERVAL seconds,
# while socat listens on HTTP_PORT and runs the handler above for each
# connection. Any request path gets the same response. Exits with status 1
# when socat is not installed; otherwise it loops until the script is
# terminated.
serve_http() {
  if ! command -v socat &>/dev/null; then
    echo "Error: socat is required for --http mode but is not installed" >&2
    exit 1
  fi
  echo "$SCRIPT_NAME serving metrics on http://0.0.0.0:${HTTP_PORT}/metrics (interval: ${COLLECTION_INTERVAL}s)"

  # Private scratch directory standing in for the textfile directory.
  local http_dir
  http_dir=$(mktemp -d)
  NODE_DIR="$http_dir"
  mkdir -p "${http_dir}/textfile_collector"

  # Extend the exit handling so the scratch directory is removed as well. The
  # path is expanded (shell-quoted) now because http_dir is a local variable.
  # shellcheck disable=SC2064
  trap "cleanup; rm -rf -- $(printf '%q' "$http_dir")" EXIT

  # Hand the handler and the metrics path to the per-connection bash via the
  # environment, which keeps the socat address a fixed string with nothing
  # that needs escaping.
  export PODMAN_EXPORTER_METRICS_FILE="${http_dir}/textfile_collector/podman_containers.prom"
  export -f _podman_exporter_http_handler

  # Background collection loop; its PID is recorded so the exit handler can
  # stop it.
  (
    while true; do
      collect_all
      sleep "$COLLECTION_INTERVAL"
    done
  ) &
  HTTP_PID=$!

  # Serve requests; if the listener ever exits with an error, restart it
  # after a short pause.
  while true; do
    socat "TCP-LISTEN:${HTTP_PORT},reuseaddr,fork" \
      "EXEC:bash -c _podman_exporter_http_handler" 2>/dev/null || {
      debug_echo "socat exited, restarting listener..."
      sleep 1
    }
  done
}
# Entry point: validate the environment, then dispatch on RUN_MODE.
#   once   - collect a single time and return
#   daemon - collect forever, sleeping COLLECTION_INTERVAL seconds between runs
#   http   - hand over to serve_http
# Any other RUN_MODE value falls through without doing anything.
main() {
  validate_config

  if [[ "$RUN_MODE" == "once" ]]; then
    collect_all
  elif [[ "$RUN_MODE" == "daemon" ]]; then
    echo "$SCRIPT_NAME running in daemon mode (interval: ${COLLECTION_INTERVAL}s)"
    while :; do
      collect_all
      sleep "$COLLECTION_INTERVAL"
    done
  elif [[ "$RUN_MODE" == "http" ]]; then
    serve_http
  fi
}

main