a1a17e81a1
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
225 lines
7.6 KiB
Bash
225 lines
7.6 KiB
Bash
#!/bin/bash

################################################
####  Container Health Prometheus Exporter  ####
####  for node_exporter textfile collector  ####
####                                        ####
####  Author: Phil Connor                   ####
####  Contact: contact@mylinux.work         ####
####  Version: 1.0.0.20260309               ####
################################################
|
|
|
|
# Make a pipeline fail when any of its stages fails, not only the last one.
set -o pipefail

# Name the script was invoked as; used in help text and error messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Default configuration
readonly DEFAULT_NODE_DIR="/var/lib/node_exporter"
readonly DEFAULT_COLLECTION_INTERVAL=60

# Configuration variables (can be overridden by environment).
# An unset or empty environment value falls back to the default.
NODE_DIR=${NODE_DIR:-$DEFAULT_NODE_DIR}
COLLECTION_INTERVAL=${COLLECTION_INTERVAL:-$DEFAULT_COLLECTION_INTERVAL}
DEBUG=${DEBUG:-}

# Runtime flags
RUN_MODE="once"
|
|
|
|
#######################################
# Report a failed command and terminate the script.
# Globals:   SCRIPT_NAME (read)
# Arguments: $1 - exit status of the failed command (defaults to 1)
#            $2 - line number of the failure (defaults to "unknown")
# Outputs:   a one-line error message on stderr
# Returns:   never returns; exits with the status given in $1
#######################################
handle_error() {
  # Defaults keep `exit` from receiving an empty argument when the function
  # is called without parameters.
  local exit_code=${1:-1}
  local line_number=${2:-unknown}

  printf 'Error: %s failed at line %s with exit code %s\n' \
    "$SCRIPT_NAME" "$line_number" "$exit_code" >&2
  exit "$exit_code"
}
|
|
|
|
# Route a failing command to handle_error together with its exit status and
# the line number at which it failed.
# NOTE(review): errtrace (`set -E`) is not enabled in the visible part of this
# script; without it bash does not inherit an ERR trap into shell functions,
# command substitutions or subshells — confirm whether that is intended.
trap 'handle_error $? $LINENO' ERR
|
|
|
|
#######################################
# Print a debug message when debugging is enabled.
# Globals:   DEBUG (read) - any non-empty value enables output
# Arguments: the message; all arguments are joined as "$*"
# Outputs:   "[DEBUG] <message>" on stderr, nothing when DEBUG is empty
# Returns:   0
#######################################
debug_echo() {
  [[ -n "${DEBUG:-}" ]] || return 0
  printf '[DEBUG] %s\n' "$*" >&2
}
|
|
|
|
#######################################
# Print the usage text and terminate the script.
# Globals:   SCRIPT_NAME, DEFAULT_NODE_DIR, DEFAULT_COLLECTION_INTERVAL (read)
# Arguments: $1 - optional exit status (defaults to 0)
# Outputs:   the help text on stdout
# Returns:   never returns; exits with the requested status
#######################################
show_help() {
  # Unquoted delimiter: $SCRIPT_NAME and the defaults are expanded, while the
  # escaped \$NODE_DIR is printed literally.
  cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

Container health metrics collector for Prometheus node_exporter textfile directory.

Collects per-container health check status, image age, restart counts, exit codes,
and running state via docker inspect and writes them as Prometheus metrics.

OPTIONS:
--once Run collection once and exit (default)
--daemon Run continuously at COLLECTION_INTERVAL
--help, -h Show this help message

ENVIRONMENT VARIABLES:
NODE_DIR Node exporter textfile directory (default: $DEFAULT_NODE_DIR)
COLLECTION_INTERVAL Seconds between collections in daemon mode (default: $DEFAULT_COLLECTION_INTERVAL)
DEBUG Enable debug output

EXAMPLES:
$SCRIPT_NAME --once
$SCRIPT_NAME --daemon
COLLECTION_INTERVAL=30 $SCRIPT_NAME --daemon

OUTPUT:
Writes metrics to \$NODE_DIR/textfile_collector/container_health.prom

EOF
  exit "${1:-0}"
}
|
|
|
|
# Parse arguments
|
|
#######################################
# Parse the command-line options.
# Globals:   RUN_MODE (written) - set to "once" or "daemon"
# Arguments: the script's command-line arguments
# Outputs:   for an unknown option, an error line and the help text on stderr
# Returns:   0 when every option was recognised; the script exits 0 after
#            --help/-h (through show_help) and exits 1 on an unknown option
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --once)
        RUN_MODE="once"
        ;;
      --daemon)
        RUN_MODE="daemon"
        ;;
      --help | -h)
        show_help
        ;;
      *)
        echo "Unknown option: $1" >&2
        # show_help finishes with `exit 0`; running it in a subshell keeps
        # that exit local so a usage error still ends with a failure status.
        (show_help) >&2
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
|
|
|
|
# Validate configuration
|
|
#######################################
# Check that the environment allows a collection run.
# Globals:   NODE_DIR, RUN_MODE, COLLECTION_INTERVAL (read)
# Arguments: none
# Outputs:   an error message on stderr when a check fails
# Returns:   0 when all checks pass; exits the script with status 1 otherwise
#######################################
validate_config() {
  if ! command -v docker &>/dev/null; then
    echo "Error: docker is not installed or not in PATH" >&2
    exit 1
  fi

  local textfile_dir="${NODE_DIR}/textfile_collector"
  if [[ ! -d "$textfile_dir" ]]; then
    echo "Error: Textfile collector directory not found: $textfile_dir" >&2
    echo "Create it: sudo mkdir -p $textfile_dir" >&2
    exit 1
  fi

  # The interval is handed to `sleep` by the daemon loop. A value that sleep
  # rejects, or a zero interval, would make that loop run without any pause,
  # so refuse it up front. It is not used in "once" mode and is not checked
  # there. Accepted: integer or decimal seconds with an optional s/m/h/d
  # suffix, containing at least one non-zero digit.
  if [[ "${RUN_MODE:-once}" == "daemon" ]]; then
    local interval_re='^([0-9]+|[0-9]*[.][0-9]+)[smhd]?$'
    if [[ ! "$COLLECTION_INTERVAL" =~ $interval_re ||
      "$COLLECTION_INTERVAL" != *[1-9]* ]]; then
      echo "Error: COLLECTION_INTERVAL must be a positive number of seconds: '$COLLECTION_INTERVAL'" >&2
      exit 1
    fi
  fi
}
|
|
|
|
# Collect metrics for all containers
|
|
#######################################
# Print the metric samples of a single container.
# Arguments: $1 - container name
#            $2 - current Unix time in seconds
# Outputs:   Prometheus sample lines on stdout
# Returns:   0 on success, 1 when `docker inspect` fails (nothing is printed)
#######################################
_emit_container_metrics() {
  local container_name=$1
  local now=$2

  # Extract all fields in a single docker inspect call, separated by '|'.
  local inspect_data
  inspect_data=$(docker inspect --format \
    '{{.Config.Image}}|{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}|{{.Created}}|{{.RestartCount}}|{{.State.ExitCode}}|{{.State.Running}}' \
    "$container_name" 2>/dev/null) || return 1

  local image health_status created restart_count exit_code running_raw
  IFS='|' read -r image health_status created restart_count exit_code running_raw \
    <<< "$inspect_data"

  # NOTE(review): .Created is taken from the container's inspect output, so
  # this looks like the container's creation time rather than the image's —
  # confirm that this is what container_image_age_seconds should report.
  # An unparseable timestamp falls back to epoch 0 (age == now).
  local created_epoch image_age
  created_epoch=$(date -d "$created" +%s 2>/dev/null) || created_epoch=0
  image_age=$((now - created_epoch))

  # Convert the running boolean to 0/1.
  local running=0
  if [[ "$running_raw" == "true" ]]; then
    running=1
  fi

  local labels="name=\"${container_name}\",image=\"${image}\""

  # One series per known health state: 1 for the current state, 0 otherwise.
  local status value
  for status in healthy unhealthy starting none; do
    value=0
    if [[ "$health_status" == "$status" ]]; then
      value=1
    fi
    echo "container_health_status{${labels},status=\"${status}\"} ${value}"
  done

  echo "container_image_age_seconds{${labels}} ${image_age}"
  echo "container_restart_count{${labels}} ${restart_count}"
  echo "container_exit_code{${labels}} ${exit_code}"
  echo "container_running{${labels}} ${running}"
}

#######################################
# Collect metrics for all containers and publish them atomically.
# Globals:   NODE_DIR (read)
# Arguments: none
# Outputs:   writes $NODE_DIR/textfile_collector/container_health.prom;
#            debug messages (through debug_echo) and errors on stderr
# Returns:   0 when the metrics file was written (docker failures are reported
#            through container_health_exporter_success), 1 when the file
#            could not be created or moved into place
#######################################
collect_all() {
  local output_dir="${NODE_DIR}/textfile_collector"
  local output_file="${output_dir}/container_health.prom"

  # Write to a temporary file in the same directory and rename it afterwards
  # so a reader never sees a half-written metrics file.
  local temp_file
  if ! temp_file=$(mktemp "${output_file}.XXXXXX"); then
    echo "Error: could not create a temporary file in $output_dir" >&2
    return 1
  fi

  local start_time
  start_time=$(date +%s%N)
  local success=1

  debug_echo "Starting collection..."

  # Only stdout is redirected: debug_echo writes to stderr and must stay
  # visible. Noisy docker commands silence their own stderr instead.
  {
    local containers
    if ! containers=$(docker ps -a --format '{{.Names}}' 2>/dev/null); then
      # A failed listing must not be reported as "no containers, all good".
      debug_echo "Failed to list containers"
      success=0
      containers=""
    elif [[ -z "$containers" ]]; then
      debug_echo "No containers found"
    fi

    # Per-container metrics headers
    echo "# HELP container_health_status Health check status of the container (1 for current status)."
    echo "# TYPE container_health_status gauge"
    echo "# HELP container_image_age_seconds Age of the container image in seconds."
    echo "# TYPE container_image_age_seconds gauge"
    echo "# HELP container_restart_count Number of container restarts."
    echo "# TYPE container_restart_count gauge"
    echo "# HELP container_exit_code Exit code of the container."
    echo "# TYPE container_exit_code gauge"
    echo "# HELP container_running Whether the container is running (1=running, 0=stopped)."
    echo "# TYPE container_running gauge"

    local now
    now=$(date +%s)

    local container_name
    while IFS= read -r container_name; do
      [[ -z "$container_name" ]] && continue

      debug_echo "Inspecting container: $container_name"
      if ! _emit_container_metrics "$container_name" "$now"; then
        debug_echo "Failed to inspect container: $container_name"
        success=0
      fi
    done <<< "$containers"

    # Exporter metadata. The duration is derived with integer arithmetic on
    # the nanosecond timestamps and printed with four decimals.
    local end_time elapsed_ns duration
    end_time=$(date +%s%N)
    elapsed_ns=$((end_time - start_time))
    if ((elapsed_ns < 0)); then
      elapsed_ns=0 # the clock stepped backwards during the run
    fi
    printf -v duration '%d.%04d' \
      "$((elapsed_ns / 1000000000))" "$((elapsed_ns % 1000000000 / 100000))"

    echo ""
    echo "# HELP container_health_exporter_duration_seconds Time taken to collect metrics."
    echo "# TYPE container_health_exporter_duration_seconds gauge"
    echo "container_health_exporter_duration_seconds ${duration}"
    echo ""
    echo "# HELP container_health_exporter_last_run_timestamp Unix timestamp of last collection."
    echo "# TYPE container_health_exporter_last_run_timestamp gauge"
    echo "container_health_exporter_last_run_timestamp $(date +%s)"
    echo ""
    echo "# HELP container_health_exporter_success Whether the last collection succeeded (1=success, 0=failure)."
    echo "# TYPE container_health_exporter_success gauge"
    echo "container_health_exporter_success ${success}"
  } > "$temp_file"

  # mktemp creates the file readable by its owner only; open it up so a
  # collector running under a different account can still read the metrics.
  chmod 644 "$temp_file"

  if ! mv "$temp_file" "$output_file"; then
    echo "Error: could not move metrics into place: $output_file" >&2
    rm -f -- "$temp_file"
    return 1
  fi

  debug_echo "Collection complete. Wrote to $output_file"
}
|
|
|
|
# Main
|
|
#######################################
# Validate the configuration and run the collector in the selected mode.
# Globals:   RUN_MODE, COLLECTION_INTERVAL, SCRIPT_NAME (read)
# Arguments: none
# Outputs:   a start-up line on stdout in daemon mode
# Returns:   in "once" mode the status of collect_all; daemon mode loops
#            until the script is terminated, or exits 1 if sleep fails
#######################################
main() {
  validate_config

  case "$RUN_MODE" in
    once)
      collect_all
      ;;
    daemon)
      echo "$SCRIPT_NAME running in daemon mode (interval: ${COLLECTION_INTERVAL}s)"
      while true; do
        collect_all
        # If sleep fails (for example because COLLECTION_INTERVAL is not a
        # valid duration) the loop would otherwise spin without any pause.
        if ! sleep "$COLLECTION_INTERVAL"; then
          echo "Error: sleep failed for COLLECTION_INTERVAL='${COLLECTION_INTERVAL}'" >&2
          exit 1
        fi
      done
      ;;
  esac
}
|
|
|
|
# Script entry point. The command-line options were already consumed by the
# argument parsing earlier in the script, so main is invoked without arguments.
main
|