a1a17e81a1
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
410 lines
13 KiB
Bash
Executable File
410 lines
13 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
#####################################################
|
|
### ###
|
|
### Description: Expose metrics from salt-minion. ###
|
|
### ###
|
|
### Phil Connor, contact@mylinux.work ###
|
|
### License: MIT ###
|
|
### Version 2.28.1.20260421 ###
|
|
### ###
|
|
#####################################################
|
|
|
|
# Strict mode: abort on a failing command (errexit), on expansion of an unset
# variable (nounset), and when any stage of a pipeline fails (pipefail).
set -o errexit -o nounset -o pipefail

# Defaults for the command-line switches; the option parser below flips them.
DRY_RUN=false VERBOSE=false QUIET=false NO_CRON=false

# Version string reported by --version.
SCRIPT_VERSION="2.28.1.20260421"
|
|
|
|
# Print the program name, the value of SCRIPT_VERSION and the author line
# to stdout, one item per line.
show_version() {
  printf '%s\n' \
    "Salt Status Monitor Bash Script" \
    "Version: $SCRIPT_VERSION" \
    "Author: Phil Connor pconnor@ara.com"
}
|
|
|
|
# Print the usage summary (invocation name taken from $0) and the list of
# supported options to stdout.
show_help() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "Monitor Salt minion status and export Prometheus metrics" \
    "" \
    "Options:" \
    " --dry-run Output metrics to console instead of file" \
    " --verbose Enable verbose debug output" \
    " --quiet Suppress non-error output" \
    " --no-cron Skip cron job installation" \
    " --timeout N Override timeout seconds (default: varies by operation)" \
    " --version Show version and exit" \
    " --help Show this help message"
}
|
|
|
|
# Logging functions
|
|
# Emit a timestamped "[VERBOSE]" diagnostic when VERBOSE is "true".
#
# Arguments: $1 - message text
# Outputs:   the formatted line on stderr. Diagnostics must not go to stdout:
#            the check_* functions are called inside $(...) and stdout is the
#            metrics stream, so anything printed there would end up inside a
#            metric value.
# Returns:   always 0. A bare `[[ ... ]] && echo` would return 1 whenever
#            verbose mode is off, which aborts the script under `set -e` if
#            the call is the last command of its caller.
log_verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [VERBOSE] $1" >&2
  fi
  return 0
}
|
|
|
|
# Emit a timestamped "[INFO]" message unless quiet mode is active
# (QUIET other than "false" suppresses it).
#
# Arguments: $1 - message text
# Outputs:   the formatted line on stderr, so informational messages never
#            mix with the Prometheus metrics written to stdout.
# Returns:   always 0. A bare `[[ ... ]] && echo` would return 1 under
#            --quiet and terminate the script when errexit is enabled.
log_info() {
  if [[ "$QUIET" == "false" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1" >&2
  fi
  return 0
}
|
|
|
|
# Value given to --timeout (empty when the option was not used). Initialised
# here so that later references are safe under `set -u`; a value already
# present in the environment is kept.
# NOTE(review): nothing in the visible part of this script reads
# TIMEOUT_OVERRIDE yet - confirm whether the checks are meant to honour it.
TIMEOUT_OVERRIDE="${TIMEOUT_OVERRIDE:-}"

# Parse the command-line options and set the corresponding global flags.
#
# Globals:   DRY_RUN, VERBOSE, QUIET, NO_CRON, TIMEOUT_OVERRIDE (written)
# Arguments: the script's command-line arguments
# Outputs:   usage errors on stderr; --version / --help text on stdout
# Returns:   0 when every argument was recognised. Exits the script with
#            status 0 for --version / --help and status 1 for an unknown
#            option or a missing / non-numeric --timeout value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --verbose | -v)
        VERBOSE=true
        shift
        ;;
      --quiet | -q)
        QUIET=true
        shift
        ;;
      --no-cron)
        NO_CRON=true
        shift
        ;;
      --timeout)
        # ${2:-} instead of $2: when --timeout is the final argument, $2 is
        # unset and a plain expansion would abort with "unbound variable"
        # under nounset instead of printing the error below.
        if [[ "${2:-}" =~ ^[0-9]+$ ]]; then
          TIMEOUT_OVERRIDE="$2"
          shift 2
        else
          echo "Error: --timeout requires a numeric value" >&2
          exit 1
        fi
        ;;
      --version)
        show_version
        exit 0
        ;;
      -h | --help)
        show_help
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
|
|
|
# Absolute path of this script; used when building the cron entry. A failing
# readlink leaves the value empty instead of aborting the script.
SCRIPT_PATH="$(readlink -f "$0")" || true
readonly SCRIPT_PATH

# Settings that may be overridden through environment variables of the same
# name; the value after ":-" is the default.
CRONTAB_USER="${CRONTAB_USER:-root}"                          # account whose crontab receives the job
NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}" # where the .prom file lives
PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}"              # owner given to the metrics directory
LOCK_DIR="${LOCK_DIR:-/var/run}"                              # where lock files are kept
UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/10 * * * *}"            # cron schedule (default: every 10 minutes)
readonly CRONTAB_USER NODE_EXPORTER_DIR PROMETHEUS_USER LOCK_DIR UPDATE_INTERVAL

# TCP port checked for a connection to the Salt master.
readonly SALT_MASTER_PORT=4505

# Values written to the *_status metrics.
readonly STATUS_FAILURE=0   # check failed / service not responding
readonly STATUS_SUCCESS=1   # check passed
readonly STATUS_NOT_FOUND=2 # required command is not present on the system

# Refuse to continue without a metrics directory or a metrics owner.
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
  echo "ERROR: Required environment variables not set" >&2
  exit 1
fi
|
|
|
|
# Report a fatal problem and terminate.
#
# Arguments: $1 - message, printed to stderr with an "ERROR: " prefix
#            $2 - exit status to use (optional, defaults to 1)
handle_error() {
  local message="$1"
  local status="${2:-1}"

  printf 'ERROR: %s\n' "$message" >&2
  exit "$status"
}
|
|
|
|
# Print one log line to stdout in the form "[YYYY-MM-DD HH:MM:SS] [LEVEL] text".
#
# Arguments: $1 - level label placed in the second bracket pair
#            $2 - message text
log() {
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] [%s] %s\n' "$stamp" "$1" "$2"
}
|
|
|
|
# Locate an executable, first through PATH and then in fallback directories.
#
# Arguments: $1   - command name
#            $2.. - directories to try, in order, when PATH lookup fails
# Outputs:   the resolved command on stdout (whatever `command -v` reports
#            for a PATH hit, otherwise "<dir>/<name>")
# Returns:   0 when found. When nothing matches, handle_error is called, which
#            prints an error and exits; inside $(...) that ends only the
#            substitution, so callers see a non-zero status and empty output.
find_command() {
  local cmd="$1"
  shift
  # All helper variables are local so the search does not overwrite a global
  # variable (the loop variable in particular).
  local resolved dir candidate

  # PATH lookup; a single call both tests for and yields the location.
  if resolved=$(command -v "$cmd" 2>/dev/null); then
    printf '%s\n' "$resolved"
    return 0
  fi

  # Iterating "$@" directly is safe with zero fallbacks even under nounset
  # on older bash releases, where expanding an empty array is an error.
  for dir in "$@"; do
    candidate="$dir/$cmd"
    # -f keeps a directory with the same name from being reported as the
    # executable (directories also pass -x).
    if [[ -f "$candidate" && -x "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  handle_error "Could not find '$cmd' executable"
}
|
|
|
|
# Add a cron entry that runs this script on the UPDATE_INTERVAL schedule and
# redirects its output to $NODE_EXPORTER_DIR/salt_status.prom. Does nothing
# when an entry mentioning SCRIPT_PATH is already present.
#
# Globals:   CRONTAB_USER, SCRIPT_PATH, UPDATE_INTERVAL, NODE_EXPORTER_DIR (read)
# Returns:   0 on success or when the entry already exists; calls
#            handle_error (which exits) if the crontab cannot be installed.
install_cron_job() {
  local existing temp_cron

  # Read the crontab of the SAME user the job is installed for. Reading the
  # invoking user's table and then writing it with -u CRONTAB_USER would
  # replace that user's crontab with someone else's entries.
  # A failure here normally just means "no crontab yet".
  existing=$(crontab -u "$CRONTAB_USER" -l 2>/dev/null) || existing=""

  # Fixed-string match on captured text: the path is not a regex, and not
  # piping crontab into `grep -q` avoids a SIGPIPE-induced pipeline failure
  # (pipefail) that would hide an existing entry and add a duplicate.
  if grep -qF -- "$SCRIPT_PATH" <<<"$existing"; then
    return 0
  fi

  temp_cron=$(mktemp) || handle_error "Failed to create temporary crontab file"

  # New table = current entries (if any) followed by the new job.
  {
    if [[ -n "$existing" ]]; then
      printf '%s\n' "$existing"
    fi
    echo "$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/salt_status.prom 2>&1"
  } > "$temp_cron"

  if crontab -u "$CRONTAB_USER" "$temp_cron"; then
    rm -f -- "$temp_cron"
    # "|| true": the logger's status must not decide this function's result.
    log_info "Cron job installed successfully" || true
  else
    rm -f -- "$temp_cron"
    handle_error "Failed to install cron job"
  fi
}
|
|
|
|
# Take an exclusive, non-blocking lock so that only one instance of the
# script runs at a time.
#
# Globals:   LOCK_DIR (read), lockfile (written: path of the lock file)
# Side effects: opens the lock file on file descriptor 9 and installs an
#            EXIT trap that closes it.
# Returns:   0 when the lock was obtained; calls handle_error (which exits)
#            when LOCK_DIR is missing, the lock file cannot be opened, or
#            another instance holds the lock.
setup_lock() {
  [[ -d "$LOCK_DIR" ]] || handle_error "Lock directory does not exist: $LOCK_DIR"

  # Remove stale per-run lock files (salt_status.XXXXXX, older than 60
  # minutes) left by the previous locking scheme. Limited to LOCK_DIR itself
  # and never touching the shared lock file.
  find "$LOCK_DIR" -maxdepth 1 -name "salt_status.*" ! -name "salt_status.lock" \
    -type f -mmin +60 -delete 2>/dev/null || true

  # Every instance must lock the SAME file. A freshly generated name per run
  # means flock never sees contention and concurrent runs are not prevented.
  lockfile="$LOCK_DIR/salt_status.lock"

  exec 9>"$lockfile" || handle_error "Failed to create lock file"
  flock -n 9 || handle_error "Script is already running"

  # The lock disappears when descriptor 9 is closed, at the latest when the
  # process ends. The file itself is left in place: unlinking it would let a
  # second instance lock a new inode while the first still holds the old one.
  # INT/TERM are not trapped, so they terminate the script as usual.
  trap 'exec 9>&-' EXIT
}
|
|
|
|
# Make sure the metrics directory NODE_EXPORTER_DIR is available.
#
# - If the directory already exists, nothing is done.
# - Otherwise, when running as root (id -u is 0), it is created and handed to
#   PROMETHEUS_USER; a failing chown (for example an unknown user) is ignored.
# - Finally the directory must be writable.
#
# Returns: 0 on success; calls handle_error (which exits) when the directory
#          cannot be created or is not writable.
setup_directories() {
  if [[ -d "$NODE_EXPORTER_DIR" ]]; then
    return 0
  fi

  if [[ "$(id -u)" == "0" ]]; then
    mkdir -p "$NODE_EXPORTER_DIR" \
      || handle_error "Failed to create $NODE_EXPORTER_DIR"
    # Best effort: the account may not exist on this host.
    chown "$PROMETHEUS_USER:" "$NODE_EXPORTER_DIR" 2>/dev/null || true
  fi

  # Written as a full `if` and followed by an explicit return. With
  # `[[ ! -w dir ]] && handle_error` as the last command, the function
  # returns 1 precisely when the directory IS writable, and `set -e` then
  # terminates the script right after a successful creation.
  if [[ ! -w "$NODE_EXPORTER_DIR" ]]; then
    handle_error "$NODE_EXPORTER_DIR is not writable"
  fi
  return 0
}
|
|
|
|
# Report whether an established TCP connection on SALT_MASTER_PORT exists,
# based on the output of `ss -nt` (numeric addresses, TCP sockets).
#
# Outputs: exactly one value on stdout -
#            STATUS_SUCCESS    an ESTAB socket uses the port (local or peer side)
#            STATUS_FAILURE    no such socket
#            STATUS_NOT_FOUND  the ss command is not available (same convention
#                              as the other check_* functions)
#          Verbose diagnostics are forced to stderr so they never become part
#          of the value captured by the caller's $(...).
check_salt_connection() {
  local ss_path

  if ! ss_path=$(find_command ss /bin /sbin /usr/bin /usr/sbin 2>/dev/null); then
    echo "$STATUS_NOT_FOUND"
    return
  fi

  log_verbose "Checking for Salt connection on port $SALT_MASTER_PORT" >&2 || true

  # awk inspects specific columns instead of grepping the whole line:
  #   $1 state, $4 local address:port, $5 peer address:port.
  # - Only ESTAB counts; a socket stuck in SYN-SENT towards an unreachable
  #   master is not a connection.
  # - The port must terminate an address column, so a queue size or a longer
  #   number containing the digits cannot match.
  # - awk consumes all input, so ss is never killed by SIGPIPE (which, with
  #   pipefail, would turn a positive result into a failure).
  if "$ss_path" -nt | awk -v port="$SALT_MASTER_PORT" '
      $1 == "ESTAB" && ($4 ~ (":" port "$") || $5 ~ (":" port "$")) { found = 1 }
      END { exit !found }'; then
    log_verbose "Found active connection on port $SALT_MASTER_PORT" >&2 || true
    echo "$STATUS_SUCCESS"
  else
    log_verbose "No active connection found on port $SALT_MASTER_PORT" >&2 || true
    echo "$STATUS_FAILURE"
  fi
}
|
|
|
|
# Run `salt-call test.ping` and report whether its output contains the word
# "True".
#
# Outputs: STATUS_NOT_FOUND when salt-call cannot be located, otherwise
#          STATUS_SUCCESS or STATUS_FAILURE depending on the ping result.
check_salt_ping() {
  local salt_call_path outcome

  # No salt-call binary: nothing to test.
  salt_call_path=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null) || {
    echo "$STATUS_NOT_FOUND"
    return
  }

  # Assume failure; upgrade to success only on a "True" answer.
  outcome="$STATUS_FAILURE"
  if "$salt_call_path" test.ping 2>/dev/null | grep -q '\bTrue\b'; then
    outcome="$STATUS_SUCCESS"
  fi
  echo "$outcome"
}
|
|
|
|
# Ask systemctl whether the salt-minion unit is active.
#
# Outputs: STATUS_NOT_FOUND when systemctl cannot be located, STATUS_SUCCESS
#          when `systemctl is-active salt-minion` succeeds, STATUS_FAILURE
#          otherwise.
check_salt_service() {
  local systemctl_path unit_state

  systemctl_path=$(find_command systemctl /bin /usr/bin /sbin /usr/sbin 2>/dev/null) || {
    echo "$STATUS_NOT_FOUND"
    return
  }

  # Only the exit status of is-active matters; its output is discarded.
  unit_state="$STATUS_FAILURE"
  if "$systemctl_path" is-active salt-minion &>/dev/null; then
    unit_state="$STATUS_SUCCESS"
  fi
  echo "$unit_state"
}
|
|
|
|
# Print the current Unix time when `salt-call test.ping` answers "True" at the
# moment of the call, and "0" otherwise (including when salt-call is missing).
check_salt_last_communication() {
  local salt_call_path

  if salt_call_path=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null); then
    # A successful ping right now is what counts as "communication".
    if "$salt_call_path" test.ping 2>/dev/null | grep -q '\bTrue\b'; then
      date +%s
      return
    fi
  fi

  # salt-call unavailable or the ping did not succeed.
  echo "0"
}
|
|
|
|
# Print the first "<digits>.<digits>" token found in the output of
# `salt-call --version` (for example 3006.1), or "0" when salt-call is missing
# or no such token is present.
get_salt_version() {
  local salt_call_path detected

  if salt_call_path=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null); then
    # grep -o isolates every major.minor match; head keeps the first one.
    detected=$("$salt_call_path" --version 2>/dev/null | grep -o '[0-9]\+\.[0-9]\+' | head -1)
    echo "${detected:-0}"
  else
    echo "0"
  fi
}
|
|
|
|
# Print the combined resident memory, in bytes, of all processes whose command
# name (ps "comm" column) starts with "salt-minion". Prints "0" when ps is
# missing or no such process is listed.
get_salt_memory_usage() {
  local ps_path memory_bytes

  if ! ps_path=$(find_command ps /bin /usr/bin 2>/dev/null); then
    echo "0"
    return
  fi

  # `ps -eo comm,rss` lists the command name and RSS in KiB. One awk does the
  # filtering and the summing: a separate grep stage exits 1 when nothing
  # matches, which fails the pipeline under pipefail and, with errexit active,
  # ends the function before it can print 0. RSS is taken as the last field
  # and converted to bytes in awk.
  memory_bytes=$("$ps_path" -eo comm,rss \
    | awk '$1 ~ /^salt-minion/ { kib += $NF } END { printf "%d\n", kib * 1024 }') \
    || memory_bytes=0

  echo "${memory_bytes:-0}"
}
|
|
|
|
# Count the ERROR-level entries in the salt-minion log file.
#
# The whole file is counted; there is no time window.
#
# Arguments: $1 - log file to scan (optional; defaults to $SALT_MINION_LOG or,
#                 if that is unset, /var/log/salt/minion)
# Outputs:   the number of matching lines on stdout; "0" when the file is
#            missing, unreadable or contains no match.
count_salt_errors() {
  local log_file="${1:-${SALT_MINION_LOG:-/var/log/salt/minion}}"
  local error_count

  if [[ ! -f "$log_file" || ! -r "$log_file" ]]; then
    echo "0"
    return
  fi

  # Salt's default log-file format pads the level name to eight characters,
  # so entries read "[ERROR   ]". "\[ERROR *\]" accepts both the padded and
  # the compact "[ERROR]" form; a pattern without the optional spaces never
  # matches default-formatted logs.
  # grep -c prints 0 but exits 1 when nothing matches, hence "|| true".
  error_count=$(grep -c '\[ERROR *\]' "$log_file" 2>/dev/null || true)
  echo "${error_count:-0}"
}
|
|
|
|
# Write one metric in Prometheus text exposition format to stdout:
# a HELP line, a TYPE line and the sample itself.
#
# Arguments: $1 - metric name
#            $2 - sample value
#            $3 - help text
#            $4 - metric type (e.g. gauge, counter)
output_metric() {
  local name="$1" value="$2" help="$3" type="$4"

  printf '# HELP %s %s\n' "$name" "$help"
  printf '# TYPE %s %s\n' "$name" "$type"
  printf '%s %s\n' "$name" "$value"
}
|
|
|
|
# Entry point: prepare the environment (unless in dry-run mode), collect all
# Salt status values and print them as Prometheus metrics on stdout.
#
# Globals: DRY_RUN, NO_CRON, SCRIPT_PATH, NODE_EXPORTER_DIR (read)
main() {
  local connection_status ping_status service_status last_comm version memory_usage error_count

  if [[ "$DRY_RUN" != "false" ]]; then
    # Dry run: no lock, no directory handling, no cron changes.
    echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/salt_status.prom ===" >&2
  else
    # Only one instance at a time.
    setup_lock

    # The metrics directory has to exist and be writable.
    setup_directories

    # Register the periodic job when the script file exists and cron
    # installation has not been disabled.
    if [[ -f "$SCRIPT_PATH" && "$NO_CRON" == "false" ]]; then
      install_cron_job
    elif [[ "$NO_CRON" == "true" ]]; then
      log_info "Skipping cron job installation (--no-cron specified)"
    fi
  fi

  # Gather the values.
  connection_status=$(check_salt_connection)
  ping_status=$(check_salt_ping)
  service_status=$(check_salt_service)
  last_comm=$(check_salt_last_communication)
  version=$(get_salt_version)
  memory_usage=$(get_salt_memory_usage)
  error_count=$(count_salt_errors)

  # Emit them: name, value, help text, type.
  output_metric "minion_connection_status" "$connection_status" "Shows if Salt-Minion is connected to Salt-Master." "gauge"
  output_metric "minion_ping_status" "$ping_status" "Shows if Salt-Minion is able to ping Salt-Master." "gauge"
  output_metric "minion_service_status" "$service_status" "Shows if Salt-Minion service is active." "gauge"
  output_metric "minion_last_communication_timestamp" "$last_comm" "Timestamp of last successful communication with Salt-Master." "gauge"
  output_metric "minion_version" "$version" "Salt-Minion version number." "gauge"
  output_metric "minion_memory_usage_bytes" "$memory_usage" "Salt-Minion process memory usage in bytes." "gauge"
  output_metric "minion_error_count" "$error_count" "Number of error entries in Salt-Minion log file." "counter"

  if [[ "$DRY_RUN" == "true" ]]; then
    echo "=== END DRY RUN OUTPUT ===" >&2
  fi
}
|
|
|
|
# Start the collector. Command-line options are handled by the parser earlier
# in the file; main itself does not read its positional parameters.
main "$@"
|