Files
linux-scripts/salt-status.sh
T
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

410 lines
13 KiB
Bash
Executable File

#!/bin/bash
#####################################################
### ###
### Description: Expose metrics from salt-minion. ###
### ###
### Phil Connor, contact@mylinux.work ###
### License: MIT ###
### Version 2.28.1.20260421 ###
### ###
#####################################################
# Strict mode: stop on a failing command (errexit), on expansion of an unset
# variable (nounset), and when any stage of a pipeline fails (pipefail)
set -o errexit -o nounset -o pipefail
# Release identifier printed by --version
SCRIPT_VERSION="2.28.1.20260421"
# Defaults for the command-line switches; the option parser below flips them
DRY_RUN="false"
VERBOSE="false"
QUIET="false"
NO_CRON="false"
# Print the script banner: name, version (from SCRIPT_VERSION) and author.
# Outputs: three lines on stdout.
show_version() {
  printf '%s\n' \
    "Salt Status Monitor Bash Script" \
    "Version: $SCRIPT_VERSION" \
    "Author: Phil Connor pconnor@ara.com"
}
# Print the usage summary and the list of supported options.
# Outputs: help text on stdout; the usage line shows $0 as the program name.
show_help() {
  local -a help_lines=(
    "Usage: $0 [OPTIONS]"
    "Monitor Salt minion status and export Prometheus metrics"
    ""
    "Options:"
    " --dry-run Output metrics to console instead of file"
    " --verbose Enable verbose debug output"
    " --quiet Suppress non-error output"
    " --no-cron Skip cron job installation"
    " --timeout N Override timeout seconds (default: varies by operation)"
    " --version Show version and exit"
    " --help Show this help message"
  )
  printf '%s\n' "${help_lines[@]}"
}
# Logging functions
# Emit a timestamped debug line when VERBOSE is "true".
# Arguments: $1 - message text
# Outputs:   writes to stderr. Several callers run inside $(...) where stdout
#            is the function's return value, so diagnostics must not go there.
# Returns:   always 0, so a disabled log call can never trip `set -e`.
log_verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [VERBOSE] $1" >&2
  fi
  return 0
}
# Emit a timestamped informational line unless QUIET is set.
# Arguments: $1 - message text
# Outputs:   writes to stderr, keeping stdout reserved for the metrics that
#            main prints.
# Returns:   always 0. Returning the status of a failed [[ ]] test would abort
#            the script under `set -e` whenever --quiet is active.
log_info() {
  if [[ "$QUIET" == "false" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1" >&2
  fi
  return 0
}
# Parse command-line options, consuming them from the positional parameters.
# Flags set DRY_RUN / VERBOSE / QUIET / NO_CRON; --timeout stores its value in
# TIMEOUT_OVERRIDE. Unknown options print an error and exit with status 1.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --verbose | -v)
      VERBOSE=true
      shift
      ;;
    --quiet | -q)
      QUIET=true
      shift
      ;;
    --no-cron)
      NO_CRON=true
      shift
      ;;
    --timeout)
      # ${2:-} instead of $2: with `set -u`, a bare $2 aborts the shell with
      # "unbound variable" when --timeout is the last argument, and the
      # friendly message below would never be shown.
      if [[ "${2:-}" =~ ^[0-9]+$ ]]; then
        TIMEOUT_OVERRIDE="$2"
        shift 2
      else
        echo "Error: --timeout requires a numeric value" >&2
        exit 1
      fi
      ;;
    --version)
      show_version
      exit 0
      ;;
    -h | --help)
      show_help
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      echo "Use --help for usage information" >&2
      exit 1
      ;;
  esac
done
# Absolute path of this script; the cron entry invokes the script through it
SCRIPT_PATH="$(readlink -f "$0")" || true
readonly SCRIPT_PATH
# Tunables: an already-set environment variable wins, otherwise the default applies
: "${CRONTAB_USER:=root}"                        # account whose crontab receives the job
: "${NODE_EXPORTER_DIR:=/var/lib/node_exporter}" # directory holding the .prom file
: "${PROMETHEUS_USER:=prometheus}"               # owner given to a newly created metrics directory
: "${LOCK_DIR:=/var/run}"                        # directory for lock files
: "${UPDATE_INTERVAL:=*/10 * * * *}"             # cron schedule, every 10 minutes by default
readonly CRONTAB_USER NODE_EXPORTER_DIR PROMETHEUS_USER LOCK_DIR UPDATE_INTERVAL
# Salt master communication port
readonly SALT_MASTER_PORT=4505
# Values reported by the status metrics
readonly STATUS_FAILURE=0   # check failed or got no response
readonly STATUS_SUCCESS=1   # check passed
readonly STATUS_NOT_FOUND=2 # required command is not available on this system
# Refuse to continue without the two settings the rest of the script relies on
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
  echo "ERROR: Required environment variables not set" >&2
  exit 1
fi
# Report a fatal error and terminate the current shell.
# Arguments: $1 - message, printed to stderr with an "ERROR: " prefix
#            $2 - exit status (optional, defaults to 1)
handle_error() {
  local message=$1
  local exit_code=${2:-1}
  printf 'ERROR: %s\n' "$message" >&2
  exit "$exit_code"
}
# Print one log line of the form "[YYYY-MM-DD HH:MM:SS] [LEVEL] message".
# Arguments: $1 - level tag
#            $2 - message text
# Outputs:   the formatted line on stdout
log() {
  local level=$1 message=$2 stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] [%s] %s\n' "$stamp" "$level" "$message"
}
# Locate an executable, first via PATH and then in fallback directories.
# Arguments: $1  - command name
#            $2+ - directories to probe when the command is not in PATH
# Outputs:   the resolved command (as reported by `command -v`) or the full
#            path found in a fallback directory, on stdout
# Returns:   0 when found; otherwise calls handle_error, which exits the
#            current (sub)shell with an error message on stderr
find_command() {
  local cmd="$1"
  shift
  # All working variables are local so a direct call cannot clobber globals
  local resolved dir candidate
  # Resolve through PATH once and reuse the answer
  if resolved=$(command -v "$cmd" 2>/dev/null); then
    echo "$resolved"
    return 0
  fi
  # Remaining positional parameters are the fallback directories
  for dir in "$@"; do
    candidate="$dir/$cmd"
    # -f keeps a directory of the same name from passing the -x test
    if [[ -f "$candidate" && -x "$candidate" ]]; then
      echo "$candidate"
      return 0
    fi
  done
  # Command not found anywhere
  handle_error "Could not find '$cmd' executable"
}
# Install a cron job that runs this script on the UPDATE_INTERVAL schedule.
# Globals:  CRONTAB_USER, SCRIPT_PATH, UPDATE_INTERVAL, NODE_EXPORTER_DIR (read)
# Returns:  0 when the job already exists or was installed; calls
#           handle_error when the crontab cannot be written
install_cron_job() {
  local existing_cron temp_cron prom_file rc=0
  prom_file="$NODE_EXPORTER_DIR/salt_status.prom"

  # Read the crontab of the SAME user the job is installed for. Listing the
  # invoking user's crontab and then writing it to CRONTAB_USER would replace
  # that user's entries. A missing crontab is treated as empty.
  existing_cron=$(crontab -u "$CRONTAB_USER" -l 2>/dev/null) || existing_cron=""

  # Fixed-string match on captured text: the path is not a regex, and no
  # pipeline is involved, so `pipefail` cannot turn a hit into a miss.
  if [[ -n "$existing_cron" ]] && grep -qF -- "$SCRIPT_PATH" <<<"$existing_cron"; then
    return 0
  fi

  temp_cron=$(mktemp) || handle_error "Failed to create temporary crontab file"

  # Existing entries first, then the new job. The job writes to a temporary
  # file and renames it, so a reader never sees a truncated or half-written
  # .prom file; stderr is left out of the metrics file.
  {
    if [[ -n "$existing_cron" ]]; then
      printf '%s\n' "$existing_cron"
    fi
    printf '%s\n' "$UPDATE_INTERVAL $SCRIPT_PATH > $prom_file.tmp && mv $prom_file.tmp $prom_file"
  } >"$temp_cron"

  # Install, then remove the temporary file on both the success and failure path
  crontab -u "$CRONTAB_USER" "$temp_cron" || rc=$?
  rm -f -- "$temp_cron"
  if ((rc != 0)); then
    handle_error "Failed to install cron job"
  fi

  # A logging call must never decide the outcome of the installation
  log_info "Cron job installed successfully" || true
  return 0
}
# Take an exclusive, non-blocking lock so only one instance runs at a time.
# Uses file descriptor 9 for the lock.
# Globals:  LOCK_DIR (read)
# Returns:  0 with the lock held on fd 9; calls handle_error when the lock
#           directory is missing, the lock file cannot be opened, or another
#           instance already holds the lock
setup_lock() {
  # One well-known lock file shared by every run. A per-run mktemp name gives
  # each instance its own file, so the flock below could never conflict.
  local lockfile="$LOCK_DIR/salt_status.lock"

  # Ensure lock directory exists
  if [[ ! -d "$LOCK_DIR" ]]; then
    handle_error "Lock directory does not exist: $LOCK_DIR"
  fi

  # Remove leftover per-run lock files (salt_status.XXXXXX) older than 60
  # minutes. Never touch the shared lock file and never descend into
  # subdirectories of LOCK_DIR.
  find "$LOCK_DIR" -maxdepth 1 -name "salt_status.*" ! -name "salt_status.lock" \
    -type f -mmin +60 -delete 2>/dev/null || true

  # Open (creating if needed) the lock file on fd 9 without truncating it
  exec 9>>"$lockfile" || handle_error "Failed to create lock file"
  flock -n 9 || handle_error "Script is already running"

  # Close the descriptor on exit. The lock file itself stays in place:
  # unlinking it would let a later instance lock a different inode while an
  # earlier one still holds the old one. INT/TERM are left at their default
  # action so a signal terminates the script.
  trap 'exec 9>&-' EXIT
}
# Make sure the Node Exporter directory exists.
# When it is missing and the script runs as root, the directory is created and
# handed to PROMETHEUS_USER; the result must then be writable.
# Globals:  NODE_EXPORTER_DIR, PROMETHEUS_USER (read)
# Returns:  0 when the directory already existed or was created and is
#           writable; calls handle_error otherwise
setup_directories() {
  # Nothing to do if the directory is already there
  if [[ -d "$NODE_EXPORTER_DIR" ]]; then
    return 0
  fi

  # Create directory if running as root
  if [[ "$(id -u)" == "0" ]]; then
    mkdir -p "$NODE_EXPORTER_DIR" || handle_error "Failed to create $NODE_EXPORTER_DIR"
    # Set ownership to prometheus user, ignore errors if user doesn't exist
    chown "$PROMETHEUS_USER:" "$NODE_EXPORTER_DIR" 2>/dev/null || true
  fi

  # Written as an if-statement on purpose: a trailing `[[ ! -w ... ]] && ...`
  # leaves status 1 when the directory IS writable, which made the function
  # fail (and `set -e` abort the script) right after a successful creation.
  if [[ ! -w "$NODE_EXPORTER_DIR" ]]; then
    handle_error "$NODE_EXPORTER_DIR is not writable"
  fi
  return 0
}
# Check whether an established TCP connection to the Salt master port exists.
# Runs `ss -nt` (numeric addresses, TCP sockets) and looks for a row in state
# ESTAB whose peer address ends in :SALT_MASTER_PORT.
# Globals:  SALT_MASTER_PORT, STATUS_SUCCESS, STATUS_FAILURE, STATUS_NOT_FOUND (read)
# Outputs:  exactly one status value on stdout; diagnostics go to stderr so
#           callers capturing stdout receive only the value
check_salt_connection() {
  local ss_path
  # Report a missing ss the same way the other checks report a missing tool
  if ! ss_path=$(find_command ss /bin /usr/bin /sbin /usr/sbin 2>/dev/null); then
    echo "$STATUS_NOT_FOUND"
    return
  fi
  log_verbose "Checking for Salt connection on port $SALT_MASTER_PORT" >&2 || true
  # Column 1 is the socket state and column 5 the peer address:port. Matching
  # those columns avoids hits on queue sizes, local ports, or non-established
  # states. awk reads the whole stream and decides in END, so ss is never cut
  # off mid-write (an early-exiting grep -q can fail the pipeline under pipefail).
  if "$ss_path" -nt 2>/dev/null \
    | awk -v port="$SALT_MASTER_PORT" \
      '$1 == "ESTAB" && $5 ~ (":" port "$") { found = 1 } END { exit !found }'; then
    log_verbose "Found active connection on port $SALT_MASTER_PORT" >&2 || true
    echo "$STATUS_SUCCESS"
  else
    log_verbose "No active connection found on port $SALT_MASTER_PORT" >&2 || true
    echo "$STATUS_FAILURE"
  fi
}
# Run `salt-call test.ping` and report whether its output contains "True".
# Globals:  STATUS_SUCCESS, STATUS_FAILURE, STATUS_NOT_FOUND (read)
#           TIMEOUT_OVERRIDE (read, optional) - seconds from --timeout; when
#           set and the `timeout` utility is available, the probe is bounded
#           by it instead of being allowed to block indefinitely
# Outputs:  one status value on stdout
check_salt_ping() {
  local salt_call_path
  local -a ping_cmd
  # Try to find salt-call command, return NOT_FOUND if missing
  if ! salt_call_path=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null); then
    echo "$STATUS_NOT_FOUND"
    return
  fi
  ping_cmd=("$salt_call_path" test.ping)
  # ${TIMEOUT_OVERRIDE:-} because the variable only exists when --timeout was
  # given and the script runs with `set -u`
  if [[ -n "${TIMEOUT_OVERRIDE:-}" ]] && command -v timeout &>/dev/null; then
    ping_cmd=(timeout "$TIMEOUT_OVERRIDE" "${ping_cmd[@]}")
  fi
  # Execute ping test and check for True response
  if "${ping_cmd[@]}" 2>/dev/null | grep -q '\bTrue\b'; then
    echo "$STATUS_SUCCESS"
  else
    echo "$STATUS_FAILURE"
  fi
}
# Report whether `systemctl is-active salt-minion` succeeds.
# Outputs: STATUS_NOT_FOUND when systemctl cannot be located, STATUS_SUCCESS
#          when the unit is active, STATUS_FAILURE otherwise
check_salt_service() {
  local ctl_bin state
  # Locate systemctl; without it the check cannot run at all
  ctl_bin=$(find_command systemctl /bin /usr/bin /sbin /usr/sbin 2>/dev/null) || {
    echo $STATUS_NOT_FOUND
    return
  }
  # Assume failure, upgrade to success when the unit reports active
  state=$STATUS_FAILURE
  if "$ctl_bin" is-active salt-minion &>/dev/null; then
    state=$STATUS_SUCCESS
  fi
  echo $state
}
# Report the time of the communication check performed right now.
# Delegates the probe to check_salt_ping instead of repeating the salt-call
# lookup and test.ping logic, so both metrics always apply the same rules.
# Globals:  STATUS_SUCCESS (read)
# Outputs:  the current Unix timestamp when the ping reports success;
#           "0" when it fails or salt-call is not available
check_salt_last_communication() {
  if [[ "$(check_salt_ping)" == "$STATUS_SUCCESS" ]]; then
    date +%s
  else
    echo "0"
  fi
}
# Print the first "<digits>.<digits>" token of `salt-call --version`
# (e.g. 3006.1).
# Outputs: the version token, or "0" when salt-call is unavailable or its
#          output contains no such token
get_salt_version() {
  local salt_bin raw_output
  local version_re='[0-9]+\.[0-9]+'
  local version=""
  # No salt-call, no version
  salt_bin=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null) || {
    echo "0"
    return
  }
  raw_output=$("$salt_bin" --version 2>/dev/null) || true
  # The leftmost match in the output is the token to report
  if [[ "$raw_output" =~ $version_re ]]; then
    version=${BASH_REMATCH[0]}
  fi
  echo "${version:-0}"
}
# Sum the RSS column of every `ps -eo comm,rss` row whose command name starts
# with "salt-minion" and print the total multiplied by 1024.
# Outputs: the computed number, or "0" when ps is unavailable or no row matches
get_salt_memory_usage() {
  local ps_bin rss_kb
  # Without ps there is nothing to measure
  ps_bin=$(find_command ps /bin /usr/bin 2>/dev/null) || {
    echo "0"
    return
  }
  # A single awk pass selects the matching rows and adds up column 2
  rss_kb=$("$ps_bin" -eo comm,rss \
    | awk '/^salt-minion/ { total += $2 } END { print total + 0 }' 2>/dev/null)
  echo "$((${rss_kb:-0} * 1024))"
}
# Count the lines in the salt-minion log that carry an ERROR level tag.
# The whole file is scanned; there is no time-window filtering.
# Globals:  SALT_MINION_LOG (read, optional) - log file to scan, defaults to
#           /var/log/salt/minion
# Outputs:  the number of matching lines, or "0" when the file is not readable
count_salt_errors() {
  local log_file="${SALT_MINION_LOG:-/var/log/salt/minion}"
  local error_count
  # Return 0 if log file doesn't exist or isn't readable
  if [[ ! -r "$log_file" ]]; then
    echo "0"
    return
  fi
  # Allow padding spaces after the level name: a level field written with a
  # fixed width appears as "[ERROR   ]" and would not match a bare "[ERROR]"
  # pattern. grep -c exits 1 on zero matches, hence the `|| true`.
  error_count=$(grep -c '\[ERROR *\]' -- "$log_file" 2>/dev/null || true)
  echo "${error_count:-0}"
}
# Print one metric as three lines: "# HELP", "# TYPE", then the sample.
# Arguments: $1 - metric name
#            $2 - sample value
#            $3 - help text
#            $4 - metric type
output_metric() {
  local metric=$1 sample=$2 description=$3 kind=$4
  printf '# HELP %s %s\n' "$metric" "$description"
  printf '# TYPE %s %s\n' "$metric" "$kind"
  printf '%s %s\n' "$metric" "$sample"
}
# Entry point: prepare the environment, run every check, print the metrics.
# In a normal run it takes the lock, ensures the output directory exists, and
# installs the cron job (unless --no-cron was given or the script file is not
# a regular file). In dry-run mode those steps are skipped and marker lines
# are written to stderr around the metrics.
# Outputs: seven metrics on stdout via output_metric
main() {
  local conn_state ping_state svc_state last_seen salt_ver mem_bytes err_total

  if [[ "$DRY_RUN" != "false" ]]; then
    echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/salt_status.prom ===" >&2
  else
    # Only one instance at a time, and a place to put the output
    setup_lock
    setup_directories
    # Cron installation: explicitly disabled, or enabled and the script file exists
    if [[ "$NO_CRON" == "true" ]]; then
      log_info "Skipping cron job installation (--no-cron specified)"
    elif [[ "$NO_CRON" == "false" && -f "$SCRIPT_PATH" ]]; then
      install_cron_job
    fi
  fi

  # Gather every value first so the metrics are printed as one block
  conn_state=$(check_salt_connection)
  ping_state=$(check_salt_ping)
  svc_state=$(check_salt_service)
  last_seen=$(check_salt_last_communication)
  salt_ver=$(get_salt_version)
  mem_bytes=$(get_salt_memory_usage)
  err_total=$(count_salt_errors)

  # Emit the metrics
  output_metric "minion_connection_status" "$conn_state" "Shows if Salt-Minion is connected to Salt-Master." "gauge"
  output_metric "minion_ping_status" "$ping_state" "Shows if Salt-Minion is able to ping Salt-Master." "gauge"
  output_metric "minion_service_status" "$svc_state" "Shows if Salt-Minion service is active." "gauge"
  output_metric "minion_last_communication_timestamp" "$last_seen" "Timestamp of last successful communication with Salt-Master." "gauge"
  output_metric "minion_version" "$salt_ver" "Salt-Minion version number." "gauge"
  output_metric "minion_memory_usage_bytes" "$mem_bytes" "Salt-Minion process memory usage in bytes." "gauge"
  output_metric "minion_error_count" "$err_total" "Number of error entries in Salt-Minion log file." "counter"

  if [[ "$DRY_RUN" == "true" ]]; then
    echo "=== END DRY RUN OUTPUT ===" >&2
  fi
}
# Execute main function with all script arguments
main "$@"