Add all 44 scripts, update CI: error severity baseline, PowerShell validation, multi-distro testing

Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
root
2026-03-07 05:40:51 +01:00
parent db43b8a313
commit 88551536e6
43 changed files with 28906 additions and 23 deletions
+619
View File
@@ -0,0 +1,619 @@
#!/bin/bash
########################################################################################
#### users_logged_in.sh ####
#### ####
#### This script monitors and reports information about users currently logged into ####
#### a Linux system. It's designed to work with Prometheus monitoring system to ####
#### track user activity on Amazon, Ubuntu, and RedHat Linux servers. ####
#### ####
#### Contact: Phil Connor contact@mylinux.work ####
#### Version 3.3.1-20250923 ####
########################################################################################
# Strict mode: abort on unhandled errors, on use of unset variables, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# CLI flags - defaults; parse_arguments overrides them from the command line.
DRY_RUN=false   # --dry-run
VERBOSE=false   # --verbose / -v
QUIET=false     # --quiet / -q
NO_CRON=false   # --no-cron

# Version string printed by --version.
SCRIPT_VERSION="3.3.1-20250923"
# Parse command line arguments
# Globals:   DRY_RUN, VERBOSE, DEBUG, QUIET, NO_CRON (written)
#            SCRIPT_VERSION (read)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   version/help text on stdout; unknown-option errors on stderr
# Returns:   0 once every argument is consumed; exits 0 after --version or
#            --help and exits 1 on an unrecognised option
parse_arguments() {
    local opt

    while (( $# > 0 )); do
        opt="$1"
        shift
        case "$opt" in
            --dry-run)
                DRY_RUN=true
                ;;
            -v|--verbose)
                VERBOSE=true
                DEBUG=1
                ;;
            -q|--quiet)
                QUIET=true
                ;;
            --no-cron)
                NO_CRON=true
                ;;
            --version)
                printf '%s\n' \
                    "User Login Monitor" \
                    "Version: $SCRIPT_VERSION" \
                    "Author: Phil Connor contact@mylinux.work"
                exit 0
                ;;
            -h|--help)
                printf '%s\n' \
                    "Usage: $0 [OPTIONS]" \
                    "Monitor user login activity and export Prometheus metrics" \
                    "" \
                    "Options:" \
                    " --dry-run Output metrics to console instead of file" \
                    " --verbose Enable verbose debug output" \
                    " --quiet Suppress non-error output" \
                    " --no-cron Skip cron job installation" \
                    " --version Show version and exit" \
                    " --help Show this help message"
                exit 0
                ;;
            *)
                printf '%s\n' \
                    "Unknown option: $opt" \
                    "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done
}
# Enhanced logging functions
#
# log_verbose MESSAGE
#   Print a timestamped [VERBOSE] line when VERBOSE is "true".
# Globals:   VERBOSE (read)
# Arguments: $1 - message text
# Outputs:   the log line on stderr; stdout is reserved for metric output
# Returns:   always 0. The previous `[[ ... ]] && echo` form returned 1 when
#            verbose mode was off, which aborts any caller running under
#            `set -e`.
log_verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        printf '[%s] [VERBOSE] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${1:-}" >&2
    fi
    return 0
}
# log_info MESSAGE
#   Print a timestamped [INFO] line unless QUIET is set.
# Globals:   QUIET (read)
# Arguments: $1 - message text
# Outputs:   the log line on stderr; stdout is reserved for metric output
# Returns:   always 0. The previous `[[ ... ]] && echo` form returned 1 in
#            quiet mode, so --quiet aborted the script at the first call
#            under `set -e`.
log_info() {
    if [[ "$QUIET" == "false" ]]; then
        printf '[%s] [INFO] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${1:-}" >&2
    fi
    return 0
}
# System Configuration - read-only settings. The first four and UPDATE_INTERVAL
# may be overridden from the environment; the value after ":-" is the default.

# Directory where Prometheus metrics are stored
declare -r NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}"
# User that owns the Prometheus files
declare -r PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}"
# User under which the cron job runs
declare -r CRONTAB_USER="${CRONTAB_USER:-root}"
# Full path to this script
declare -r SCRIPT_PATH="$(readlink -f "$0")"
# Cron schedule (every 3 minutes by default)
declare -r UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/3 * * * *}"
# Prevents multiple instances from running
declare -r LOCKFILE="/var/run/users_logged_in.lock"
# Required commands - maps each external tool the script needs to the
# directory used as a fallback when the tool is not found on PATH.
declare -A COMMANDS
COMMANDS[awk]="/usr/bin"    # Text processing utility
COMMANDS[cut]="/usr/bin"    # Extract columns from text
COMMANDS[grep]="/usr/bin"   # Search text patterns
COMMANDS[sed]="/usr/bin"    # Stream editor for text manipulation
COMMANDS[sort]="/usr/bin"   # Sort lines of text
COMMANDS[uniq]="/usr/bin"   # Remove duplicate lines
COMMANDS[who]="/usr/bin"    # Show logged in users

# Command paths - resolved executable per tool (populated by find_commands)
declare -A CMD_PATHS
# Validation - refuse to run when the metrics directory or the Prometheus
# user resolved to an empty string.
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
    printf '%s\n' "ERROR: Required environment variables not set" >&2
    exit 1
fi
# Error handling function - report a fatal error and terminate.
# Arguments: $1 - message to print
#            $2 - exit status (optional, defaults to 1)
# Outputs:   "ERROR: <message>" on stderr
# Returns:   never; exits the current shell with the given status
handle_error() {
    local -r reason="$1"
    local -r status="${2:-1}"

    printf 'ERROR: %s\n' "$reason" >&2
    exit "$status"
}
# Logging function - Output timestamped log messages
# Arguments: $1 - severity label (e.g. WARNING)
#            $2 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] [LEVEL] message" on stderr. Diagnostics are
#            kept off stdout because stdout carries the metric text that gets
#            redirected into the .prom file.
# Returns:   0
log() {
    local level="$1"
    local message="$2"

    printf '[%s] [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$level" "$message" >&2
}
# Find command location - resolve a tool to an executable path.
# Arguments: $1 - command name
#            $2 - directory to try when the name is not found via `command -v`
# Outputs:   the resolved path on stdout
# Returns:   0 on success; calls handle_error when the resolved path is not
#            executable
find_command() {
    local tool="$1"
    local fallback_dir="$2"
    local resolved

    if ! resolved=$(command -v "$tool" 2>/dev/null); then
        resolved="${fallback_dir}/${tool}"
    fi

    if [[ ! -x "$resolved" ]]; then
        handle_error "Cannot find or execute '$tool'"
    fi

    printf '%s\n' "$resolved"
}
# Initialize command paths - Populate CMD_PATHS array with actual command locations
# Globals:   COMMANDS (read), CMD_PATHS (written)
# Returns:   0 when every command resolved; exits with find_command's status
#            as soon as one lookup fails
find_commands() {
    # Keep the loop variable local so it cannot clobber a caller's "cmd".
    local cmd

    for cmd in "${!COMMANDS[@]}"; do
        # find_command runs in a command substitution, so its handle_error
        # only ends that subshell; propagate the failure explicitly instead
        # of relying on `set -e` being active.
        CMD_PATHS[$cmd]=$(find_command "$cmd" "${COMMANDS[$cmd]}") || exit "$?"
    done
}
# Cleanup function - Remove lockfile on script exit
# Globals:   LOCKFILE (read), LOCK_ACQUIRED (read; set by setup_lockfile)
# The lockfile is removed only when this process created it. Removing it
# unconditionally would delete the lock of another running instance whenever
# this process exits without having taken the lock (for example on the
# "already running" error path).
cleanup() {
    if [[ "${LOCK_ACQUIRED:-false}" == "true" ]]; then
        rm -f -- "$LOCKFILE"
    fi
}
# Setup Prometheus directory - Create and set permissions for metrics output directory
# Globals:   NODE_EXPORTER_DIR, PROMETHEUS_USER (read)
# Returns:   0 when the directory is writable; otherwise calls handle_error
setup_directory() {
    if [[ ! -d "$NODE_EXPORTER_DIR" ]]; then
        # Only root attempts to create the directory.
        if [[ $(id -u) -eq 0 ]]; then
            mkdir -p "$NODE_EXPORTER_DIR"
            # Best effort: a failed chown (e.g. unknown user) is ignored.
            chown "$PROMETHEUS_USER": "$NODE_EXPORTER_DIR" 2>/dev/null || true
        fi
    fi
    [[ -w "$NODE_EXPORTER_DIR" ]] || handle_error "$NODE_EXPORTER_DIR is not writable"
}
# Setup lockfile - Prevent multiple script instances from running simultaneously
# Globals:   LOCKFILE (read), LOCK_ACQUIRED (written)
# Returns:   0 once the lock is held; calls handle_error when another instance
#            holds it or the file cannot be created
setup_lockfile() {
    # Remove stale lockfiles older than 60 minutes
    find "$LOCKFILE" -mmin +60 -delete 2>/dev/null || true

    # With noclobber the redirection fails if the file already exists, so the
    # existence check and the creation happen in one step instead of a racy
    # test followed by touch.
    if ! ( set -o noclobber; : > "$LOCKFILE" ) 2>/dev/null; then
        if [[ -e "$LOCKFILE" ]]; then
            handle_error "Script is already running"
        fi
        handle_error "Cannot create lockfile $LOCKFILE"
    fi

    LOCK_ACQUIRED=true
    chmod 600 "$LOCKFILE"
}
# Install cron job - Automatically schedule this script to run periodically
# Globals:   NO_CRON, SCRIPT_PATH, CRONTAB_USER, UPDATE_INTERVAL,
#            NODE_EXPORTER_DIR (read)
# Outputs:   status messages through log_info / log
# Returns:   0 in every case; a failed installation is only logged as a warning
install_cron_job() {
    if [[ "$NO_CRON" == "true" ]]; then
        log_info "Skipping cron job installation (--no-cron specified)" || true
        return 0
    fi

    [[ -f "$SCRIPT_PATH" ]] || return 0

    # Read the crontab of the user the entry is installed for, so the
    # duplicate check and the installation look at the same table.
    local existing
    existing=$(crontab -u "$CRONTAB_USER" -l 2>/dev/null) || existing=""

    # Literal substring test: the script path is not a regular expression, and
    # no pipeline is involved that could fail spuriously under pipefail.
    if [[ "$existing" == *"$SCRIPT_PATH"* ]]; then
        return 0
    fi

    # NOTE(review): the entry also redirects stderr (2>&1) into the .prom file,
    # so any diagnostic the script prints ends up in the metrics file - confirm
    # whether that is intended.
    local cron_entry="$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/usrlogins.prom 2>&1"

    local new_table="$cron_entry"
    if [[ -n "$existing" ]]; then
        new_table="$existing"$'\n'"$cron_entry"
    fi

    # printf '%s' writes existing entries back verbatim; `echo -e` would
    # interpret backslash sequences inside them.
    if printf '%s\n' "$new_table" | crontab -u "$CRONTAB_USER" -; then
        log_info "Cron job installed successfully" || true
    else
        log "WARNING" "Failed to install cron job for user $CRONTAB_USER" || true
    fi
}
# Get logged users - one node_logged_in_usrs sample per distinct `who` line.
# Globals:   CMD_PATHS (read: who, sort, uniq, awk)
# Outputs:   node_logged_in_usrs{name="...", terminal="...", location="..."} 1
#            on stdout, one line per session
get_logged_users() {
    local formatter
    formatter='{
        name = $1
        terminal = $2
        location = $5
        gsub(/US\\|@us\.[^.]+\.net/, "", name)          # Remove domain prefixes from username (US\ or @us.*.net)
        gsub(/\//, " ", terminal)                        # Replace slashes in terminal names
        gsub(/:/, "", terminal)                          # Remove colons from terminal names
        gsub(/:100/, "aws_workspace", location)          # Convert AWS workspace notation
        gsub(/\(|\)/, "", location)                      # Remove parentheses from location
        print "node_logged_in_usrs{name=\"" name "\", terminal=\"" terminal "\", location=\"" location "\"}", 1
    }'

    "${CMD_PATHS[who]}" \
        | "${CMD_PATHS[sort]}" \
        | "${CMD_PATHS[uniq]}" \
        | "${CMD_PATHS[awk]}" "$formatter"
}
# Get user terminal count - number of `who` lines per login name.
# Globals:   CMD_PATHS (read: who, sed, cut, sort, uniq, awk)
# Outputs:   node_logged_in_usr_terminals{username="..."} <count> on stdout
get_user_terminal_count() {
    local formatter
    formatter='{
        count = $1
        name = $2
        gsub(/@us\.[^.]+\.net/, "", name)   # Remove email domain from username (@us.*.net)
        print "node_logged_in_usr_terminals{username=\"" name "\"}", count
    }'

    # sed drops everything up to a "US\" prefix and removes commas; cut keeps
    # the first space-separated column (the login name) for counting.
    "${CMD_PATHS[who]}" \
        | "${CMD_PATHS[sed]}" 's/.*US\\[\t ]*//;s/,//g' \
        | "${CMD_PATHS[cut]}" -f1 -d' ' \
        | "${CMD_PATHS[sort]}" \
        | "${CMD_PATHS[uniq]}" -c \
        | "${CMD_PATHS[awk]}" "$formatter"
}
# Get total user count - Count total logged in sessions
# Globals:   CMD_PATHS (read: who, awk)
# Outputs:   the number from the "# users=N" summary line of `who -q`
# Returns:   0 even when no summary line is found (nothing is printed then)
get_total_user_count() {
    # Match only the summary line. A plain `grep users` also matched the line
    # of login names when one of them contained "users", and a no-match made
    # the pipeline fail under pipefail. LC_ALL=C asks who for the untranslated
    # "# users=" wording.
    LC_ALL=C "${CMD_PATHS[who]}" -q \
        | "${CMD_PATHS[awk]}" -F= '/^# users=/ { print $2 }'
}
# Get last user commands - Extract recent shell history for one user
# Globals:   CMD_PATHS (read: awk)
# Arguments: $1 - login name
#            $2 - home directory to search (optional; looked up with
#                 `getent passwd` when omitted)
# Outputs:   up to 10 node_user_last_commands{username, command_number,
#            command} samples on stdout; nothing when no history is readable
# Returns:   1 when the username is empty, 0 otherwise
get_last_user_commands() {
    local username="${1:-}"
    local home_dir="${2:-}"
    # Initialised so the test below is safe under `set -u` when no history
    # file is found.
    local history_file=""
    local hist_path passwd_entry
    local -a candidates=()

    if [[ -z "$username" ]]; then
        return 1
    fi

    # Resolve the real home directory; it is not always /home/<name>.
    if [[ -z "$home_dir" ]]; then
        passwd_entry=$(getent passwd -- "$username" 2>/dev/null) || passwd_entry=""
        if [[ -n "$passwd_entry" ]]; then
            IFS=: read -r _ _ _ _ _ home_dir _ <<< "$passwd_entry"
        fi
    fi

    if [[ -n "$home_dir" ]]; then
        candidates+=("$home_dir/.bash_history" "$home_dir/.history")
    fi
    candidates+=("/home/${username}/.bash_history" "/home/${username}/.history")
    # /root/.bash_history is considered for root only; using it as a general
    # fallback would report root's commands under another user's name.
    if [[ "$username" == "root" ]]; then
        candidates+=("/root/.bash_history")
    fi

    for hist_path in "${candidates[@]}"; do
        if [[ -r "$hist_path" ]]; then
            history_file="$hist_path"
            break
        fi
    done

    # Extract last 10 commands and format as Prometheus metrics
    if [[ -n "$history_file" ]]; then
        tail -n 10 "$history_file" 2>/dev/null | \
            "${CMD_PATHS[awk]}" -v user="$username" 'NR <= 10 {
                gsub(/\\/, "\\\\", $0) # Escape backslashes first (before other escaping)
                gsub(/"/, "\\\"", $0) # Escape double quotes in commands
                gsub(/'\''/, "", $0) # Remove single quotes (problematic for Prometheus)
                print "node_user_last_commands{username=\"" user "\", command_number=\"" NR "\", command=\"" $0 "\"} 1"
            }'
    fi
}
# Get sudo commands - Extract recent privileged commands from auth logs
# Globals:   CMD_PATHS (read: awk, sort, uniq); grep and tail are called by
#            bare name, not through CMD_PATHS
# Arguments: $1 - login name to search for
# Outputs:   one node_user_sudo_commands{username="...", command="..."} 1 line
#            per distinct command, on stdout
# Returns:   1 when $1 is empty
# NOTE(review): $clean_username and $username are inserted into grep -E
# patterns unescaped and unanchored, so regex metacharacters in a login name
# are interpreted and a short name also matches longer names containing it.
# NOTE(review): the loop variable log_file is not declared local.
get_sudo_commands() {
local username="$1"
if [[ -z "$username" ]]; then
return 1
fi
# Strip domain prefixes for comparison: a leading "US\" and anything from "@" on
local clean_username="${username#US\\}"
clean_username="${clean_username%@*}"
# Check both Ubuntu (/var/log/auth.log) and RHEL (/var/log/secure) locations
local auth_logs=("/var/log/secure" "/var/log/auth.log")
local commands_found=""
for log_file in "${auth_logs[@]}"; do
if [[ -r "$log_file" ]]; then
# Try RHEL/Amazon Linux format first (TTY= pattern): keep the last 10 lines
# that contain TTY=, the user name and COMMAND=, then let awk take the text
# after "; COMMAND=" and emit "<user>|||<escaped command>"
commands_found=$(grep "TTY=" "$log_file" 2>/dev/null | \
grep -E "(US\\\\$clean_username|$clean_username|$username)" | \
grep "COMMAND=" | \
tail -10 | \
"${CMD_PATHS[awk]}" -F'; COMMAND=' -v user="$clean_username" '{
if (NF >= 2) {
cmd = $2
gsub(/#040/, " ", cmd) # Convert #040 to spaces
gsub(/^[ \t]+|[ \t]+$/, "", cmd) # Trim whitespace
gsub(/\\/, "\\\\", cmd) # Escape backslashes first (before other escaping)
gsub(/"/, "\\\"", cmd) # Escape double quotes
gsub(/'\''/, "", cmd) # Remove single quotes (problematic for Prometheus)
if (cmd != "" && length(cmd) > 0) {
print user "|||" cmd # Use delimiter for deduplication
}
}
}')
# If RHEL format didn't work, try Ubuntu format: lines containing COMMAND=
# that match "USER=<name>" or "<name> :", again limited to the last 10
if [[ -z "$commands_found" ]]; then
commands_found=$(grep "COMMAND=" "$log_file" 2>/dev/null | \
grep -E "(USER=$clean_username|$clean_username :)" | \
tail -10 | \
"${CMD_PATHS[awk]}" -F'COMMAND=' -v user="$clean_username" '{
if (NF >= 2) {
cmd = $2
gsub(/^[ \t]+|[ \t]+$/, "", cmd) # Trim whitespace
gsub(/\\/, "\\\\", cmd) # Escape backslashes first (before other escaping)
gsub(/"/, "\\\"", cmd) # Escape double quotes
gsub(/'\''/, "", cmd) # Remove single quotes (problematic for Prometheus)
if (cmd != "" && length(cmd) > 0) {
print user "|||" cmd # Use delimiter for deduplication
}
}
}')
fi
# If we found commands, break (prefer secure over auth.log for RHEL)
if [[ -n "$commands_found" ]]; then
break
fi
fi
done
# Deduplicate and format as proper metrics: split each "<user>|||<command>"
# record on the literal "|||" delimiter
if [[ -n "$commands_found" ]]; then
echo "$commands_found" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
"${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
print "node_user_sudo_commands{username=\"" $1 "\", command=\"" $2 "\"} 1"
}'
fi
}
# Get session events - Extract login/logout events from auth logs
# Globals:   CMD_PATHS (read: awk, sort, uniq); grep and tail are called by
#            bare name, not through CMD_PATHS
# Arguments: $1 - login name to search for
# Outputs:   one node_user_session_events{username, event, method} 1 line per
#            distinct (event, method) combination, on stdout
# Returns:   1 when $1 is empty
# NOTE(review): the user name is inserted into a grep -E pattern unescaped and
# unanchored (substring / regex match).
# NOTE(review): the loop variable log_file is not declared local.
get_session_events() {
local username="$1"
if [[ -z "$username" ]]; then
return 1
fi
# Strip domain prefixes for comparison: a leading "US\" and anything from "@" on
local clean_username="${username#US\\}"
clean_username="${clean_username%@*}"
# Check both log files for session events; the first readable file that
# yields any event wins
local auth_logs=("/var/log/secure" "/var/log/auth.log")
local session_events=""
for log_file in "${auth_logs[@]}"; do
if [[ -r "$log_file" ]]; then
# Take the last 20 matching lines (no time-window filter is applied) and
# classify each as login/logout with a method derived from keywords in the
# line ("sudo", "publickey", "password"; "ssh" otherwise)
session_events=$(grep -E "(session opened|session closed|Accepted)" "$log_file" 2>/dev/null | \
grep -E "(US\\\\$clean_username|$clean_username|$username)" | \
tail -20 | \
"${CMD_PATHS[awk]}" -v user="$clean_username" '{
if ($0 ~ /session opened/) {
method = "ssh"
if ($0 ~ /sudo/) method = "sudo"
print user "|||login|||" method # Use delimiter for deduplication
}
else if ($0 ~ /session closed/) {
method = "ssh"
if ($0 ~ /sudo/) method = "sudo"
print user "|||logout|||" method # Use delimiter for deduplication
}
else if ($0 ~ /Accepted/) {
method = "ssh"
if ($0 ~ /publickey/) method = "ssh-key"
else if ($0 ~ /password/) method = "ssh-password"
print user "|||login|||" method # Use delimiter for deduplication
}
}')
if [[ -n "$session_events" ]]; then
break
fi
fi
done
# Deduplicate and format as proper metrics: split each
# "<user>|||<event>|||<method>" record on the literal "|||" delimiter
if [[ -n "$session_events" ]]; then
echo "$session_events" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
"${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
print "node_user_session_events{username=\"" $1 "\", event=\"" $2 "\", method=\"" $3 "\"} 1"
}'
fi
}
# Get failed login attempts - Track security events
# Globals:   CMD_PATHS (read: awk, sort, uniq)
# Arguments: optional list of log files to scan; defaults to /var/log/secure
#            followed by /var/log/auth.log. The first readable file that yields
#            any record is used.
# Outputs:   one node_user_failed_logins{username, source_ip, failure_type} 1
#            line per distinct combination, on stdout
# Returns:   0, also when nothing was found
get_failed_logins() {
    local -a auth_logs=("/var/log/secure" "/var/log/auth.log")
    if (( $# > 0 )); then
        auth_logs=("$@")
    fi
    local log_file
    local failed_logins=""

    for log_file in "${auth_logs[@]}"; do
        [[ -r "$log_file" ]] || continue

        # Only the last 50 matching lines are considered (no time filter).
        # The awk program sticks to POSIX match()/RSTART/RLENGTH; the
        # three-argument match() is a gawk extension and a syntax error in
        # other awks. `|| true`: finding no matching line is not an error.
        failed_logins=$(grep -E "(Failed password|authentication failure|Invalid user)" "$log_file" 2>/dev/null | \
            tail -50 | \
            "${CMD_PATHS[awk]}" '{
                username = "unknown"
                source_ip = "unknown"

                # "... for [invalid user] NAME from ..." - skip the optional
                # "invalid user" words so NAME is reported, not "invalid".
                if (match($0, /for (invalid user )?[-a-zA-Z0-9_.\\]+/)) {
                    username = substr($0, RSTART + 4, RLENGTH - 4)
                    sub(/^invalid user /, "", username)
                } else if (match($0, /Invalid user [-a-zA-Z0-9_.\\]+/)) {
                    # "Invalid user NAME from ..."
                    username = substr($0, RSTART + 13, RLENGTH - 13)
                }
                gsub(/US\\/, "", username) # Clean domain prefix

                # Extract source IP
                if (match($0, /from [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) {
                    source_ip = substr($0, RSTART + 5, RLENGTH - 5)
                }

                failure_type = "password"
                if ($0 ~ /Invalid user/) failure_type = "invalid_user"
                else if ($0 ~ /authentication failure/) failure_type = "auth_failure"

                print username "|||" source_ip "|||" failure_type # Use delimiter for deduplication
            }') || true

        if [[ -n "$failed_logins" ]]; then
            break
        fi
    done

    # Deduplicate and format as proper metrics
    if [[ -n "$failed_logins" ]]; then
        printf '%s\n' "$failed_logins" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
            "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
                print "node_user_failed_logins{username=\"" $1 "\", source_ip=\"" $2 "\", failure_type=\"" $3 "\"} 1"
            }'
    fi
}
# Get active session durations - Calculate how long users have been logged in
# Globals:   CMD_PATHS (read: who, awk, sort)
# Outputs:   node_user_session_duration_seconds{username="..."} <seconds>, one
#            line per user (the longest of that user's sessions), sorted
# Notes:     relies on the awk time functions strftime() and mktime().
#            Login timestamps are read from `who -u` in one of three layouts:
#              YYYY-MM-DD HH:MM   (fields 3 and 4)
#              Mon DD HH:MM       (fields 3, 4 and 5; the year is not printed)
#              HH:MM              (field 3 only; assumed to be within 24 hours)
#            The earlier code took field 5 - the idle column in the first
#            layout - as the login time and ignored the login date.
get_session_durations() {
    local current_time
    current_time=$(date +%s)

    "${CMD_PATHS[who]}" -u | "${CMD_PATHS[awk]}" -v now="$current_time" '
        # Local time components -> epoch seconds (seconds fixed at 0).
        function to_epoch(y, mo, d, h, mi) {
            return mktime(sprintf("%d %d %d %d %d 0", y, mo, d, h, mi))
        }
        BEGIN {
            now += 0
            n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", mon_names, " ")
            for (i = 1; i <= n; i++) mon_num[mon_names[i]] = i
        }
        NF >= 5 {
            username = $1
            gsub(/US\\|@us\.[^.]+\.net/, "", username) # Clean username (US\ or @us.*.net)

            login_epoch = -1
            if ($3 ~ /^[0-9]+-[0-9]+-[0-9]+$/ && $4 ~ /^[0-9]+:[0-9]+/) {
                # ISO layout: 2025-01-15 14:30
                split($3, d, "-")
                split($4, t, ":")
                login_epoch = to_epoch(d[1], d[2], d[3], t[1], t[2])
            } else if (($3 in mon_num) && $4 ~ /^[0-9]+$/ && $5 ~ /^[0-9]+:[0-9]+/) {
                # Traditional layout: Oct 15 14:30 - assume the current year
                # and step back one year if that lands in the future.
                split($5, t, ":")
                year = strftime("%Y", now) + 0
                login_epoch = to_epoch(year, mon_num[$3], $4, t[1], t[2])
                if (login_epoch > now) login_epoch = to_epoch(year - 1, mon_num[$3], $4, t[1], t[2])
            } else if ($3 ~ /^[0-9]+:[0-9]+/) {
                # Time only: today, or yesterday when the time is still ahead.
                split($3, t, ":")
                login_epoch = to_epoch(strftime("%Y", now), strftime("%m", now), strftime("%d", now), t[1], t[2])
                if (login_epoch > now) login_epoch -= 86400
            }
            if (login_epoch < 0) next

            duration = now - login_epoch
            if (duration < 0) duration = 0

            # Keep the longest session per user
            if (!(username in longest) || duration > longest[username]) {
                longest[username] = duration
            }
        }
        END {
            for (u in longest) {
                printf "node_user_session_duration_seconds{username=\"%s\"} %d\n", u, longest[u]
            }
        }
    ' | "${CMD_PATHS[sort]}"
}
# Output metric - print one metric family in Prometheus text format.
# Arguments: $1 - metric name
#            $2 - HELP text
#            $3 - metric type (e.g. gauge, counter)
#            $4 - sample line(s) to print
#            $5 - line printed instead when $4 is empty
# Outputs:   "# HELP", "# TYPE" and the sample line(s) on stdout
output_metric() {
    local name="$1"
    local help="$2"
    local kind="$3"
    local samples="$4"
    local fallback="$5"

    printf '# HELP %s %s\n' "$name" "$help"
    printf '# TYPE %s %s\n' "$name" "$kind"
    printf '%s\n' "${samples:-$fallback}"
}
# Main function - Orchestrate the entire monitoring process
# Arguments: the script's command-line arguments ("$@")
# Outputs:   nine metric families in Prometheus text format on stdout, in the
#            order listed below; dry-run banners go to stderr
# Flow:      parse flags -> register exit trap -> resolve commands ->
#            (unless --dry-run) prepare directory, lock, cron job -> emit metrics
main() {
# Parse command line arguments first
parse_arguments "$@"
# Record script start time for runtime metric
local script_start_time
script_start_time=$(date +%s.%N)
# Add dry-run header if applicable
if [[ "$DRY_RUN" == "true" ]]; then
echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/usrlogins.prom ===" >&2
fi
# Note: the trap is registered before setup_lockfile runs, and also in
# dry-run mode where setup_lockfile is never called.
trap cleanup EXIT # Ensure cleanup runs when script exits
# Initialize environment and commands
find_commands
# Skip setup in dry-run mode
if [[ "$DRY_RUN" == "false" ]]; then
setup_directory
setup_lockfile
install_cron_job
fi
# Generate and output all Prometheus metrics
# Metric 1: Individual user sessions with details
local users
users=$(get_logged_users)
output_metric "node_logged_in_usrs" "Currently Logged in Users" "gauge" \
"$users" 'node_logged_in_usrs{name="", location=""} 0'
# Metric 2: Terminal count per user
local user_terminals
user_terminals=$(get_user_terminal_count)
output_metric "node_logged_in_usr_terminals" "Total of open sessions per user" "gauge" \
"$user_terminals" 'node_logged_in_usr_terminals{username=""} 0'
# Metric 3: Total user count system-wide
local total_count
total_count=$(get_total_user_count)
output_metric "node_logged_in_total" "Total of open sessions on the system" "gauge" \
"node_logged_in_total ${total_count:-0}" "node_logged_in_total 0"
# Metric 4: Last 10 commands for each logged in user
# logged_users holds the distinct login names from who, with the "US\" prefix
# and "@us.*.net" suffix removed; it drives the three per-user loops below
local logged_users
logged_users=$("${CMD_PATHS[who]}" | "${CMD_PATHS[awk]}" '{gsub(/US\\|@us\.[^.]+\.net/, "", $1); print $1}' | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}")
local user_commands=""
while IFS= read -r user; do
if [[ -n "$user" ]]; then
local commands
commands=$(get_last_user_commands "$user")
if [[ -n "$commands" ]]; then
# Each user's block is followed by a newline so blocks do not run together
user_commands+="$commands"$'\n'
fi
fi
done <<< "$logged_users"
output_metric "node_user_last_commands" "Last 10 commands executed by logged in users" "gauge" \
"$user_commands" 'node_user_last_commands{username="", command_number="", command=""} 0'
# Metric 5: Recent sudo commands for each logged in user
local sudo_commands=""
while IFS= read -r user; do
if [[ -n "$user" ]]; then
local sudo_cmds
sudo_cmds=$(get_sudo_commands "$user")
if [[ -n "$sudo_cmds" ]]; then
sudo_commands+="$sudo_cmds"$'\n'
fi
fi
done <<< "$logged_users"
output_metric "node_user_sudo_commands" "Recent sudo commands executed by logged in users" "gauge" \
"$sudo_commands" 'node_user_sudo_commands{username="", command=""} 0'
# Metric 6: Session events (login/logout) for each logged in user
local session_events=""
while IFS= read -r user; do
if [[ -n "$user" ]]; then
local events
events=$(get_session_events "$user")
if [[ -n "$events" ]]; then
session_events+="$events"$'\n'
fi
fi
done <<< "$logged_users"
output_metric "node_user_session_events" "Login and logout events for users" "gauge" \
"$session_events" 'node_user_session_events{username="", event="", method=""} 0'
# Metric 7: Active session durations
local session_durations
session_durations=$(get_session_durations)
output_metric "node_user_session_duration_seconds" "Duration of active user sessions in seconds" "gauge" \
"$session_durations" 'node_user_session_duration_seconds{username=""} 0'
# Metric 8: Failed login attempts (security monitoring)
# NOTE(review): declared as "counter" here, unlike the other families which
# are "gauge" - confirm the intended type
local failed_logins
failed_logins=$(get_failed_logins)
output_metric "node_user_failed_logins" "Failed login attempts by username and source IP" "counter" \
"$failed_logins" 'node_user_failed_logins{username="", source_ip="", failure_type=""} 0'
# Metric 9: Script runtime
# The difference is computed with bc; if that fails the runtime is reported as 0
local script_end_time script_runtime
script_end_time=$(date +%s.%N)
script_runtime=$(echo "$script_end_time - $script_start_time" | bc -l 2>/dev/null || echo "0")
output_metric "node_user_monitor_runtime_seconds" "Script execution time in seconds" "gauge" \
"node_user_monitor_runtime_seconds $script_runtime" "node_user_monitor_runtime_seconds 0"
if [[ "$DRY_RUN" == "true" ]]; then
echo "=== END DRY RUN OUTPUT ===" >&2
fi
}
# Script entry point - pass all command-line arguments through to main
main "$@"
# 2025-09-23
# Fixed: Prometheus parsing errors with single quotes (\' sequences)
# Fixed: Prometheus parsing errors with backslash escapes (\u, \x, etc.)
# Improved: Domain regex pattern now handles any us.*.net domain instead of just us.calormen.net