#!/bin/bash
################################################################################
# Script Name: users-logged-in.sh
# Version: 3.4.0-20260415
# Description: Prometheus exporter for user login activity — tracks sessions,
#              terminals, sudo commands, failed logins, and session durations
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Usage:
#   ./users-logged-in.sh                    # Output to stdout
#   ./users-logged-in.sh --textfile         # Write to textfile collector
#   ./users-logged-in.sh --dry-run          # Preview metrics
#   ./users-logged-in.sh -o /tmp/out.prom   # Write to custom file
################################################################################

set -euo pipefail

# CLI flags (overwritten by parse_arguments)
DRY_RUN=false
VERBOSE=false
QUIET=false
NO_CRON=false
DEBUG="${DEBUG:-0}"   # initialised so it is never unset under `set -u`
SCRIPT_VERSION="3.4.0-20260415"

# Output configuration
# --textfile writes into the same directory the rest of the script prepares;
# it honours a NODE_EXPORTER_DIR override and keeps the historical default.
TEXTFILE_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}"
OUTPUT_FILE=""

#######################################
# Parse command line arguments into the global flag variables.
# Globals:
#   DRY_RUN, VERBOSE, DEBUG, QUIET, NO_CRON, OUTPUT_FILE (written)
#   TEXTFILE_DIR, SCRIPT_VERSION (read)
# Arguments:
#   The script's positional parameters ("$@").
# Outputs:
#   --help / --version text on stdout; usage errors on stderr.
# Returns:
#   0 on success. Exits 0 after --help/--version, exits 1 on an unknown
#   option or when -o/--output is given without a value.
#######################################
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --verbose|-v)
                VERBOSE=true
                DEBUG=1
                shift
                ;;
            --quiet|-q)
                QUIET=true
                shift
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/usrlogins.prom"
                shift
                ;;
            -o|--output)
                # Without this check `set -u` aborts on "$2" with a bare
                # "unbound variable" message instead of a usage error.
                if [[ $# -lt 2 || -z "$2" ]]; then
                    echo "Option $1 requires an argument" >&2
                    echo "Use --help for usage information" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            --output=*)
                # Also accept the --output=PATH spelling.
                OUTPUT_FILE="${1#*=}"
                if [[ -z "$OUTPUT_FILE" ]]; then
                    echo "Option --output requires an argument" >&2
                    echo "Use --help for usage information" >&2
                    exit 1
                fi
                shift
                ;;
            --no-cron)
                NO_CRON=true
                shift
                ;;
            --version)
                echo "User Login Monitor"
                echo "Version: $SCRIPT_VERSION"
                echo "Author: Phil Connor contact@mylinux.work"
                exit 0
                ;;
            -h|--help)
                echo "Usage: $0 [OPTIONS]"
                echo "Monitor user login activity and export Prometheus metrics"
                echo ""
                echo "Options:"
                echo "  --textfile       Write to node_exporter textfile collector"
                echo "  -o, --output     Output file path"
                echo "  --dry-run        Output metrics to console instead of file"
                echo "  -v, --verbose    Enable verbose debug output"
                echo "  -q, --quiet      Suppress non-error output"
                echo "  --no-cron        Skip cron job installation"
                echo "  --version        Show version and exit"
                echo "  -h, --help       Show this help message"
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done
}

# Enhanced logging functions
# All diagnostics go to stderr: stdout carries the metrics stream, so a log
# line written there would end up inside the exported metrics.

#######################################
# Print a timestamped message when --verbose is active.
# Globals:   VERBOSE (read)
# Arguments: $1 - message
# Outputs:   message on stderr
# Returns:   always 0 (an `[[ ]] && echo` form would return 1 when the flag
#            is off and trip `set -e` at the call site)
#######################################
log_verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        printf '[%s] [VERBOSE] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >&2
    fi
    return 0
}

#######################################
# Print a timestamped informational message unless --quiet is active.
# Globals:   QUIET (read)
# Arguments: $1 - message
# Outputs:   message on stderr
# Returns:   always 0 (see log_verbose)
#######################################
log_info() {
    if [[ "$QUIET" == "false" ]]; then
        printf '[%s] [INFO] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >&2
    fi
    return 0
}

# System Configuration - Define default values and paths
readonly NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}"  # Directory where Prometheus metrics are stored
readonly PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}"                  # User that owns the Prometheus files
readonly CRONTAB_USER="${CRONTAB_USER:-root}"                              # User under which the cron job runs
SCRIPT_PATH="$(readlink -f "$0")" || SCRIPT_PATH="$0"                      # Full path to this script
readonly SCRIPT_PATH
readonly UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/3 * * * *}"                 # Cron schedule (every 3 minutes by default)
readonly LOCKFILE="/var/run/users_logged_in.lock"                          # Prevents multiple instances from running

# Set to true only once THIS process has created the lockfile, so that the
# EXIT trap never removes a lock held by another running instance.
LOCK_ACQUIRED=false

# Required commands - Map of commands to their expected locations
declare -A COMMANDS=(
    [awk]="/usr/bin"    # Text processing utility
    [cut]="/usr/bin"    # Extract columns from text
    [grep]="/usr/bin"   # Search text patterns
    [sed]="/usr/bin"    # Stream editor for text manipulation
    [sort]="/usr/bin"   # Sort lines of text
    [uniq]="/usr/bin"   # Remove duplicate lines
    [who]="/usr/bin"    # Show logged in users
)

# Command paths (populated by find_commands function)
declare -A CMD_PATHS

# Validation - Ensure required environment variables are set
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
    echo "ERROR: Required environment variables not set" >&2
    exit 1
fi

#######################################
# Print an error message and terminate the script.
# Arguments: $1 - message; $2 - exit code (default 1)
# Outputs:   "ERROR: <message>" on stderr
#######################################
handle_error() {
    local err_msg="$1"
    local exit_code="${2:-1}"
    echo "ERROR: $err_msg" >&2
    exit "$exit_code"
}

#######################################
# Print a timestamped log line with an explicit level.
# Arguments: $1 - level (e.g. WARNING); $2 - message
# Outputs:   log line on stderr
#######################################
log() {
    local level="$1"
    local message="$2"
    printf '[%s] [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$level" "$message" >&2
}

#######################################
# Resolve the path of an executable, falling back to a known directory.
# Arguments: $1 - command name; $2 - fallback directory
# Outputs:   resolved path on stdout
# Returns:   exits via handle_error when the result is not executable
#######################################
find_command() {
    local command_name="$1"
    local fallback_path="$2"
    local path
    path=$(command -v "$command_name" 2>/dev/null) || path="$fallback_path/$command_name"
    [[ -x "$path" ]] || handle_error "Cannot find or execute '$command_name'"
    echo "$path"
}

#######################################
# Populate CMD_PATHS with the resolved location of every required command.
# Globals:   COMMANDS (read), CMD_PATHS (written)
#######################################
find_commands() {
    local cmd
    for cmd in "${!COMMANDS[@]}"; do
        CMD_PATHS[$cmd]=$(find_command "$cmd" "${COMMANDS[$cmd]}")
    done
}

#######################################
# EXIT trap: remove the lockfile, but only if this process created it.
# Globals:   LOCK_ACQUIRED, LOCKFILE (read)
#######################################
cleanup() {
    if [[ "$LOCK_ACQUIRED" == "true" ]]; then
        rm -f -- "$LOCKFILE"
    fi
}

#######################################
# Ensure the metrics directory exists (created when running as root) and is
# writable by the current user.
# Globals:   NODE_EXPORTER_DIR, PROMETHEUS_USER (read)
#######################################
setup_directory() {
    if [[ ! -d "$NODE_EXPORTER_DIR" ]]; then
        if [[ $(id -u) -eq 0 ]]; then
            mkdir -p "$NODE_EXPORTER_DIR"
            # Best effort: the configured user may not exist on this host.
            chown "$PROMETHEUS_USER": "$NODE_EXPORTER_DIR" 2>/dev/null || true
        fi
    fi
    [[ -w "$NODE_EXPORTER_DIR" ]] || handle_error "$NODE_EXPORTER_DIR is not writable"
}

#######################################
# Take the single-instance lock.
# Lockfiles older than 60 minutes are treated as stale and removed first.
# The file is created under `noclobber`, so check-and-create is one step
# rather than a separate test followed by touch.
# Globals:   LOCKFILE (read), LOCK_ACQUIRED (written)
#######################################
setup_lockfile() {
    find "$LOCKFILE" -mmin +60 -delete 2>/dev/null || true

    if ! ( set -o noclobber; : > "$LOCKFILE" ) 2>/dev/null; then
        if [[ -e "$LOCKFILE" ]]; then
            handle_error "Script is already running"
        fi
        handle_error "Cannot create lockfile $LOCKFILE"
    fi

    LOCK_ACQUIRED=true
    chmod 600 "$LOCKFILE"
}

#######################################
# Install a cron entry for this script in CRONTAB_USER's crontab unless one
# referencing SCRIPT_PATH is already present or --no-cron was given.
# The entry writes through -o so the .prom file is replaced with a rename
# instead of being truncated and refilled, and --no-cron stops the scheduled
# run from re-checking the crontab every time.
# Globals:   NO_CRON, SCRIPT_PATH, CRONTAB_USER, UPDATE_INTERVAL,
#            NODE_EXPORTER_DIR (read)
# Returns:   0; an installation failure is logged as a warning only
#######################################
install_cron_job() {
    if [[ "$NO_CRON" == "true" ]]; then
        log_info "Skipping cron job installation (--no-cron specified)"
        return 0
    fi

    [[ -f "$SCRIPT_PATH" ]] || return 0

    # Inspect the same crontab that will be modified.
    local current_crontab
    current_crontab=$(crontab -u "$CRONTAB_USER" -l 2>/dev/null) || current_crontab=""

    # Fixed-string match: the path must not be interpreted as a regex.
    if "${CMD_PATHS[grep]}" -qF -- "$SCRIPT_PATH" <<< "$current_crontab"; then
        return 0
    fi

    local cron_entry="$UPDATE_INTERVAL $SCRIPT_PATH --quiet --no-cron -o $NODE_EXPORTER_DIR/usrlogins.prom"

    # printf '%s\n' keeps existing entries byte-for-byte; `echo -e` would
    # reinterpret backslash sequences contained in them.
    if {
        if [[ -n "$current_crontab" ]]; then
            printf '%s\n' "$current_crontab"
        fi
        printf '%s\n' "$cron_entry"
    } | crontab -u "$CRONTAB_USER" -; then
        log_info "Cron job installed successfully"
    else
        log "WARNING" "Failed to install cron job for user $CRONTAB_USER"
    fi
}

#######################################
# Emit one node_logged_in_usrs sample per distinct `who` line.
# Outputs:   metric lines on stdout
#######################################
get_logged_users() {
    "${CMD_PATHS[who]}" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
    "${CMD_PATHS[awk]}" '{
        gsub(/US\\|@us\.[^.]+\.net/, "", $1)   # Remove domain prefixes from username (US\ or @us.*.net)
        gsub(/\//, " ", $2)                    # Replace slashes in terminal names
        gsub(/:/, "", $2)                      # Remove colons from terminal names
        gsub(/:100/, "aws_workspace", $5)      # Convert AWS workspace notation
        gsub(/\(|\)/, "", $5)                  # Remove parentheses from location
        print "node_logged_in_usrs{name=\""$1"\", terminal=\""$2"\", location=\""$5"\"}", 1
    }'
}

#######################################
# Emit node_logged_in_usr_terminals: number of `who` lines per user.
# Outputs:   metric lines on stdout
#######################################
get_user_terminal_count() {
    "${CMD_PATHS[who]}" | "${CMD_PATHS[sed]}" 's/.*US\\[\t ]*//;s/,//g' | \
    "${CMD_PATHS[cut]}" -f1 -d' ' | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" -c | \
    "${CMD_PATHS[awk]}" '{
        gsub(/@us\.[^.]+\.net/, "", $2)        # Remove email domain from username (@us.*.net)
        print "node_logged_in_usr_terminals{username=\""$2"\"}", $1
    }'
}

#######################################
# Print the total number of logged-in sessions reported by `who -q`.
# LC_ALL=C pins the untranslated "# users=N" summary line, and a single awk
# stage cannot fail the pipeline the way a grep with no match does.
# Outputs:   the count on stdout (nothing if the summary line is absent)
#######################################
get_total_user_count() {
    LC_ALL=C "${CMD_PATHS[who]}" -q | \
    "${CMD_PATHS[awk]}" -F'=' '/users=/ { print $2 }'
}

#######################################
# Emit the last 10 lines of a user's shell history as
# node_user_last_commands samples.
# NOTE(review): history lines can contain secrets typed on the command line;
# confirm that exporting them as label values is acceptable here.
# Arguments: $1 - username
# Outputs:   metric lines on stdout (nothing when no history is readable)
# Returns:   1 when the username is empty, 0 otherwise
#######################################
get_last_user_commands() {
    local username="$1"
    # Initialised to "": a bare `local history_file` stays unset and the
    # later test would abort the script under `set -u`.
    local history_file=""
    local home_dir hist_path

    if [[ -z "$username" ]]; then
        return 1
    fi

    # Only the user's own home is searched; falling back to root's history
    # for other users would attribute root's commands to them.
    if [[ "$username" == "root" ]]; then
        home_dir="/root"
    else
        home_dir="/home/${username}"
    fi

    for hist_path in "${home_dir}/.bash_history" "${home_dir}/.history"; do
        if [[ -r "$hist_path" ]]; then
            history_file="$hist_path"
            break
        fi
    done

    if [[ -n "$history_file" ]]; then
        tail -n 10 "$history_file" 2>/dev/null | \
        "${CMD_PATHS[awk]}" -v user="$username" '{
            gsub(/\\/, "&&", $0)       # Double each backslash ("&" is the matched text)
            gsub(/"/, "\\\"", $0)      # Escape double quotes in commands
            gsub(/'\''/, "", $0)       # Remove single quotes
            print "node_user_last_commands{username=\"" user "\", command_number=\"" NR "\", command=\"" $0 "\"} 1"
        }'
    fi
}

#######################################
# Helper: turn sudo log lines (stdin) into "user|||command" records with the
# command escaped for use as a label value.
# Arguments: $1 - awk field separator preceding the command text
#            $2 - username to emit
#            $3 - "1" to decode "#040" sequences into spaces, else "0"
#######################################
_format_sudo_lines() {
    "${CMD_PATHS[awk]}" -F"$1" -v user="$2" -v decode040="$3" '{
        if (NF >= 2) {
            cmd = $2
            if (decode040 == 1) gsub(/#040/, " ", cmd)   # Convert #040 to spaces
            gsub(/^[ \t]+|[ \t]+$/, "", cmd)             # Trim whitespace
            gsub(/\\/, "&&", cmd)                        # Double each backslash
            gsub(/"/, "\\\"", cmd)                       # Escape double quotes
            gsub(/'\''/, "", cmd)                        # Remove single quotes
            if (cmd != "") {
                print user "|||" cmd                     # Delimiter used for deduplication
            }
        }
    }'
}

#######################################
# Emit recent sudo commands of a user as node_user_sudo_commands samples,
# read from /var/log/secure or /var/log/auth.log (first file with a match).
# The username is matched as a fixed string so regex metacharacters in it
# cannot alter the pattern.
# Arguments: $1 - username
# Outputs:   metric lines on stdout
# Returns:   1 when the username is empty, 0 otherwise
#######################################
get_sudo_commands() {
    local username="$1"

    if [[ -z "$username" ]]; then
        return 1
    fi

    # Strip domain prefixes for comparison
    local clean_username="${username#US\\}"
    clean_username="${clean_username%@*}"

    local auth_logs=("/var/log/secure" "/var/log/auth.log")
    local commands_found=""
    local log_file

    for log_file in "${auth_logs[@]}"; do
        [[ -r "$log_file" ]] || continue

        # First attempt: lines carrying TTY= with a "; COMMAND=" separator
        commands_found=$("${CMD_PATHS[grep]}" "TTY=" "$log_file" 2>/dev/null | \
            "${CMD_PATHS[grep]}" -F -e "$clean_username" -e "$username" | \
            "${CMD_PATHS[grep]}" "COMMAND=" | \
            tail -10 | \
            _format_sudo_lines '; COMMAND=' "$clean_username" 1) || true

        # Second attempt: any COMMAND= line naming the user
        if [[ -z "$commands_found" ]]; then
            commands_found=$("${CMD_PATHS[grep]}" "COMMAND=" "$log_file" 2>/dev/null | \
                "${CMD_PATHS[grep]}" -F -e "USER=$clean_username" -e "$clean_username :" | \
                tail -10 | \
                _format_sudo_lines 'COMMAND=' "$clean_username" 0) || true
        fi

        # Stop at the first log file that produced results
        if [[ -n "$commands_found" ]]; then
            break
        fi
    done

    # Deduplicate and format as proper metrics
    if [[ -n "$commands_found" ]]; then
        printf '%s\n' "$commands_found" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
        "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
            print "node_user_sudo_commands{username=\"" $1 "\", command=\"" $2 "\"} 1"
        }'
    fi
}

#######################################
# Emit login/logout events of a user as node_user_session_events samples,
# classified from the last 20 matching auth-log lines.
# Arguments: $1 - username
# Outputs:   metric lines on stdout
# Returns:   1 when the username is empty, 0 otherwise
#######################################
get_session_events() {
    local username="$1"

    if [[ -z "$username" ]]; then
        return 1
    fi

    # Strip domain prefixes for comparison
    local clean_username="${username#US\\}"
    clean_username="${clean_username%@*}"

    local auth_logs=("/var/log/secure" "/var/log/auth.log")
    local session_events=""
    local log_file

    for log_file in "${auth_logs[@]}"; do
        [[ -r "$log_file" ]] || continue

        session_events=$("${CMD_PATHS[grep]}" -E "(session opened|session closed|Accepted)" "$log_file" 2>/dev/null | \
            "${CMD_PATHS[grep]}" -F -e "$clean_username" -e "$username" | \
            tail -20 | \
            "${CMD_PATHS[awk]}" -v user="$clean_username" '{
                if ($0 ~ /session opened/) {
                    method = "ssh"
                    if ($0 ~ /sudo/) method = "sudo"
                    print user "|||login|||" method
                } else if ($0 ~ /session closed/) {
                    method = "ssh"
                    if ($0 ~ /sudo/) method = "sudo"
                    print user "|||logout|||" method
                } else if ($0 ~ /Accepted/) {
                    method = "ssh"
                    if ($0 ~ /publickey/) method = "ssh-key"
                    else if ($0 ~ /password/) method = "ssh-password"
                    print user "|||login|||" method
                }
            }') || true

        if [[ -n "$session_events" ]]; then
            break
        fi
    done

    # Deduplicate and format as proper metrics
    if [[ -n "$session_events" ]]; then
        printf '%s\n' "$session_events" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
        "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
            print "node_user_session_events{username=\"" $1 "\", event=\"" $2 "\", method=\"" $3 "\"} 1"
        }'
    fi
}

#######################################
# Emit node_user_failed_logins samples from the last 50 failed-authentication
# lines of the auth log (one sample per distinct user/IP/type combination).
# Uses only two-argument match() with RSTART/RLENGTH, so it does not depend
# on gawk's three-argument form.
# Outputs:   metric lines on stdout
#######################################
get_failed_logins() {
    local auth_logs=("/var/log/secure" "/var/log/auth.log")
    local failed_logins=""
    local log_file

    for log_file in "${auth_logs[@]}"; do
        [[ -r "$log_file" ]] || continue

        failed_logins=$("${CMD_PATHS[grep]}" -E "(Failed password|authentication failure|Invalid user)" "$log_file" 2>/dev/null | \
            tail -50 | \
            "${CMD_PATHS[awk]}" '{
                username = "unknown"
                source_ip = "unknown"

                # "for <user>" or "for invalid user <user>": report the real
                # name rather than the literal word "invalid"
                if (match($0, /for (invalid user )?[a-zA-Z0-9_\\]+/)) {
                    username = substr($0, RSTART + 4, RLENGTH - 4)
                    sub(/^invalid user /, "", username)
                } else if (match($0, /Invalid user [a-zA-Z0-9_\\]+/)) {
                    username = substr($0, RSTART + 13, RLENGTH - 13)
                }
                gsub(/US\\/, "", username)     # Clean domain prefix
                gsub(/\\/, "&&", username)     # Double any remaining backslash
                if (username == "") username = "unknown"

                # Extract source IP
                if (match($0, /from [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) {
                    source_ip = substr($0, RSTART + 5, RLENGTH - 5)
                }

                failure_type = "password"
                if ($0 ~ /Invalid user/) failure_type = "invalid_user"
                else if ($0 ~ /authentication failure/) failure_type = "auth_failure"

                print username "|||" source_ip "|||" failure_type
            }') || true

        if [[ -n "$failed_logins" ]]; then
            break
        fi
    done

    # Deduplicate and format as proper metrics
    if [[ -n "$failed_logins" ]]; then
        printf '%s\n' "$failed_logins" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
        "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
            print "node_user_failed_logins{username=\"" $1 "\", source_ip=\"" $2 "\", failure_type=\"" $3 "\"} 1"
        }'
    fi
}

#######################################
# Emit node_user_session_duration_seconds: for every user the longest
# elapsed time among their `who -u` sessions.
# Two login-time layouts are handled:
#   "YYYY-MM-DD HH:MM"  (fields 3-4)
#   "Mon DD HH:MM"      (fields 3-5)
# The timestamp is converted with `date -d`, so the date is honoured as well
# as the time of day. A year-less timestamp that lands in the future is
# re-read as last year's.
# NOTE(review): relies on `date -d` parsing these strings (GNU date) — confirm
# on the target hosts.
# Outputs:   metric lines on stdout
#######################################
get_session_durations() {
    local now session_user login_stamp login_epoch
    now=$(date +%s)

    "${CMD_PATHS[who]}" -u | \
    "${CMD_PATHS[awk]}" 'NF >= 5 {
        user = $1
        gsub(/US\\|@us\.[^.]+\.net/, "", user)   # Clean username (US\ or @us.*.net)
        if ($3 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ && $4 ~ /^[0-9]+:[0-9]+$/) {
            print user "|" $3 " " $4
        } else if ($5 ~ /^[0-9]+:[0-9]+$/) {
            print user "|" $3 " " $4 " " $5
        }
    }' | \
    while IFS='|' read -r session_user login_stamp; do
        login_epoch=$(date -d "$login_stamp" +%s 2>/dev/null) || continue
        if (( login_epoch > now )); then
            login_epoch=$(date -d "$login_stamp 1 year ago" +%s 2>/dev/null) || continue
        fi
        (( login_epoch <= now )) || continue
        printf '%s|||%s\n' "$session_user" "$(( now - login_epoch ))"
    done | \
    "${CMD_PATHS[sort]}" -k1,1 | \
    "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
        # Keep the highest duration for each username
        if ($1 != prev_user) {
            if (prev_user != "") {
                print "node_user_session_duration_seconds{username=\"" prev_user "\"} " max_duration
            }
            prev_user = $1
            max_duration = $2 + 0
        } else if ($2 + 0 > max_duration) {
            max_duration = $2 + 0
        }
    }
    END {
        if (prev_user != "") {
            print "node_user_session_duration_seconds{username=\"" prev_user "\"} " max_duration
        }
    }'
}

#######################################
# Print one metric family: HELP line, TYPE line, then the samples (or the
# default sample when none were collected).
# Arguments: $1 - metric name; $2 - help text; $3 - metric type
#            $4 - sample lines (may be empty); $5 - default sample line
#######################################
output_metric() {
    local metric_name="$1"
    local help_text="$2"
    local metric_type="$3"
    local metric_value="${4:-}"
    local default_value="${5:-}"

    echo "# HELP $metric_name $help_text"
    echo "# TYPE $metric_name $metric_type"
    printf '%s\n' "${metric_value:-$default_value}"
}

#######################################
# Helper: run a per-user collector for every name in a newline-separated
# list and print the concatenated output.
# Arguments: $1 - collector function name; $2 - newline-separated usernames
#######################################
_collect_per_user() {
    local collector="$1"
    local user_list="$2"
    local user chunk combined=""

    while IFS= read -r user; do
        [[ -n "$user" ]] || continue
        chunk=$("$collector" "$user") || true
        if [[ -n "$chunk" ]]; then
            combined+="${chunk}"$'\n'
        fi
    done <<< "$user_list"

    printf '%s' "${combined%$'\n'}"
}

#######################################
# Write every metric family to stdout.
# Globals:   CMD_PATHS (read)
#######################################
generate_metrics() {
    local script_start_time
    script_start_time=$(date +%s.%N)

    # Metric 1: Individual user sessions with details
    local users
    users=$(get_logged_users)
    output_metric "node_logged_in_usrs" "Currently Logged in Users" "gauge" \
        "$users" 'node_logged_in_usrs{name="", terminal="", location=""} 0'

    # Metric 2: Terminal count per user
    local user_terminals
    user_terminals=$(get_user_terminal_count)
    output_metric "node_logged_in_usr_terminals" "Total of open sessions per user" "gauge" \
        "$user_terminals" 'node_logged_in_usr_terminals{username=""} 0'

    # Metric 3: Total user count system-wide
    local total_count
    total_count=$(get_total_user_count) || total_count=0
    output_metric "node_logged_in_total" "Total of open sessions on the system" "gauge" \
        "node_logged_in_total ${total_count:-0}" "node_logged_in_total 0"

    # Distinct (cleaned) usernames, shared by the per-user metrics below
    local logged_users
    logged_users=$("${CMD_PATHS[who]}" | \
        "${CMD_PATHS[awk]}" '{gsub(/US\\|@us\.[^.]+\.net/, "", $1); print $1}' | \
        "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}")

    # Metric 4: Last 10 commands for each logged in user
    local user_commands
    user_commands=$(_collect_per_user get_last_user_commands "$logged_users")
    output_metric "node_user_last_commands" "Last 10 commands executed by logged in users" "gauge" \
        "$user_commands" 'node_user_last_commands{username="", command_number="", command=""} 0'

    # Metric 5: Recent sudo commands for each logged in user
    local sudo_commands
    sudo_commands=$(_collect_per_user get_sudo_commands "$logged_users")
    output_metric "node_user_sudo_commands" "Recent sudo commands executed by logged in users" "gauge" \
        "$sudo_commands" 'node_user_sudo_commands{username="", command=""} 0'

    # Metric 6: Session events (login/logout) for each logged in user
    local session_events
    session_events=$(_collect_per_user get_session_events "$logged_users")
    output_metric "node_user_session_events" "Login and logout events for users" "gauge" \
        "$session_events" 'node_user_session_events{username="", event="", method=""} 0'

    # Metric 7: Active session durations
    local session_durations
    session_durations=$(get_session_durations)
    output_metric "node_user_session_duration_seconds" "Duration of active user sessions in seconds" "gauge" \
        "$session_durations" 'node_user_session_duration_seconds{username=""} 0'

    # Metric 8: Failed login attempts (security monitoring)
    # NOTE(review): declared as a counter, but every sample is the constant 1
    # for a deduplicated combination — confirm whether gauge was intended.
    local failed_logins
    failed_logins=$(get_failed_logins)
    output_metric "node_user_failed_logins" "Failed login attempts by username and source IP" "counter" \
        "$failed_logins" 'node_user_failed_logins{username="", source_ip="", failure_type=""} 0'

    # Metric 9: Script runtime. Computed with awk, which is already a checked
    # requirement, instead of bc, which is never checked for.
    local script_end_time script_runtime
    script_end_time=$(date +%s.%N)
    script_runtime=$("${CMD_PATHS[awk]}" -v s="$script_start_time" -v e="$script_end_time" \
        'BEGIN { printf "%.3f", e - s }' 2>/dev/null) || script_runtime=0
    output_metric "node_user_monitor_runtime_seconds" "Script execution time in seconds" "gauge" \
        "node_user_monitor_runtime_seconds ${script_runtime:-0}" "node_user_monitor_runtime_seconds 0"
}

#######################################
# Orchestrate a run: parse flags, resolve commands, prepare the directory,
# lock and cron entry (skipped for --dry-run), then emit metrics.
# --dry-run is tested before -o/--textfile, so a dry run prints to the
# console and does not write the file, as the help text states.
# Arguments: the script's positional parameters
#######################################
main() {
    parse_arguments "$@"

    trap cleanup EXIT
    find_commands

    if [[ "$DRY_RUN" == "false" ]]; then
        setup_directory
        setup_lockfile
        install_cron_job
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
        echo "=== DRY RUN MODE ===" >&2
        generate_metrics
        echo "=== END DRY RUN OUTPUT ===" >&2
    elif [[ -n "$OUTPUT_FILE" ]]; then
        local output_dir temp_file
        output_dir="$(dirname "$OUTPUT_FILE")"
        mkdir -p "$output_dir"

        # Write to a temp file in the target directory, then rename it into
        # place, so a reader does not see a half-written metrics file.
        temp_file=$(mktemp "${output_dir}/.usrlogins_metrics.XXXXXX")
        if ! generate_metrics > "$temp_file" 2>/dev/null; then
            rm -f -- "$temp_file"
            echo "ERROR: Failed to generate metrics" >&2
            exit 1
        fi

        chmod 644 "$temp_file"
        if ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
            rm -f -- "$temp_file"
            handle_error "Failed to move metrics into $OUTPUT_FILE"
        fi
        log_info "Metrics written to $OUTPUT_FILE"
    else
        generate_metrics
    fi
}

# Script entry point
main "$@"