#!/usr/bin/env bash
#########################################################################################
#### service-restart-tracker.sh — Detect and log unexpected service restarts
#### Scans the systemd journal for service start/stop/restart events
####
#### Author:  Phil Connor
#### Contact: contact@mylinux.work
#### License: MIT
#### Version 1.00
####
#### Usage:
####   ./service-restart-tracker.sh
####   ./service-restart-tracker.sh --since "2 hours ago"
####
#### See --help for all options.
#########################################################################################

set -euo pipefail

# ── Defaults ──────────────────────────────────────────────────────────
# SINCE, MIN_RESTARTS, VERBOSE and COLOR may be preset through the environment.
SINCE="${SINCE:-24 hours ago}"
SERVICES=""
MIN_RESTARTS="${MIN_RESTARTS:-3}"
LOG_FILE=""
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"

# ── State ─────────────────────────────────────────────────────────────
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
COUNT_TOTAL=0
COUNT_RESTARTED=0
COUNT_FREQUENT=0

# Color escapes start out empty so that err/warn can be called under `set -u`
# before setup_colors has run (e.g. while parsing arguments).
RED="" GREEN="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""

# ── Colors ────────────────────────────────────────────────────────────
#######################################
# Populate the color variables according to COLOR.
# Globals:   COLOR (read); RED GREEN YELLOW CYAN BOLD DIM RESET (written)
# Notes:     Values are stored as backslash escapes and are expanded by
#            printf's %b at the point of use. Colors are enabled when COLOR
#            is "always", or when COLOR is not "never" and stdout is a TTY.
#######################################
setup_colors() {
    RED="" GREEN="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""

    if [[ "$COLOR" == "never" ]]; then
        return 0
    fi

    if [[ "$COLOR" == "always" || -t 1 ]]; then
        RED='\033[0;31m'
        GREEN='\033[0;32m'
        YELLOW='\033[0;33m'
        CYAN='\033[0;36m'
        BOLD='\033[1m'
        DIM='\033[2m'
        RESET='\033[0m'
    fi
}

# ── Logging ───────────────────────────────────────────────────────────
# Only the color variables go through %b; the message itself is printed
# verbatim so backslashes in paths or unit names are not interpreted.
log()  { printf '%b[INFO]%b %s\n' "$CYAN" "$RESET" "$*"; }
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$RESET" "$*" >&2; }
err()  { printf '%b[ERROR]%b %s\n' "$RED" "$RESET" "$*" >&2; }

# Print a debug line to stdout, only when VERBOSE is "true".
verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        printf '%b[DEBUG]%b %s\n' "$DIM" "$RESET" "$*"
    fi
}

# ── Helpers ───────────────────────────────────────────────────────────
# Print a section title ($1) surrounded by blank lines.
section_header() {
    echo ""
    printf ' %b── %s ──%b\n' "${BOLD}${CYAN}" "$1" "$RESET"
    echo ""
}

# Print a "label value" row; $1 = label (padded to 22 columns), $2 = plain value.
field() {
    printf ' %b%-22s%b %s\n' "$BOLD" "$1" "$RESET" "$2"
}

# Same as field(), but $2 may contain color escapes (expanded via %b).
field_color() {
    printf ' %b%-22s%b %b\n' "$BOLD" "$1" "$RESET" "$2"
}

#######################################
# Print a line to stdout and, when LOG_FILE is set, append a copy with ANSI
# color sequences removed to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - line to print (escapes already expanded)
#######################################
output_line() {
    local line="$1"
    printf '%s\n' "$line"

    if [[ -n "$LOG_FILE" ]]; then
        local stripped="$line"
        local ansi_re=$'\033''\[[0-9;]*m'
        while [[ "$stripped" =~ $ansi_re ]]; do
            # Quote the match so it is removed literally, not treated as a glob.
            stripped="${stripped//"${BASH_REMATCH[0]}"/}"
        done
        printf '%s\n' "$stripped" >> "$LOG_FILE"
    fi
}

# Format a duration in seconds ($1) as "Xd Yh Zm", "Yh Zm" or "Zm".
format_duration() {
    local total="$1"
    local days=$(( total / 86400 ))
    local hours=$(( (total % 86400) / 3600 ))
    local mins=$(( (total % 3600) / 60 ))

    if (( days > 0 )); then
        printf '%s\n' "${days}d ${hours}h ${mins}m"
    elif (( hours > 0 )); then
        printf '%s\n' "${hours}h ${mins}m"
    else
        printf '%s\n' "${mins}m"
    fi
}

# ══════════════════════════════════════════════════════════════════════
#  SERVICE ANALYSIS
# ══════════════════════════════════════════════════════════════════════

#######################################
# Print the services to analyze, one per line.
# Globals:   SERVICES, SINCE (read)
# Outputs:   With SERVICES set: its comma-separated entries, whitespace
#            trimmed, empties dropped. Otherwise: every distinct
#            _SYSTEMD_UNIT ending in ".service" found in the journal since
#            SINCE.
# Returns:   Always 0. An empty result is not an error; under
#            `set -e`/pipefail a non-matching grep would otherwise abort the
#            caller's command substitution.
#######################################
get_service_list() {
    if [[ -n "$SERVICES" ]]; then
        local -a requested=()
        local name
        IFS=',' read -r -a requested <<< "$SERVICES"
        for name in ${requested[@]+"${requested[@]}"}; do
            name="${name#"${name%%[![:space:]]*}"}"   # trim leading whitespace
            name="${name%"${name##*[![:space:]]}"}"   # trim trailing whitespace
            if [[ -n "$name" ]]; then
                printf '%s\n' "$name"
            fi
        done
        return 0
    fi

    journalctl --since "$SINCE" --no-pager -o json 2>/dev/null \
        | grep -oE '"_SYSTEMD_UNIT":"[^"]*\.service"' \
        | sed 's/.*:"\(.*\)"/\1/' \
        | sort -u \
        || true
}

#######################################
# Summarize start events from `journalctl -o short` text on stdin.
# Only lines whose 5th field is systemd's own identifier ("systemd[PID]:")
# are considered. A "Starting" line opens a start attempt; the "Started"
# line that follows it completes the same attempt and is not counted again.
# A "Started" line with no pending "Starting" counts as its own start.
# Outputs:   "<count> <Mon DD HH:MM:SS>" (timestamp empty when count is 0)
#######################################
summarize_starts() {
    awk '
        $5 ~ /^systemd\[[0-9]+\]:$/ {
            if ($6 == "Starting") {
                count++
                pending = 1
                stamp = $1 " " $2 " " $3
            } else if ($6 == "Started") {
                if (!pending) count++
                pending = 0
                stamp = $1 " " $2 " " $3
            }
        }
        END { printf "%d %s\n", count, stamp }
    '
}

#######################################
# Print how long ago a unit's ActiveEnterTimestamp was, as a short duration.
# Prints nothing when the timestamp is missing or cannot be parsed by date.
# Arguments: $1 - unit name
#######################################
service_uptime() {
    local prop active_since active_epoch now_epoch elapsed

    prop=$(systemctl show "$1" --property=ActiveEnterTimestamp 2>/dev/null || true)
    active_since="${prop#*=}"
    if [[ -z "$active_since" ]]; then
        return 0
    fi

    active_epoch=$(date -d "$active_since" +%s 2>/dev/null || true)
    if [[ ! "$active_epoch" =~ ^[0-9]+$ ]] || (( active_epoch == 0 )); then
        return 0
    fi

    now_epoch=$(date +%s)
    elapsed=$(( now_epoch - active_epoch ))
    if (( elapsed < 0 )); then
        elapsed=0   # clock skew: never print a negative duration
    fi
    format_duration "$elapsed"
}

#######################################
# Analyze one service and print its table row.
# Globals:   SINCE, MIN_RESTARTS, RED, YELLOW, GREEN, RESET (read)
#            COUNT_TOTAL, COUNT_RESTARTED, COUNT_FREQUENT (updated)
# Arguments: $1 - unit name
# Notes:     Services with no start and no stop activity in the window are
#            skipped and not counted. Restarts = start events beyond the
#            first one seen in the window.
#######################################
analyze_service() {
    local service="$1"
    verbose "Analyzing service: ${service}"

    # Read the journal once and reuse it for every count below.
    local journal
    journal=$(journalctl --since "$SINCE" -u "$service" --no-pager -o short 2>/dev/null || true)

    local summary start_count last_start
    summary=$(summarize_starts <<< "$journal")
    start_count="${summary%% *}"
    last_start="${summary#* }"

    # grep -c prints "0" AND exits 1 when nothing matches, so swallow the
    # status instead of echoing a second "0".
    local stop_count
    stop_count=$(grep -ciE '(stopped|stopping|deactivat)' <<< "$journal" || true)
    stop_count="${stop_count:-0}"

    if (( start_count == 0 && stop_count == 0 )); then
        verbose "Skipping ${service}: no activity"
        return 0
    fi

    local restart_count=0
    if (( start_count > 1 )); then
        restart_count=$(( start_count - 1 ))
    fi

    local uptime_str
    uptime_str=$(service_uptime "$service")

    COUNT_TOTAL=$(( COUNT_TOTAL + 1 ))

    local color status
    if (( restart_count >= MIN_RESTARTS )); then
        color="$RED"
        status="FREQUENT"
        COUNT_FREQUENT=$(( COUNT_FREQUENT + 1 ))
        COUNT_RESTARTED=$(( COUNT_RESTARTED + 1 ))
    elif (( restart_count > 0 )); then
        color="$YELLOW"
        status="RESTARTED"
        COUNT_RESTARTED=$(( COUNT_RESTARTED + 1 ))
    else
        color="$GREEN"
        status="STABLE"
    fi

    output_line "$(printf ' %b%-40s %3d restarts %-20s %-14s %s%b' \
        "$color" "$service" "$restart_count" "${last_start:---}" \
        "${uptime_str:---}" "$status" "$RESET")"
}

# ══════════════════════════════════════════════════════════════════════
#  USAGE
# ══════════════════════════════════════════════════════════════════════
# NOTE(review): the original usage text and option parser were lost from this
# copy of the file. Only --since and --help are attested by the file header;
# the remaining option names below are reconstructed from the defaults block
# (SERVICES, MIN_RESTARTS, LOG_FILE, VERBOSE, COLOR) — confirm against the
# released script before shipping.

# Print the help text to stdout.
usage() {
    cat <<EOF
Usage: ${SCRIPT_NAME} [OPTIONS]

Detect and log unexpected service restarts by scanning the systemd journal.

Options:
  --since TIME          Start of the scan window (default: "24 hours ago")
  --services LIST       Comma-separated list of units to check
                        (default: every service seen in the journal)
  --min-restarts N      Restarts needed to flag a service as FREQUENT
                        (default: 3)
  --log-file FILE       Also write the report, without colors, to FILE
  --color WHEN          auto, always or never (default: auto)
  --no-color            Same as --color never
  -v, --verbose         Print debug output
  -h, --help            Show this help and exit

Environment:
  SINCE, MIN_RESTARTS, VERBOSE, COLOR  provide defaults for the options above.

Examples:
  ${SCRIPT_NAME}
  ${SCRIPT_NAME} --since "2 hours ago"
EOF
}

# Abort with an error unless the option in $1 is followed by a non-empty value.
require_value() {
    if [[ $# -lt 2 || -z "$2" ]]; then
        err "Option $1 requires a value"
        exit 1
    fi
}

#######################################
# Parse command-line options into the global settings.
# Globals:   SINCE SERVICES MIN_RESTARTS LOG_FILE VERBOSE COLOR (written)
# Arguments: the script's "$@"
# Exits:     0 after --help; 1 on an unknown option or a missing value.
#######################################
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --since)
                require_value "$@"
                SINCE="$2"
                shift 2
                ;;
            --services)
                require_value "$@"
                SERVICES="$2"
                shift 2
                ;;
            --min-restarts)
                require_value "$@"
                MIN_RESTARTS="$2"
                shift 2
                ;;
            --log-file|--log)
                require_value "$@"
                LOG_FILE="$2"
                shift 2
                ;;
            --color)
                require_value "$@"
                COLOR="$2"
                shift 2
                ;;
            --no-color)
                COLOR="never"
                shift
                ;;
            -v|--verbose)
                VERBOSE="true"
                shift
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                err "Unknown option: $1"
                usage >&2
                exit 1
                ;;
        esac
    done
}

# ══════════════════════════════════════════════════════════════════════
#  MAIN
# ══════════════════════════════════════════════════════════════════════

#######################################
# Entry point: parse options, print the report header, analyze every
# service, then print the summary.
# Arguments: the script's "$@"
# Exits:     1 when options are invalid, journalctl is missing, or the log
#            file cannot be written.
#######################################
main() {
    parse_args "$@"

    # MIN_RESTARTS feeds arithmetic comparisons; reject anything that is not
    # a plain non-negative integer and force base 10 so "08" is not octal.
    if [[ ! "$MIN_RESTARTS" =~ ^[0-9]+$ ]]; then
        err "Minimum restarts must be a non-negative integer (got: ${MIN_RESTARTS})"
        exit 1
    fi
    MIN_RESTARTS=$(( 10#$MIN_RESTARTS ))

    case "$COLOR" in
        auto|always|never) ;;
        *)
            warn "Unknown color mode '${COLOR}', using 'auto'"
            COLOR="auto"
            ;;
    esac
    setup_colors

    if ! command -v journalctl &>/dev/null; then
        err "journalctl is required (systemd not found)"
        exit 1
    fi

    echo ""
    printf '%b%s%b\n' "$BOLD" \
        "Service Restart Tracker — $(hostname -f 2>/dev/null || hostname)" "$RESET"
    printf '%b%s%b\n' "$DIM" "$(date '+%Y-%m-%d %H:%M:%S %Z')" "$RESET"
    printf '%b%s%b\n' "$DIM" \
        "Scanning since: ${SINCE} | Min restarts threshold: ${MIN_RESTARTS}" "$RESET"

    if [[ -n "$LOG_FILE" ]]; then
        # Truncate and write the log header; fail early with a clear message
        # rather than part-way through the report.
        if ! printf '%s\n' \
            "Service Restart Report — $(date '+%Y-%m-%d %H:%M:%S %Z')" \
            "Scanning since: ${SINCE}" \
            "" > "$LOG_FILE"; then
            err "Cannot write to log file: ${LOG_FILE}"
            exit 1
        fi
    fi

    section_header "Service Restart Activity"

    output_line "$(printf ' %b%-40s %12s %-20s %-14s %s%b' \
        "$BOLD" "SERVICE" "RESTARTS" "LAST RESTART" "UPTIME" "STATUS" "$RESET")"
    output_line " $(printf '%.0s─' {1..100})"

    local service_list
    service_list=$(get_service_list)

    if [[ -z "$service_list" ]]; then
        log "No services found with activity since ${SINCE}"
    else
        local service
        while IFS= read -r service; do
            if [[ -z "$service" ]]; then
                continue
            fi
            analyze_service "$service"
        done <<< "$service_list"
    fi

    section_header "Summary"

    field "Total services:" "$COUNT_TOTAL"
    field_color "Restarted:" "${YELLOW}${COUNT_RESTARTED}${RESET}"
    if (( COUNT_FREQUENT > 0 )); then
        field_color "Frequently restarting:" "${RED}${COUNT_FREQUENT}${RESET}"
    else
        field "Frequently restarting:" "$COUNT_FREQUENT"
    fi

    local stable=$(( COUNT_TOTAL - COUNT_RESTARTED ))
    field_color "Stable:" "${GREEN}${stable}${RESET}"

    if [[ -n "$LOG_FILE" ]]; then
        log "Results written to ${LOG_FILE}"
    fi

    echo ""
}

main "$@"