#!/usr/bin/env bash
#########################################################################################
#### systemd-timer-audit.sh — Audit all systemd timers, flag failed/stale/overlap    ####
#### Shows status, schedule, last/next trigger, and associated service state         ####
####                                                                                 ####
#### Author:  Phil Connor                                                            ####
#### Contact: contact@mylinux.work                                                   ####
#### License: MIT                                                                    ####
#### Version 1.00                                                                    ####
####                                                                                 ####
#### Usage:                                                                          ####
####   ./systemd-timer-audit.sh                                                      ####
####   ./systemd-timer-audit.sh --user                                               ####
####   ./systemd-timer-audit.sh --all --json                                         ####
####                                                                                 ####
#### See --help for all options.                                                     ####
#########################################################################################

set -euo pipefail

# ── Defaults ──────────────────────────────────────────────────────────
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"
TEXTFILE_DIR="/var/lib/node_exporter"
PROM_FILE=""
SHOW_USER=false
SHOW_ALL=false
ONLY_FAILED=false
JSON_OUTPUT=false

# ── State ─────────────────────────────────────────────────────────────
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
COUNT_TOTAL=0; COUNT_ACTIVE=0; COUNT_FAILED=0; COUNT_DISABLED=0
# Parallel arrays: index N across all of them describes the same timer.
declare -a T_NAME=() T_SVC=() T_STATUS=() T_LAST=() T_NEXT=()
declare -a T_SCOPE=() T_UFILE=() T_CAL=() T_NEXTMIN=()

# ── Colors ────────────────────────────────────────────────────────────
# The escape variables start out empty so the logging helpers below can be
# called before setup_colors has run without tripping `set -u` (an unset
# ${RED} would abort the script with "unbound variable" instead of printing
# the intended error message).
RED="" GREEN="" YELLOW="" BOLD="" DIM="" RESET=""

# Populate the color escape variables according to $COLOR:
#   never  - always empty
#   always - always ANSI escapes
#   other  - ANSI escapes only when stdout is a terminal
setup_colors() {
    RED="" GREEN="" YELLOW="" BOLD="" DIM="" RESET=""
    [[ "$COLOR" == "never" ]] && return 0

    if [[ "$COLOR" == "always" || -t 1 ]]; then
        RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[0;33m'
        BOLD='\033[1m'; DIM='\033[2m'; RESET='\033[0m'
    fi
    return 0
}

# ── Logging ───────────────────────────────────────────────────────────
# All diagnostics go to stderr so stdout stays clean for table/JSON output.
warn() { echo -e "${YELLOW}[WARN]${RESET} $*" >&2; }
err()  { echo -e "${RED}[ERROR]${RESET} $*" >&2; }

# Print a debug line when VERBOSE is "true"; always returns 0 so it is safe
# to call as the last statement of a function under `set -e`.
verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        echo -e "${DIM}[DEBUG]${RESET} $*" >&2
    fi
    return 0
}
# ── Helpers ───────────────────────────────────────────────────────────

# Print the value of a single unit property (empty line if systemctl fails).
#   $1 - unit name
#   $2 - property name
#   $3 - scope; "user" adds --user, anything else queries the system manager
get_prop() {
    local flag=""
    [[ "$3" == "user" ]] && flag="--user"
    # $flag is deliberately unquoted: when empty it must disappear entirely.
    # shellcheck disable=SC2086
    systemctl $flag show -p "$2" --value "$1" 2>/dev/null || echo ""
}

# Convert a systemd timestamp property to Unix epoch seconds on stdout.
# Accepts either a microsecond count or a human-readable date string;
# prints "0" for empty, "0", "n/a", or anything `date -d` cannot parse.
epoch_from_usec() {
    local raw="${1:-}"
    if [[ -z "$raw" || "$raw" == "0" || "$raw" == "n/a" ]]; then
        echo "0"
        return 0
    fi
    if [[ "$raw" =~ ^[0-9]+$ ]]; then
        # 10# forces base 10 so a leading zero is not parsed as octal.
        echo $(( 10#$raw / 1000000 ))
    else
        # systemd may hand back a formatted timestamp instead of microseconds.
        date -d "$raw" +%s 2>/dev/null || echo "0"
    fi
}

# Format epoch seconds as "YYYY-MM-DD HH:MM TZ"; prints "n/a" for 0/empty/invalid.
fmt_ts() {
    if [[ -z "${1:-}" || "$1" == "0" ]]; then
        echo "n/a"
        return 0
    fi
    date -d "@${1}" "+%Y-%m-%d %H:%M %Z" 2>/dev/null || echo "n/a"
}

# Reduce epoch seconds to an "HH:MM" key used for overlap grouping;
# prints an empty line for 0/empty/invalid input.
minute_key() {
    if [[ -z "${1:-}" || "$1" == "0" ]]; then
        echo ""
        return 0
    fi
    date -d "@${1}" "+%H:%M" 2>/dev/null || echo ""
}

# Escape a string for use inside a JSON double-quoted string.
# Unit names produced by systemd-escape contain backslashes (e.g. "\x2d"),
# which are not valid JSON escapes unless the backslash itself is escaped.
json_escape() {
    local s="${1:-}"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

# Escape a string for use as a Prometheus label value (backslash, double
# quote and newline are the three characters the text format escapes).
prom_escape() {
    local s="${1:-}"
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    printf '%s' "$s"
}

# ── Collect Timers ────────────────────────────────────────────────────

# Append every timer of the given scope to the T_* parallel arrays.
#   $1 - scope: "user" or anything else for the system manager
# Status label: "failed" if the triggered service is failed, otherwise
# "disabled" if the timer unit itself is not active, otherwise "active".
collect_timers() {
    local scope="$1" flag=""
    [[ "$scope" == "user" ]] && flag="--user"

    local list
    # Best effort: if systemctl is missing or the manager is unreachable the
    # scope is treated as having no timers rather than aborting the audit.
    # shellcheck disable=SC2086
    list=$(systemctl $flag list-timers --all --no-pager --no-legend 2>/dev/null || true)
    if [[ -z "$list" ]]; then
        verbose "No timers found for scope: $scope"
        return 0
    fi

    local line tunit svc svc_st tmr_st lu nu uf cal label
    while IFS= read -r line; do
        [[ -z "$line" ]] && continue

        # The timer unit is the first whitespace-separated field ending in .timer.
        tunit=$(awk '{for(i=1;i<=NF;i++) if($i ~ /\.timer$/) {print $i; exit}}' <<< "$line")
        [[ -z "$tunit" ]] && continue

        svc=$(get_prop "$tunit" "Unit" "$scope")
        [[ -z "$svc" ]] && svc="${tunit%.timer}.service"

        svc_st=$(get_prop "$svc" "ActiveState" "$scope")
        [[ -z "$svc_st" ]] && svc_st="unknown"
        tmr_st=$(get_prop "$tunit" "ActiveState" "$scope")

        lu=$(epoch_from_usec "$(get_prop "$tunit" "LastTriggerUSec" "$scope")")
        nu=$(epoch_from_usec "$(get_prop "$tunit" "NextElapseUSecRealtime" "$scope")")
        uf=$(get_prop "$tunit" "FragmentPath" "$scope")
        cal=$(get_prop "$tunit" "TimersCalendar" "$scope")
        [[ -z "$cal" ]] && cal=$(get_prop "$tunit" "TimersMonotonic" "$scope")

        label="active"
        [[ "$svc_st" == "failed" ]] && label="failed"
        [[ "$label" == "active" && "$tmr_st" != "active" ]] && label="disabled"

        T_NAME+=("$tunit"); T_SVC+=("$svc"); T_STATUS+=("$label")
        T_LAST+=("$lu"); T_NEXT+=("$nu"); T_SCOPE+=("$scope")
        T_UFILE+=("$uf"); T_CAL+=("$cal")
        T_NEXTMIN+=("$(minute_key "$nu")")
    done <<< "$list"
    return 0
}

# ── Overlap Detection ────────────────────────────────────────────────

# Print a warning for every HH:MM at which more than one timer's next
# trigger falls. Timers without a next trigger (empty key) are ignored.
# Keys are sorted so the report order is stable between runs.
detect_overlaps() {
    local -A by_minute=()
    local i k v
    for ((i = 0; i < ${#T_NAME[@]}; i++)); do
        k="${T_NEXTMIN[$i]}"
        [[ -z "$k" ]] && continue
        if [[ -n "${by_minute[$k]:-}" ]]; then
            by_minute[$k]+="|${T_NAME[$i]}"
        else
            by_minute[$k]="${T_NAME[$i]}"
        fi
    done
    (( ${#by_minute[@]} == 0 )) && return 0

    # The loop only prints, so running it in the pipeline's subshell is fine.
    printf '%s\n' "${!by_minute[@]}" | sort | while IFS= read -r k; do
        v="${by_minute[$k]}"
        if [[ "$v" == *"|"* ]]; then
            echo ""
            echo -e " ${YELLOW}⚠ Overlap: multiple timers fire near ${k}:${RESET}"
            echo "$v" | tr '|' '\n' | sed 's/^/ /'
        fi
    done
    return 0
}

# ── Table Output ─────────────────────────────────────────────────────

# Render the human-readable report (no-op when JSON output is selected).
# Honors ONLY_FAILED for the rows and VERBOSE for the unit-file column.
print_table() {
    [[ "$JSON_OUTPUT" == true ]] && return 0

    echo ""
    echo -e "${BOLD}Systemd Timer Audit${RESET}"
    echo ""

    local hdr_extra=""
    [[ "$VERBOSE" == "true" ]] && hdr_extra=" UNIT FILE"
    # Colors are passed through %b instead of being spliced into the format.
    printf ' %b%-30s %-25s %-10s %-22s %-22s%s%b\n' \
        "$BOLD" "TIMER" "SERVICE" "STATUS" "LAST TRIGGER" "NEXT TRIGGER" "$hdr_extra" "$RESET"
    printf ' %s\n' "$(printf '%.0s─' {1..105})"

    local i st c sfx lf nf extra
    for ((i = 0; i < ${#T_NAME[@]}; i++)); do
        st="${T_STATUS[$i]}"
        [[ "$ONLY_FAILED" == true && "$st" != "failed" ]] && continue

        c=""; sfx=""
        case "$st" in
            active)   c="$GREEN" ;;
            failed)   c="$RED"; sfx=" ← FAILED" ;;
            disabled) c="$DIM" ;;
        esac

        lf=$(fmt_ts "${T_LAST[$i]}")
        nf=$(fmt_ts "${T_NEXT[$i]}")
        extra=""
        [[ "$VERBOSE" == "true" ]] && extra=" ${T_UFILE[$i]}"

        printf ' %-30s %-25s %b%-10s%b %-22s %-22s%s%s\n' \
            "${T_NAME[$i]}" "${T_SVC[$i]}" "$c" "$st" "$RESET" "$lf" "$nf" "$extra" "$sfx"
    done

    detect_overlaps

    echo ""
    echo -e " ${BOLD}Summary${RESET}"
    printf ' %-14s %d\n' "Total timers:" "$COUNT_TOTAL"
    printf ' %-14s %b%d%b\n' "Active:" "$GREEN" "$COUNT_ACTIVE" "$RESET"
    printf ' %-14s %b%d%b\n' "Failed:" "$RED" "$COUNT_FAILED" "$RESET"
    printf ' %-14s %d\n' "Disabled:" "$COUNT_DISABLED"
    echo ""
}

# ── JSON Output ──────────────────────────────────────────────────────

# Emit the report as a single-line JSON document (no-op unless JSON output
# is selected). Honors ONLY_FAILED for the "timers" array; the "summary"
# object always reflects every collected timer. All string fields are
# JSON-escaped so unit names containing backslashes or quotes stay valid.
print_json() {
    [[ "$JSON_OUTPUT" != true ]] && return 0

    local i st sep=""
    printf '{"timers":['
    for ((i = 0; i < ${#T_NAME[@]}; i++)); do
        st="${T_STATUS[$i]}"
        [[ "$ONLY_FAILED" == true && "$st" != "failed" ]] && continue
        printf '%s{"timer":"%s","service":"%s","status":"%s","scope":"%s","last_trigger":"%s","next_trigger":"%s","last_trigger_epoch":%s}' \
            "$sep" \
            "$(json_escape "${T_NAME[$i]}")" "$(json_escape "${T_SVC[$i]}")" \
            "$(json_escape "$st")" "$(json_escape "${T_SCOPE[$i]}")" \
            "$(json_escape "$(fmt_ts "${T_LAST[$i]}")")" \
            "$(json_escape "$(fmt_ts "${T_NEXT[$i]}")")" \
            "${T_LAST[$i]}"
        sep=","
    done
    printf '],"summary":{"total":%d,"active":%d,"failed":%d,"disabled":%d}}\n' \
        "$COUNT_TOTAL" "$COUNT_ACTIVE" "$COUNT_FAILED" "$COUNT_DISABLED"
}

# ── Prometheus Metrics ────────────────────────────────────────────────

# Write the summary counters and per-timer last-trigger timestamps to
# PROM_FILE in Prometheus text format (no-op when PROM_FILE is empty).
# The file is written to a temp file in the same directory and renamed into
# place so a scraper never sees a partial file; the temp file is removed if
# the script aborts before the rename.
write_prom_metrics() {
    [[ -z "$PROM_FILE" ]] && return 0

    local output_dir tmp cleanup_cmd i
    output_dir="$(dirname -- "$PROM_FILE")"
    mkdir -p -- "$output_dir"
    tmp=$(mktemp "${output_dir}/.systemd_timers.XXXXXX")

    # $tmp is local, so bake its (shell-quoted) value into the trap command.
    printf -v cleanup_cmd 'rm -f -- %q' "$tmp"
    # shellcheck disable=SC2064
    trap "$cleanup_cmd" EXIT

    {
        echo "# HELP systemd_timer_total Total number of systemd timers."
        echo "# TYPE systemd_timer_total gauge"
        echo "systemd_timer_total $COUNT_TOTAL"
        echo "# HELP systemd_timer_active Number of active systemd timers."
        echo "# TYPE systemd_timer_active gauge"
        echo "systemd_timer_active $COUNT_ACTIVE"
        echo "# HELP systemd_timer_failed Number of timers with failed associated services."
        echo "# TYPE systemd_timer_failed gauge"
        echo "systemd_timer_failed $COUNT_FAILED"
        echo "# HELP systemd_timer_last_trigger_seconds Unix timestamp of last trigger per timer."
        echo "# TYPE systemd_timer_last_trigger_seconds gauge"
        for ((i = 0; i < ${#T_NAME[@]}; i++)); do
            # The scope label keeps a system and a user timer that share a
            # name from collapsing into one duplicate series.
            printf 'systemd_timer_last_trigger_seconds{timer="%s",scope="%s"} %s\n' \
                "$(prom_escape "${T_NAME[$i]}")" "$(prom_escape "${T_SCOPE[$i]}")" "${T_LAST[$i]}"
        done
    } > "$tmp"

    chmod 644 "$tmp"
    mv -f -- "$tmp" "$PROM_FILE"
    trap - EXIT
    verbose "Metrics written to $PROM_FILE"
}

# ── Tally ─────────────────────────────────────────────────────────────

# Recompute the COUNT_* globals from T_NAME / T_STATUS. Counters are reset
# first so calling this more than once does not double-count.
compute_summary() {
    COUNT_TOTAL=${#T_NAME[@]}
    COUNT_ACTIVE=0; COUNT_FAILED=0; COUNT_DISABLED=0
    local i
    for ((i = 0; i < ${#T_STATUS[@]}; i++)); do
        case "${T_STATUS[$i]}" in
            active)   COUNT_ACTIVE=$((COUNT_ACTIVE + 1)) ;;
            failed)   COUNT_FAILED=$((COUNT_FAILED + 1)) ;;
            disabled) COUNT_DISABLED=$((COUNT_DISABLED + 1)) ;;
        esac
    done
    return 0
}

# ══════════════════════════════════════════════════════════════════════
# USAGE
# ══════════════════════════════════════════════════════════════════════
# NOTE(review): the original help text and most option arms of parse_args
# were lost when this file was extracted (only the final "Unexpected
# argument" arm survived intact). --user, --all, --json and --help come from
# the usage examples in the file header; --failed, --prom, --prom-file,
# --no-color and --verbose are inferred from the ONLY_FAILED, PROM_FILE,
# TEXTFILE_DIR, COLOR and VERBOSE defaults. Confirm the exact flag spellings
# and help wording against the upstream script before relying on them.

# Print the command-line help to stdout.
usage() {
    printf '%s\n' \
        "Usage: ${SCRIPT_NAME} [OPTIONS]" \
        "" \
        "Audit systemd timers: status, last/next trigger, associated service state." \
        "" \
        "Options:" \
        "  --user             Audit user timers instead of system timers" \
        "  --all              Audit both system and user timers" \
        "  --failed           Show only timers whose service has failed" \
        "  --json             Emit JSON instead of a table" \
        "  --prom             Write Prometheus metrics to ${TEXTFILE_DIR}/systemd_timers.prom" \
        "  --prom-file FILE   Write Prometheus metrics to FILE" \
        "  --no-color         Disable colored output" \
        "  -v, --verbose      Show debug output and unit file paths" \
        "  -h, --help         Show this help and exit"
}

# Parse command-line options into the global option variables.
# Exits 0 after --help and 1 on any unknown option or stray argument.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --user)       SHOW_USER=true; shift ;;
            --all)        SHOW_ALL=true; shift ;;
            --failed)     ONLY_FAILED=true; shift ;;
            --json)       JSON_OUTPUT=true; shift ;;
            --prom)       PROM_FILE="${TEXTFILE_DIR}/systemd_timers.prom"; shift ;;
            --prom-file)
                if [[ $# -lt 2 ]]; then
                    err "--prom-file requires a FILE argument"
                    echo "Run ${SCRIPT_NAME} --help for usage" >&2
                    exit 1
                fi
                PROM_FILE="$2"
                shift 2
                ;;
            --no-color)   COLOR="never"; shift ;;
            -v|--verbose) VERBOSE="true"; shift ;;
            -h|--help)    usage; exit 0 ;;
            -*)
                err "Unknown option: $1"
                echo "Run ${SCRIPT_NAME} --help for usage" >&2
                exit 1
                ;;
            *)
                err "Unexpected argument: $1"
                echo "Run ${SCRIPT_NAME} --help for usage" >&2
                exit 1
                ;;
        esac
    done
}

# ══════════════════════════════════════════════════════════════════════
# MAIN
# ══════════════════════════════════════════════════════════════════════

# Entry point: parse options, collect timers for the requested scope(s),
# print the report (table or JSON) and optionally write Prometheus metrics.
# Metrics are written even when no timers were found so a previously written
# metrics file does not keep reporting stale counts.
main() {
    parse_args "$@"
    setup_colors

    if [[ "$SHOW_ALL" == true ]]; then
        collect_timers "system"
        collect_timers "user"
    elif [[ "$SHOW_USER" == true ]]; then
        collect_timers "user"
    else
        collect_timers "system"
    fi

    compute_summary

    if (( COUNT_TOTAL == 0 )); then
        if [[ "$JSON_OUTPUT" == true ]]; then
            echo '{"timers":[],"summary":{"total":0,"active":0,"failed":0,"disabled":0}}'
        else
            echo ""
            echo -e "${BOLD}Systemd Timer Audit${RESET}"
            echo ""
            echo " No timers found."
            echo ""
        fi
    else
        print_table
        print_json
    fi

    write_prom_metrics
}

main "$@"