#!/usr/bin/env bash
#########################################################################################
#### cron-doctor.sh — Diagnose common cron and systemd timer problems                ####
#### Checks PATH, missing binaries, unescaped %, output redirection, permissions,    ####
#### overlap risk, and failed timer services                                         ####
####                                                                                 ####
#### Author: Phil Connor                                                             ####
#### Contact: contact@mylinux.work                                                   ####
#### License: MIT                                                                    ####
#### Version 1.0                                                                     ####
####                                                                                 ####
#### Usage:                                                                          ####
####   ./cron-doctor.sh                                                              ####
####   ./cron-doctor.sh --user admin                                                 ####
####   ./cron-doctor.sh --fix-suggestions                                            ####
####                                                                                 ####
#### See --help for all options.                                                     ####
#########################################################################################

set -euo pipefail

# ── Defaults ──────────────────────────────────────────────────────────
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
VERBOSE="${VERBOSE:-false}"
COLOR="${COLOR:-auto}"
TARGET_USER=""
FIX_SUGGESTIONS=false
CRON_ONLY=false
TIMERS_ONLY=false

# ── Counters ──────────────────────────────────────────────────────────
WARN_COUNT=0
FAIL_COUNT=0
INFO_COUNT=0
FAIL_MESSAGES=()
WARN_MESSAGES=()

# ── Colors ────────────────────────────────────────────────────────────
#######################################
# Populate the color variables according to COLOR.
# Globals:   COLOR (read); RED GREEN YELLOW CYAN BOLD DIM RESET (written)
# Colors are enabled when COLOR=always, or when COLOR is anything other
# than "never" and stdout is a terminal; otherwise all are empty strings.
#######################################
setup_colors() {
  if [[ "$COLOR" != "never" ]] && { [[ "$COLOR" == "always" ]] || [[ -t 1 ]]; }; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[0;33m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    DIM='\033[2m'
    RESET='\033[0m'
  else
    RED="" GREEN="" YELLOW="" CYAN="" BOLD="" DIM="" RESET=""
  fi
}

# ── Logging ───────────────────────────────────────────────────────────
# Color codes go through %b (escape sequences expanded); message text goes
# through %s so backslashes inside echoed cron commands are printed verbatim.

# Print an [OK] line.
log() {
  printf '  %b[OK]%b   %s\n' "$GREEN" "$RESET" "$*"
}

# Print a [WARN] line, remember the message, and bump WARN_COUNT.
warn() {
  printf '  %b[WARN]%b %s\n' "$YELLOW" "$RESET" "$*"
  WARN_MESSAGES+=("$*")
  WARN_COUNT=$(( WARN_COUNT + 1 ))
}

# Print a [FAIL] line, remember the message, and bump FAIL_COUNT.
fail() {
  printf '  %b[FAIL]%b %s\n' "$RED" "$RESET" "$*"
  FAIL_MESSAGES+=("$*")
  FAIL_COUNT=$(( FAIL_COUNT + 1 ))
}

# Print an [INFO] line and bump INFO_COUNT.
info() {
  printf '  %b[INFO]%b %s\n' "$CYAN" "$RESET" "$*"
  INFO_COUNT=$(( INFO_COUNT + 1 ))
}

# Print a fix hint, only when --fix-suggestions is active. Always returns 0.
suggest() {
  if [[ "$FIX_SUGGESTIONS" == "true" ]]; then
    printf '         %b→ %s%b\n' "$DIM" "$*" "$RESET"
  fi
  return 0
}

# Print a debug line, only when VERBOSE=true. Always returns 0.
verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    printf '  %b[DEBUG]%b %s\n' "$DIM" "$RESET" "$*"
  fi
  return 0
}

# Print a section heading. $1 - heading text.
section() {
  printf '\n  %b── %s ──%b\n\n' "${BOLD}${CYAN}" "$1" "$RESET"
}

# ── Usage ─────────────────────────────────────────────────────────────
# NOTE(review): the original help text and option parser were lost when this
# file was extracted. Both are reconstructed from what the rest of the script
# still references (--user, --fix-suggestions, --no-color, --help, and the
# VERBOSE / CRON_ONLY / TIMERS_ONLY variables). The spellings --cron-only,
# --timers-only and --verbose, and exit status 64 for usage errors, are
# assumptions — confirm against the released script.

#######################################
# Print the help text and exit.
# Arguments: $1 - exit status (default 0)
#######################################
usage() {
  cat <<EOF
Usage: ${SCRIPT_NAME} [OPTIONS]

Diagnose common cron and systemd timer problems.

Options:
  --user NAME         Only check the user crontab belonging to NAME
  --fix-suggestions   Print a suggested fix under each finding
  --cron-only         Skip the systemd timer checks
  --timers-only       Skip the cron checks
  --verbose           Print debug output (same as VERBOSE=true)
  --no-color          Disable colored output (same as COLOR=never)
  -h, --help          Show this help and exit

Environment:
  VERBOSE   true|false          (default: false)
  COLOR     auto|always|never   (default: auto)

Exit codes:
  0    no issues found
  1    warnings only
  2    at least one failure
  64   invalid command-line usage
EOF
  exit "${1:-0}"
}

#######################################
# Parse command-line options into the global settings.
# Globals:   TARGET_USER FIX_SUGGESTIONS CRON_ONLY TIMERS_ONLY VERBOSE COLOR
# Arguments: the script's positional parameters
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --user)
        if (( $# < 2 )); then
          printf '%s: --user requires a user name\n' "$SCRIPT_NAME" >&2
          usage 64 >&2
        fi
        TARGET_USER="$2"
        shift 2
        ;;
      --user=*)
        TARGET_USER="${1#--user=}"
        shift
        ;;
      --fix-suggestions)
        FIX_SUGGESTIONS=true
        shift
        ;;
      --cron-only)
        CRON_ONLY=true
        shift
        ;;
      --timers-only)
        TIMERS_ONLY=true
        shift
        ;;
      --verbose)
        VERBOSE=true
        shift
        ;;
      --no-color)
        COLOR=never
        shift
        ;;
      -h|--help)
        usage 0
        ;;
      *)
        printf 'Unknown option: %s\n' "$1" >&2
        usage 64 >&2
        ;;
    esac
  done

  # The user name is joined onto the spool directory; refuse path separators
  # so it can never point outside of it.
  if [[ "$TARGET_USER" == */* ]]; then
    printf '%s: invalid user name: %s\n' "$SCRIPT_NAME" "$TARGET_USER" >&2
    exit 64
  fi
}

# ── Detect crontab directory ──────────────────────────────────────────
# Print the user-crontab spool directory, or an empty line if none exists.
detect_cron_spool() {
  local dir
  for dir in /var/spool/cron/crontabs /var/spool/cron; do  # Debian/Ubuntu, then RHEL/Rocky
    if [[ -d "$dir" ]]; then
      echo "$dir"
      return 0
    fi
  done
  echo ""
}

CRON_SPOOL="$(detect_cron_spool)"

# ── Get list of crontab files to check ────────────────────────────────
#######################################
# Print one user-crontab path per line (nothing when there are none).
# Globals: CRON_SPOOL, TARGET_USER (read)
#######################################
get_crontab_files() {
  local files=() candidate

  if [[ -n "$CRON_SPOOL" ]]; then
    if [[ -n "$TARGET_USER" ]]; then
      if [[ -f "$CRON_SPOOL/$TARGET_USER" ]]; then
        files+=("$CRON_SPOOL/$TARGET_USER")
      fi
    else
      for candidate in "$CRON_SPOOL"/*; do
        if [[ -f "$candidate" ]]; then
          files+=("$candidate")
        fi
      done
    fi
  fi

  # Expanding an empty array under `set -u` is a fatal error before Bash 4.4,
  # so only expand it when it has elements.
  if (( ${#files[@]} > 0 )); then
    printf '%s\n' "${files[@]}"
  fi
  return 0
}

# ── Parse cron entries from a file ────────────────────────────────────
#######################################
# Print "schedule|command" for every job line in a crontab.
# Comments, blank lines and NAME=value assignments are skipped.
# Arguments:
#   $1 - crontab path
#   $2 - "true" if entries carry a user column (system crontabs), default false
#######################################
parse_cron_entries() {
  local file="$1" has_user_field="${2:-false}"

  # A single awk pass over the file; [ \t] is used instead of [[:space:]]
  # because older mawk releases do not support POSIX character classes.
  awk -v has_user="$has_user_field" '
    /^[ \t]*#/ { next }
    /^[ \t]*$/ { next }
    /^[ \t]*[A-Za-z_][A-Za-z0-9_]*[ \t]*=/ { next }
    {
      # "@reboot"-style schedules occupy one field, classic ones five.
      nsched = ($1 ~ /^@/) ? 1 : 5
      first = nsched + 1
      if (has_user == "true") first++   # skip the user column

      sched = $1
      for (i = 2; i <= nsched; i++) sched = sched " " $i

      cmd = ""
      for (i = first; i <= NF; i++) cmd = cmd (i > first ? " " : "") $i

      print sched "|" cmd
    }
  ' < "$file"
}

# ── Check: crontab environment (PATH) ─────────────────────────────────
#######################################
# Warn when a crontab sets no PATH, or sets no MAILTO while at least one job
# has no output redirection.
# Arguments:
#   $1 - crontab path
#   $2 - label used in messages
#   $3 - "true" if entries carry a user column, default false
#######################################
check_cron_environment() {
  local file="$1" label="$2" has_user="${3:-false}"
  local has_path=false has_mailto=false line

  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ ^[[:space:]]*PATH[[:space:]]*= ]]; then
      has_path=true
    fi
    if [[ "$line" =~ ^[[:space:]]*MAILTO[[:space:]]*= ]]; then
      has_mailto=true
    fi
  done < "$file"

  if [[ "$has_path" == "false" ]]; then
    warn "${label}: no PATH set — cron uses /usr/bin:/bin only"
    suggest "Add to top of crontab: PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
  else
    verbose "${label}: PATH is set"
  fi

  if [[ "$has_mailto" == "true" ]]; then
    return 0
  fi

  # A job counts as redirected when it contains ">" followed by an absolute
  # path or by "&" (covers "> /file", ">> /file", "2>&1", ">/dev/null").
  local -r redirect_re='>[[:space:]]*[/&]'
  local has_unredirected=false _sched cmd
  while IFS='|' read -r _sched cmd; do
    [[ -z "$cmd" ]] && continue
    if [[ ! "$cmd" =~ $redirect_re ]]; then
      has_unredirected=true
      break
    fi
  done < <(parse_cron_entries "$file" "$has_user")

  if [[ "$has_unredirected" == "true" ]]; then
    warn "${label}: no MAILTO and some jobs lack output redirection — output may be lost"
    suggest "Add MAILTO=admin@example.com or redirect: command >> /var/log/job.log 2>&1"
  fi
}

# ── Check: missing binaries ───────────────────────────────────────────
#######################################
# Fail when a job's leading absolute-path command is missing or not executable.
# Arguments:
#   $1 - crontab path
#   $2 - label used in messages
#   $3 - "true" if entries carry a user column, default false
#######################################
check_missing_binaries() {
  local file="$1" label="$2" has_user="${3:-false}"
  local _sched cmd binary

  while IFS='|' read -r _sched cmd; do
    [[ -z "$cmd" ]] && continue

    # extract the first word (binary) — handle cd/env/sudo/flock prefixes
    binary=$(sed -E '
        s#^(cd [^ ;]+[; ]+(&&[[:space:]]*)?)##
        s#^(sudo (-u [^ ]+ )?)##
        s#^(env (-i )?([A-Za-z_]+=[^ ]+ )*)##
        s#^(/usr/bin/flock [^ ]+ )##
        s#^(/bin/sh -c |/bin/bash -c )##
      ' <<< "$cmd" | awk '{print $1}')

    # strip trailing shell metacharacters (;, &&, ||, |)
    binary="${binary%%[;&|]*}"
    [[ -z "$binary" ]] && continue

    # skip shell builtins
    case "$binary" in
      test|true|false|echo|cd|source|'['|'[[') continue ;;
    esac

    if [[ "$binary" == /* ]]; then
      # absolute path: check it directly
      if [[ ! -f "$binary" ]]; then
        fail "${label}: binary not found: ${binary}"
        suggest "Check path: which $(basename "$binary")"
      elif [[ ! -x "$binary" ]]; then
        fail "${label}: not executable: ${binary}"
        suggest "chmod +x ${binary}"
      fi
    elif ! command -v "$binary" &>/dev/null; then
      # Bare command name: this lookup uses the PATH of the shell running
      # this script, not cron's, so it is only reported as debug output.
      verbose "${label}: can't verify relative command: ${binary}"
    fi
  done < <(parse_cron_entries "$file" "$has_user")
}

# ── Check: unescaped percent signs ────────────────────────────────────
#######################################
# Report job lines containing a "%" that is not preceded by a backslash.
# NOTE(review): the tail of this function was lost in extraction. The test
# itself ("% not preceded by \") is taken from the surviving comment and
# regex prefix; the message wording and the FAIL severity are reconstructed.
# Arguments:
#   $1 - crontab path
#   $2 - label used in messages
#######################################
check_percent_signs() {
  local file="$1" label="$2"
  local lineno=0 line
  # Inside a bracket expression a backslash is literal, so [^\\] means
  # "any character except a backslash". Pure Bash: no dependency on grep -P.
  local -r unescaped_re='(^|[^\\])%'

  while IFS= read -r line || [[ -n "$line" ]]; do
    lineno=$(( lineno + 1 ))
    [[ "$line" =~ ^[[:space:]]*# ]] && continue
    [[ "$line" =~ ^[[:space:]]*$ ]] && continue
    [[ "$line" =~ ^[[:space:]]*[A-Za-z_][A-Za-z0-9_]*[[:space:]]*= ]] && continue

    # check for % not preceded by \ (unescaped)
    if [[ "$line" =~ $unescaped_re ]]; then
      fail "${label}: line ${lineno}: unescaped % — cron turns it into a newline"
      suggest "Escape each percent sign as \\% (e.g. date +\\%Y-\\%m-\\%d)"
    fi
  done < "$file"
}

# ── Check: crontab permissions ────────────────────────────────────────
#######################################
# Check ownership and mode of a crontab file.
# NOTE(review): the head of this function was lost in extraction; the two
# stat calls are reconstructed from the surviving fragment
# "... 2>/dev/null) || return  owner=$(stat -c '%U' ...)".
# Arguments:
#   $1 - crontab path
#   $2 - label used in messages
# Returns: always 0; when stat fails the check is silently skipped.
#######################################
check_crontab_permissions() {
  local file="$1" label="$2"
  local perms owner expected_user

  # `return 0`, not a bare `return`: a bare return would hand stat's failure
  # status to the caller and abort the whole script under `set -e`.
  perms=$(stat -c '%a' "$file" 2>/dev/null) || return 0
  owner=$(stat -c '%U' "$file" 2>/dev/null) || return 0

  # system files (/etc/crontab, /etc/cron.d/*) are expected to be 644 root-owned
  # user crontabs are expected to be 600 owned by the user
  if [[ "$file" == /etc/* ]]; then
    if [[ "$owner" != "root" ]]; then
      warn "${label}: owned by ${owner}, expected root"
    fi
    # 022 = group-write | other-write
    if (( (8#$perms & 8#022) != 0 )); then
      warn "${label}: permissions are ${perms} — group/world-writable system crontabs may be ignored by cron"
      suggest "chmod 644 ${file}"
    fi
  else
    expected_user=$(basename "$file")
    if [[ "$perms" != "600" ]]; then
      warn "${label}: permissions are ${perms}, expected 600"
      suggest "chmod 600 ${file}"
    fi
    if [[ "$owner" != "$expected_user" && "$owner" != "root" ]]; then
      fail "${label}: owned by ${owner}, expected ${expected_user} or root"
    fi
  fi
}

# ── Check: missing trailing newline ───────────────────────────────────
#######################################
# Fail when a non-empty, readable crontab does not end with a newline.
# Arguments:
#   $1 - crontab path
#   $2 - label used in messages
#######################################
check_trailing_newline() {
  local file="$1" label="$2"

  if [[ ! -r "$file" || ! -s "$file" ]]; then
    return 0
  fi

  # Command substitution strips trailing newlines, so the result is empty
  # exactly when the last byte is "\n". This needs no helper such as xxd.
  if [[ -n "$(tail -c 1 "$file")" ]]; then
    fail "${label}: no trailing newline — last cron entry will not run"
    suggest "echo '' >> ${file}"
  fi
}

# ── Check: overlap risk ──────────────────────────────────────────────
#######################################
# Warn about jobs scheduled every minute or every 1-5 minutes whose command
# does not mention any kind of lock.
# Arguments:
#   $1 - crontab path
#   $2 - label used in messages
#   $3 - "true" if entries carry a user column, default false
#######################################
check_overlap_risk() {
  local file="$1" label="$2" has_user="${3:-false}"
  local sched cmd
  # minute field is "*" or "*/1" .. "*/5"
  local -r frequent_re='^\*([[:space:]]|/[1-5][[:space:]])'

  while IFS='|' read -r sched cmd; do
    [[ -z "$cmd" ]] && continue
    [[ "$sched" =~ $frequent_re ]] || continue

    # any mention of "lock" (flock, lockfile, ...) counts as protected
    if ! grep -qi 'lock' <<< "$cmd"; then
      warn "${label}: frequent job (${sched%% *}) without locking: ${cmd:0:60}"
      suggest "Wrap with flock: /usr/bin/flock -n /var/lock/myjob.lock $cmd"
    fi
  done < <(parse_cron_entries "$file" "$has_user")
}

# ── Check: cron.allow / cron.deny ─────────────────────────────────────
#######################################
# Report on /etc/cron.allow and /etc/cron.deny; when a target user is set,
# check whether that user is listed. cron.allow takes precedence.
# Globals: TARGET_USER (read)
#######################################
check_cron_access() {
  section "Cron Access Control"

  if [[ -f /etc/cron.allow ]]; then
    info "/etc/cron.allow exists — only listed users can use cron"
    [[ -z "$TARGET_USER" ]] && return 0
    if grep -qxF "$TARGET_USER" /etc/cron.allow 2>/dev/null; then
      log "${TARGET_USER} is in cron.allow"
    else
      fail "${TARGET_USER} is NOT in cron.allow — cron jobs will not run"
      suggest "echo '${TARGET_USER}' >> /etc/cron.allow"
    fi
  elif [[ -f /etc/cron.deny ]]; then
    info "/etc/cron.deny exists — listed users are blocked"
    [[ -z "$TARGET_USER" ]] && return 0
    if grep -qxF "$TARGET_USER" /etc/cron.deny 2>/dev/null; then
      fail "${TARGET_USER} is in cron.deny — cron jobs will not run"
      suggest "Remove ${TARGET_USER} from /etc/cron.deny"
    else
      log "${TARGET_USER} is not in cron.deny"
    fi
  else
    verbose "No cron.allow or cron.deny found"
  fi
}

# ── Check: systemd timers ─────────────────────────────────────────────
#######################################
# Report failed services that have a matching timer, timers that are not
# active, and calendar timers without Persistent=true.
# Skipped with an [INFO] line when systemctl is not installed.
#######################################
check_systemd_timers() {
  section "Systemd Timers"

  if ! command -v systemctl &>/dev/null; then
    info "systemctl not found — skipping timer checks"
    return 0
  fi

  # failed timer-triggered services
  # --no-legend plus the *.service filter keep header/footer words such as
  # "LOAD" or "ACTIVE" from being treated as unit names.
  local failed_services svc timer unit_file failed_timer_count=0
  failed_services=$(systemctl list-units --type=service --state=failed \
      --no-pager --plain --no-legend 2>/dev/null \
    | awk '$1 ~ /\.service$/ {print $1}' || true)

  while IFS= read -r svc; do
    [[ -z "$svc" ]] && continue
    timer="${svc%.service}.timer"
    # Decide on the listing's content, not on systemctl's exit status, which
    # is not a dependable "unit exists" signal across systemd versions.
    unit_file=$(systemctl list-unit-files --no-pager --no-legend "$timer" 2>/dev/null || true)
    if [[ "$unit_file" == *.timer* ]]; then
      fail "Timer-triggered service failed: ${svc}"
      suggest "journalctl -u ${svc} -b --no-pager | tail -20"
      failed_timer_count=$(( failed_timer_count + 1 ))
    fi
  done <<< "$failed_services"

  if (( failed_timer_count == 0 )); then
    log "No failed timer-triggered services"
  fi

  # timers enabled but not active (columns: UNIT LOAD ACTIVE SUB DESCRIPTION)
  local timer_name _load state _rest
  while read -r timer_name _load state _rest; do
    [[ "$timer_name" == *.timer ]] || continue
    if [[ "$state" != "active" ]]; then
      warn "Timer ${timer_name} is loaded but not active (state: ${state})"
      suggest "systemctl start ${timer_name}"
    fi
  done < <(systemctl list-units --type=timer --all --no-pager --plain --no-legend 2>/dev/null || true)

  # timers without Persistent=true
  local persistent has_calendar
  while IFS= read -r timer_name; do
    [[ "$timer_name" == *.timer ]] || continue

    # `|| true`: a failing `systemctl show` must not abort the run.
    persistent=$(systemctl show "$timer_name" -p Persistent 2>/dev/null | cut -d= -f2 || true)
    [[ "$persistent" == "no" ]] || continue

    # Persistent= only matters for calendar timers; skip timers whose
    # TimersCalendar property is empty.
    has_calendar=$(systemctl show "$timer_name" -p TimersCalendar 2>/dev/null || true)
    if [[ -n "$has_calendar" && "$has_calendar" != "TimersCalendar=" ]]; then
      warn "${timer_name}: Persistent=false — missed runs during downtime won't catch up"
      suggest "Add Persistent=true to [Timer] section: systemctl edit ${timer_name}"
    fi
  done < <(systemctl list-units --type=timer --state=active --no-pager --plain --no-legend 2>/dev/null \
    | awk '{print $1}')
}

# ── Run cron checks on a single file ─────────────────────────────────
#######################################
# Run every per-file check against one crontab.
# Arguments:
#   $1 - crontab path
#   $2 - label used in messages
#   $3 - "true" if entries carry a user column, default false
#######################################
check_crontab_file() {
  local file="$1" label="$2" has_user="${3:-false}"

  verbose "Checking: ${file}"
  check_crontab_permissions "$file" "$label"

  # The remaining checks read the file; an unreadable file would make their
  # input redirections fail and abort the script under `set -e`.
  if [[ ! -r "$file" ]]; then
    info "${label}: not readable — skipping content checks (re-run as root)"
    return 0
  fi

  check_trailing_newline "$file" "$label"
  check_cron_environment "$file" "$label" "$has_user"
  check_percent_signs "$file" "$label"
  check_missing_binaries "$file" "$label" "$has_user"
  check_overlap_risk "$file" "$label" "$has_user"
}

# ── Cron checks ───────────────────────────────────────────────────────
# Run access-control checks, then user crontabs, /etc/crontab and /etc/cron.d.
run_cron_checks() {
  local crontab_files file drop_in found_drop_ins=false

  check_cron_access

  # User crontabs
  section "User Crontabs"
  crontab_files=$(get_crontab_files)
  if [[ -z "$crontab_files" ]]; then
    if [[ -n "$CRON_SPOOL" && ! -r "$CRON_SPOOL" ]]; then
      info "Cannot read ${CRON_SPOOL} — re-run as root to inspect user crontabs"
    elif [[ -n "$TARGET_USER" ]]; then
      info "No crontab found for user: ${TARGET_USER}"
    else
      info "No user crontabs found in ${CRON_SPOOL:-/var/spool/cron}"
    fi
  else
    while IFS= read -r file; do
      [[ -z "$file" ]] && continue
      check_crontab_file "$file" "crontab($(basename "$file"))" false
    done <<< "$crontab_files"
  fi

  # System crontab
  if [[ -f /etc/crontab ]]; then
    section "System Crontab (/etc/crontab)"
    check_crontab_file "/etc/crontab" "/etc/crontab" true
  fi

  # /etc/cron.d drop-ins
  if [[ -d /etc/cron.d ]]; then
    section "Drop-ins (/etc/cron.d)"
    for drop_in in /etc/cron.d/*; do
      [[ -f "$drop_in" ]] || continue
      # skip dpkg/ucf leftovers
      [[ "$drop_in" =~ \.(dpkg-|ucf-) ]] && continue
      found_drop_ins=true
      check_crontab_file "$drop_in" "cron.d/$(basename "$drop_in")" true
    done
    if [[ "$found_drop_ins" == "false" ]]; then
      info "No drop-in files in /etc/cron.d"
    fi
  fi
}

# ── Summary ───────────────────────────────────────────────────────────
# Print the failure list and the warning count.
print_summary() {
  local msg

  printf '\n  %b── Summary ──%b\n\n' "$BOLD" "$RESET"

  if (( FAIL_COUNT + WARN_COUNT == 0 )); then
    printf '  %b✓ No issues found%b\n' "$GREEN" "$RESET"
  else
    if (( FAIL_COUNT > 0 )); then
      printf '  %b%s failure(s):%b\n' "$RED" "$FAIL_COUNT" "$RESET"
      for msg in "${FAIL_MESSAGES[@]}"; do
        printf '    %b•%b %s\n' "$RED" "$RESET" "$msg"
      done
    fi
    if (( WARN_COUNT > 0 )); then
      printf '  %b%s warning(s)%b\n' "$YELLOW" "$WARN_COUNT" "$RESET"
    fi
  fi
  printf '\n'
}

# ══════════════════════════════════════════════════════════════════════
# Main
# ══════════════════════════════════════════════════════════════════════
#######################################
# Entry point.
# Exit status: 2 if any check failed, 1 if there were only warnings, else 0.
#######################################
main() {
  parse_args "$@"
  setup_colors  # after parsing, so --no-color is honored

  printf '\n  %bCron Doctor%b — diagnosing scheduled task issues\n' "$BOLD" "$RESET"
  printf '  %b%s%b\n' "$DIM" "$(date '+%Y-%m-%d %H:%M:%S')" "$RESET"

  if [[ "$TIMERS_ONLY" == "false" ]]; then
    run_cron_checks
  fi

  if [[ "$CRON_ONLY" == "false" ]]; then
    check_systemd_timers
  fi

  print_summary

  if (( FAIL_COUNT > 0 )); then
    exit 2
  elif (( WARN_COUNT > 0 )); then
    exit 1
  fi
  exit 0
}

main "$@"