Files
linux-scripts/cron-job-exporter.sh
T
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

317 lines
8.8 KiB
Bash

#!/bin/bash
#############################################################
#### Cron Job Monitoring Exporter for Prometheus ####
#### Tracks whether scheduled cron jobs ran successfully, ####
#### their exit codes, duration, and staleness ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 1.0 ####
#### ####
#### Usage: ./cron-job-exporter.sh [OPTIONS] ####
#############################################################
#
# Monitors cron job execution by wrapping cron commands.
# Two modes of operation:
# 1. Wrapper mode: wrap a cron command to record metrics
# 2. Collector mode: scan state files and write .prom output
#
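# Metrics exported:
# - cron_job_exit_code (last exit code)
# - cron_job_duration_seconds (last execution time)
# - cron_job_last_run_timestamp (unix timestamp of last run)
# - cron_job_success (1 if last run exited 0, else 0)
# - cron_job_runs_total (total number of runs)
# - cron_job_stale (1 if the last run is older than the stale threshold, else 0)
# - cron_job_collector_last_run_timestamp (unix timestamp of last collector run)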
#
# Requirements:
# - Bash 4.0+
# - node_exporter with textfile collector enabled
#
# Strict mode: abort on command failure, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
#########################
### Configuration ###
#########################
# Each setting keeps a non-empty value inherited from the environment and
# otherwise falls back to the default given here.
: "${NODE_DIR:=/var/lib/node_exporter}"
: "${STATE_DIR:=/var/lib/cron-job-exporter}"
# Output file picked up by node_exporter's textfile collector.
PROM_FILE="${NODE_DIR}/cron_jobs.prom"
: "${STALE_THRESHOLD:=86400}" # 24 hours
# Any non-empty value enables debug logging.
: "${DEBUG:=}"
#########################
### Logging ###
#########################
# ANSI escape sequences used to colorize the severity tag of a log line.
# They are stored unexpanded; `echo -e` interprets them at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Write "<color>[<TAG>]<reset> <message>" to stderr.
#   $1 - color escape sequence
#   $2 - tag text (without brackets)
#   $3 - message
_log_tagged() {
  echo -e "${1}[${2}]${NC} ${3}" >&2
}

# Informational message (green tag) on stderr.
log_info() {
  _log_tagged "$GREEN" "INFO" "$1"
}

# Warning message (yellow tag) on stderr.
log_warn() {
  _log_tagged "$YELLOW" "WARN" "$1"
}

# Error message (red tag) on stderr.
log_error() {
  _log_tagged "$RED" "ERROR" "$1"
}
# Print a debug message to stderr when DEBUG is non-empty.
#   $1 - message
# Always returns 0. The script runs under `set -e`, so a non-zero status
# from this function would abort the caller whenever debugging is off.
log_debug() {
  if [[ -n "${DEBUG:-}" ]]; then
    printf '%s\n' "[DEBUG] $1" >&2
  fi
  return 0
}
#########################
### Parse Arguments ###
#########################
# Print the usage text to stdout, then terminate the script with status 0.
# The heredoc delimiter is unquoted on purpose: the path the script was
# invoked with is interpolated into the MODES section.
show_help() {
  local self="$0"
  cat <<USAGE
Cron Job Monitoring Exporter for Prometheus
Tracks cron job execution status, exit codes, and duration.
MODES:
Wrapper mode: ${self} --wrap --name <job_name> -- <command>
Collector mode: ${self} --collect
WRAPPER MODE (use in crontab):
Wraps a cron command, records exit code, duration, and timestamp
to a state file. Run --collect separately to generate .prom output.
Example crontab:
* * * * * /opt/cron-job-exporter.sh --wrap --name backup_db -- /opt/backup-db.sh
0 * * * * /opt/cron-job-exporter.sh --wrap --name log_cleanup -- /opt/cleanup-logs.sh
COLLECTOR MODE (run on schedule or as oneshot):
Reads all state files and writes a single .prom file for node_exporter.
Example crontab:
* * * * * /opt/cron-job-exporter.sh --collect
OPTIONS:
--wrap Wrapper mode: run a command and record metrics
--collect Collector mode: generate .prom from state files
--name NAME Job name for wrapper mode (required with --wrap)
--stale-threshold SEC Seconds before a job is considered stale (default: 86400)
--state-dir DIR State file directory (default: /var/lib/cron-job-exporter)
--help Show this help
USAGE
  exit 0
}
MODE=""
JOB_NAME=""
JOB_CMD=()
parse_args() {
while [[ $# -gt 0 ]]; do
case "$1" in
--wrap) MODE="wrap"; shift ;;
--collect) MODE="collect"; shift ;;
--name) JOB_NAME="$2"; shift 2 ;;
--stale-threshold) STALE_THRESHOLD="$2"; shift 2 ;;
--state-dir) STATE_DIR="$2"; shift 2 ;;
--help) show_help ;;
--) shift; JOB_CMD=("$@"); break ;;
*) log_error "Unknown option: $1"; exit 1 ;;
esac
done
if [[ -z "$MODE" ]]; then
log_error "Must specify --wrap or --collect"
echo "Run '$0 --help' for usage."
exit 1
fi
if [[ "$MODE" == "wrap" ]]; then
if [[ -z "$JOB_NAME" ]]; then
log_error "--name is required in wrapper mode"
exit 1
fi
if [[ ${#JOB_CMD[@]} -eq 0 ]]; then
log_error "No command specified after --"
exit 1
fi
fi
}
#########################
### Sanitize ###
#########################
#######################################
# Turn an arbitrary job name into a safe identifier used for both the
# state-file name and the metric label value.
# The name is lowercased, every character outside [a-z0-9_] becomes "_",
# runs of "_" collapse to one, and a leading/trailing "_" is dropped.
# Arguments: $1 - raw job name
# Outputs:   sanitized name on stdout; "unnamed" if nothing usable remains,
#            so the caller never ends up with an empty file name
#######################################
sanitize_name() {
  # Pin the locale so case folding and the bracket ranges below are
  # byte-wise and do not depend on the caller's locale.
  local LC_ALL=C
  local name="${1:-}"

  name="${name,,}"
  name="${name//[^a-z0-9_]/_}"
  while [[ "$name" == *__* ]]; do
    name="${name//__/_}"
  done
  name="${name#_}"
  name="${name%_}"

  # printf instead of echo: echo would swallow names such as "-n" or "-e".
  printf '%s\n' "${name:-unnamed}"
}
#########################
### Wrapper Mode ###
#########################
#######################################
# Persist the result of one job run to its state file (best effort).
# The file is written to a temp file in the same directory and renamed
# into place so readers never see a partial file.
# Globals:   STATE_DIR (read)
# Arguments: $1 - state file path
#            $2 - sanitized job name
#            $3 - exit code of the wrapped command
#            $4 - duration in seconds
# Returns:   0 when the state file was replaced, 1 otherwise
#######################################
_write_job_state() {
  local state_file="$1" job="$2" exit_code="$3" duration="$4"
  local runs=0 success=0 key value tmpfile

  mkdir -p -- "$STATE_DIR" || return 1

  # Carry the run counter forward; anything that is not a plain integer is
  # ignored so a damaged state file cannot break the arithmetic below.
  if [[ -r "$state_file" ]]; then
    while IFS='=' read -r key value; do
      if [[ "$key" == "runs" && "$value" =~ ^[0-9]+$ ]]; then
        runs=$((10#$value))
      fi
    done < "$state_file"
  fi
  runs=$((runs + 1))

  if [[ "$exit_code" -eq 0 ]]; then
    success=1
  fi

  tmpfile=$(mktemp "${state_file}.XXXXXX") || return 1
  # mktemp creates the file with mode 0600; open it up for reading so a
  # collector running as a different user can still read the state.
  if ! printf 'name=%s\nexit_code=%s\nduration=%s\ntimestamp=%s\nruns=%s\nsuccess=%s\n' \
      "$job" "$exit_code" "$duration" "$(date +%s)" "$runs" "$success" > "$tmpfile" \
    || ! chmod 0644 "$tmpfile" \
    || ! mv -f -- "$tmpfile" "$state_file"; then
    rm -f -- "$tmpfile"
    return 1
  fi

  log_debug "State written to $state_file (exit=$exit_code, duration=${duration}s, runs=$runs)" || true
  return 0
}

#######################################
# Wrapper mode: run JOB_CMD, then record its exit code, duration, timestamp
# and run count in "$STATE_DIR/<sanitized name>.state".
# Recording is best effort: the wrapped command always runs, and the script
# always exits with that command's exit code, even if the state cannot be
# written.
# Globals:   JOB_NAME, JOB_CMD, STATE_DIR (read)
# Returns:   does not return; exits with the wrapped command's exit code
#######################################
run_wrapper() {
  local safe_name
  local start_ns end_ns elapsed_ns
  local duration="0" exit_code=0

  safe_name=$(sanitize_name "$JOB_NAME") || safe_name=""

  # "|| true": a non-zero status from a log helper must never stop the job.
  log_debug "Wrapping command: ${JOB_CMD[*]}" || true
  log_debug "Job name: $safe_name" || true

  start_ns=$(date +%s%N) || start_ns=""
  # Run the command; "||" captures a failure status without tripping set -e.
  "${JOB_CMD[@]}" || exit_code=$?
  end_ns=$(date +%s%N) || end_ns=""

  # Millisecond-resolution duration using integer arithmetic only (no bc).
  # If date did not produce plain nanosecond counts the duration stays 0.
  if [[ "$start_ns" =~ ^[0-9]+$ && "$end_ns" =~ ^[0-9]+$ ]] \
    && ((end_ns >= start_ns)); then
    elapsed_ns=$((end_ns - start_ns))
    printf -v duration '%d.%03d' \
      "$((elapsed_ns / 1000000000))" "$(((elapsed_ns / 1000000) % 1000))"
  fi

  if [[ -z "$safe_name" ]]; then
    log_warn "Job name '$JOB_NAME' is empty after sanitizing; run not recorded" || true
  elif ! _write_job_state "${STATE_DIR}/${safe_name}.state" \
    "$safe_name" "$exit_code" "$duration"; then
    log_warn "Could not record state for job '$safe_name' in $STATE_DIR" || true
  fi

  # Exit with the original command's exit code
  exit "$exit_code"
}
#########################
### Collector Mode ###
#########################
#######################################
# Print one metric family: HELP line, TYPE line, then its samples.
# Arguments: $1 - metric name
#            $2 - metric type
#            $3 - help text
#            $4 - newline-terminated sample lines (may be empty)
#######################################
_prom_family() {
  printf '# HELP %s %s\n# TYPE %s %s\n%s' "$1" "$3" "$1" "$2" "$4"
}

#######################################
# Collector mode: read every "$STATE_DIR"/*.state file and write PROM_FILE.
# Samples are grouped per metric family. State files with an unexpected
# name or non-numeric values are skipped with a warning, so one damaged
# file cannot corrupt the output for all jobs. The output is written to a
# temp file next to PROM_FILE and renamed into place.
# Globals:   STATE_DIR, NODE_DIR, PROM_FILE, STALE_THRESHOLD (read)
# Outputs:   log messages on stderr
# Returns:   0 on success, 1 if the configuration is invalid or the
#            output file could not be written
#######################################
run_collector() {
  local int_re='^[0-9]+$'
  local float_re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
  local name_re='^[A-Za-z0-9_]+$'
  local now threshold state_file tmpfile key value
  local name exit_code duration timestamp runs success stale
  local exit_lines="" duration_lines="" ts_lines=""
  local success_lines="" runs_lines="" stale_lines=""
  local jobs=0
  local -A seen=()

  # The threshold is evaluated in arithmetic below; accept digits only.
  if [[ ! "$STALE_THRESHOLD" =~ $int_re ]]; then
    log_error "STALE_THRESHOLD must be a non-negative integer (got: $STALE_THRESHOLD)"
    return 1
  fi
  threshold=$((10#$STALE_THRESHOLD))

  if ! mkdir -p -- "$STATE_DIR" "$NODE_DIR"; then
    log_error "Cannot create $STATE_DIR or $NODE_DIR"
    return 1
  fi
  now=$(date +%s)

  for state_file in "$STATE_DIR"/*.state; do
    # Also covers the unmatched-glob case, where the literal pattern remains.
    [[ -f "$state_file" ]] || continue
    if [[ ! -r "$state_file" ]]; then
      log_warn "Skipping unreadable state file: $state_file"
      continue
    fi

    # Keys missing from the file keep these defaults.
    name="" exit_code="0" duration="0" timestamp="0" runs="0" success="0"
    while IFS='=' read -r key value || [[ -n "$key" ]]; do
      case "$key" in
        name) name="$value" ;;
        exit_code) exit_code="$value" ;;
        duration) duration="$value" ;;
        timestamp) timestamp="$value" ;;
        runs) runs="$value" ;;
        success) success="$value" ;;
      esac
    done < "$state_file"

    if [[ -z "$name" ]]; then
      continue
    fi
    # Values end up in arithmetic and in the exposition file: validate first.
    if [[ ! "$name" =~ $name_re || ! "$exit_code" =~ $int_re \
      || ! "$duration" =~ $float_re || ! "$timestamp" =~ $int_re \
      || ! "$runs" =~ $int_re || ! "$success" =~ ^[01]$ ]]; then
      log_warn "Skipping malformed state file: $state_file"
      continue
    fi
    # Two samples with identical labels would invalidate the whole file.
    if [[ -n "${seen[$name]+x}" ]]; then
      log_warn "Skipping duplicate job name '$name' in $state_file"
      continue
    fi
    seen[$name]=1

    # Normalize: base 10 for integers, leading zero for values like ".123".
    exit_code=$((10#$exit_code))
    timestamp=$((10#$timestamp))
    runs=$((10#$runs))
    if [[ "$duration" == .* ]]; then
      duration="0${duration}"
    fi

    if ((now - timestamp > threshold)); then
      stale=1
    else
      stale=0
    fi

    exit_lines+="cron_job_exit_code{job=\"${name}\"} ${exit_code}"$'\n'
    duration_lines+="cron_job_duration_seconds{job=\"${name}\"} ${duration}"$'\n'
    ts_lines+="cron_job_last_run_timestamp{job=\"${name}\"} ${timestamp}"$'\n'
    success_lines+="cron_job_success{job=\"${name}\"} ${success}"$'\n'
    runs_lines+="cron_job_runs_total{job=\"${name}\"} ${runs}"$'\n'
    stale_lines+="cron_job_stale{job=\"${name}\"} ${stale}"$'\n'
    jobs=$((jobs + 1))

    # "|| true": a non-zero status from the log helper must not end the run.
    log_debug "Collected: $name (exit=$exit_code, stale=$stale)" || true
  done

  if [[ $jobs -eq 0 ]]; then
    log_debug "No state files found in $STATE_DIR" || true
  fi

  # Atomic write: temp file in the target directory, then rename.
  if ! tmpfile=$(mktemp "${PROM_FILE}.XXXXXX"); then
    log_error "Cannot create a temporary file next to $PROM_FILE"
    return 1
  fi

  if ! {
    _prom_family cron_job_exit_code gauge \
      "Exit code of the last cron job run" "$exit_lines"
    _prom_family cron_job_duration_seconds gauge \
      "Duration of the last cron job run" "$duration_lines"
    _prom_family cron_job_last_run_timestamp gauge \
      "Unix timestamp of the last cron job run" "$ts_lines"
    _prom_family cron_job_success gauge \
      "Whether the last cron job run was successful (1=yes, 0=no)" "$success_lines"
    _prom_family cron_job_runs_total counter \
      "Total number of times the cron job has run" "$runs_lines"
    _prom_family cron_job_stale gauge \
      "Whether the cron job has not run within the stale threshold (1=stale, 0=ok)" "$stale_lines"
    _prom_family cron_job_collector_last_run_timestamp gauge \
      "Unix timestamp of last collector run" \
      "cron_job_collector_last_run_timestamp ${now}"$'\n'
  } > "$tmpfile"; then
    rm -f -- "$tmpfile"
    log_error "Failed to write metrics to $tmpfile"
    return 1
  fi

  # mktemp creates the file with mode 0600; make it readable so a
  # node_exporter running as a different user can read it.
  if ! chmod 0644 "$tmpfile" || ! mv -f -- "$tmpfile" "$PROM_FILE"; then
    rm -f -- "$tmpfile"
    log_error "Failed to install $PROM_FILE"
    return 1
  fi

  log_info "Metrics written to $PROM_FILE ($jobs jobs)"
}
#########################
### Main ###
#########################
# Entry point: parse the command line, then hand over to the handler for
# the selected mode. parse_args is responsible for rejecting a missing
# mode, so any other MODE value simply falls through.
#   $@ - command-line arguments of the script
main() {
  parse_args "$@"
  if [[ "$MODE" == "wrap" ]]; then
    run_wrapper
  elif [[ "$MODE" == "collect" ]]; then
    run_collector
  fi
}
main "$@"