a1a17e81a1
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
317 lines
8.8 KiB
Bash
317 lines
8.8 KiB
Bash
#!/bin/bash
|
|
#############################################################
|
|
#### Cron Job Monitoring Exporter for Prometheus ####
|
|
#### Tracks whether scheduled cron jobs ran successfully, ####
|
|
#### their exit codes, duration, and staleness ####
|
|
#### ####
|
|
#### Author: Phil Connor ####
|
|
#### Contact: contact@mylinux.work ####
|
|
#### License: MIT ####
|
|
#### Version: 1.0 ####
|
|
#### ####
|
|
#### Usage: ./cron-job-exporter.sh [OPTIONS] ####
|
|
#############################################################
|
|
#
|
|
# Monitors cron job execution by wrapping cron commands.
|
|
# Two modes of operation:
|
|
# 1. Wrapper mode: wrap a cron command to record metrics
|
|
# 2. Collector mode: scan state files and write .prom output
|
|
#
|
|
# Metrics exported:
|
|
# - cron_job_exit_code (last exit code)
|
|
# - cron_job_duration_seconds (last execution time)
|
|
# - cron_job_last_run_timestamp (unix timestamp of last run)
|
|
# - cron_job_success (1 if last run exited 0, else 0)
|
|
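# - cron_job_runs_total (total number of runs)
# - cron_job_stale (1 if the last run is older than the stale threshold)
# - cron_job_collector_last_run_timestamp (unix timestamp of last collector run)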
|
|
#
|
|
# Requirements:
|
|
# - Bash 4.0+
|
|
# - node_exporter with textfile collector enabled
|
|
#
|
|
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

#########################
###   Configuration   ###
#########################

# Every setting below except PROM_FILE can be overridden from the environment.

# Directory the generated .prom file is written to.
NODE_DIR="${NODE_DIR:-/var/lib/node_exporter}"

# Directory holding one <job>.state file per wrapped cron job.
STATE_DIR="${STATE_DIR:-/var/lib/cron-job-exporter}"

# Output file produced by collector mode.
PROM_FILE="${NODE_DIR}/cron_jobs.prom"

# Age in seconds after which a job's last run is reported as stale (24 hours).
STALE_THRESHOLD="${STALE_THRESHOLD:-86400}"

# Any non-empty value enables debug logging.
DEBUG="${DEBUG:-}"
|
|
|
|
#########################
|
|
### Logging ###
|
|
#########################
|
|
|
|
# Colour escape sequences used by the log_* helpers. They are enabled only
# when stderr is a terminal, so output captured by cron (mail, log files) is
# not littered with raw escape codes.
if [[ -t 2 ]]; then
  RED='\033[0;31m'
  GREEN='\033[0;32m'
  YELLOW='\033[1;33m'
  NC='\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  NC=''
fi
|
|
|
|
#######################################
# Print an informational message to stderr.
# Globals:   GREEN, NC (read) - colour escape sequences, may be empty
# Arguments: $1 - message text
# Outputs:   "[INFO] <message>" on stderr
#######################################
log_info() {
  # %b expands the escapes stored in the colour variables; %s prints the
  # message verbatim so backslashes in it (paths, commands) are not mangled.
  printf '%b[INFO]%b %s\n' "${GREEN-}" "${NC-}" "${1-}" >&2
}
|
|
|
|
#######################################
# Print a warning message to stderr.
# Globals:   YELLOW, NC (read) - colour escape sequences, may be empty
# Arguments: $1 - message text
# Outputs:   "[WARN] <message>" on stderr
#######################################
log_warn() {
  # %b expands the escapes stored in the colour variables; %s prints the
  # message verbatim so backslashes in it (paths, commands) are not mangled.
  printf '%b[WARN]%b %s\n' "${YELLOW-}" "${NC-}" "${1-}" >&2
}
|
|
|
|
#######################################
# Print an error message to stderr.
# Globals:   RED, NC (read) - colour escape sequences, may be empty
# Arguments: $1 - message text
# Outputs:   "[ERROR] <message>" on stderr
#######################################
log_error() {
  # %b expands the escapes stored in the colour variables; %s prints the
  # message verbatim so backslashes in it (paths, commands) are not mangled.
  printf '%b[ERROR]%b %s\n' "${RED-}" "${NC-}" "${1-}" >&2
}
|
|
|
|
#######################################
# Print a debug message to stderr when debugging is enabled.
# Globals:   DEBUG (read) - any non-empty value enables output
# Arguments: $1 - message text
# Outputs:   "[DEBUG] <message>" on stderr (only when DEBUG is non-empty)
# Returns:   always 0
#######################################
log_debug() {
  if [[ -n "${DEBUG:-}" ]]; then
    printf '[DEBUG] %s\n' "${1-}" >&2
  fi
  # Always succeed: a non-zero status here would abort callers running under
  # `set -e` whenever debugging is switched off.
  return 0
}
|
|
|
|
#########################
|
|
### Parse Arguments ###
|
|
#########################
|
|
|
|
#######################################
# Print the usage text and terminate the script.
# Globals:   none
# Arguments: none
# Outputs:   usage text on stdout ($0 is expanded to the invoked script path)
# Returns:   does not return; exits with status 0
#######################################
show_help() {
  cat <<EOF
Cron Job Monitoring Exporter for Prometheus

Tracks cron job execution status, exit codes, and duration.

MODES:
  Wrapper mode:    $0 --wrap --name <job_name> -- <command>
  Collector mode:  $0 --collect

WRAPPER MODE (use in crontab):
  Wraps a cron command, records exit code, duration, and timestamp
  to a state file. Run --collect separately to generate .prom output.

  Example crontab:
    * * * * * /opt/cron-job-exporter.sh --wrap --name backup_db -- /opt/backup-db.sh
    0 * * * * /opt/cron-job-exporter.sh --wrap --name log_cleanup -- /opt/cleanup-logs.sh

COLLECTOR MODE (run on schedule or as oneshot):
  Reads all state files and writes a single .prom file for node_exporter.

  Example crontab:
    * * * * * /opt/cron-job-exporter.sh --collect

OPTIONS:
  --wrap                 Wrapper mode: run a command and record metrics
  --collect              Collector mode: generate .prom from state files
  --name NAME            Job name for wrapper mode (required with --wrap)
  --stale-threshold SEC  Seconds before a job is considered stale (default: 86400)
  --state-dir DIR        State file directory (default: /var/lib/cron-job-exporter)
  --help                 Show this help

ENVIRONMENT:
  NODE_DIR               Directory the .prom file is written to
                         (default: /var/lib/node_exporter)
  STATE_DIR              Same as --state-dir
  STALE_THRESHOLD        Same as --stale-threshold
  DEBUG                  Set to any non-empty value to enable debug logging

EOF
  exit 0
}
|
|
|
|
# Results of command-line parsing, filled in by parse_args.
MODE=""      # "wrap" or "collect"
JOB_NAME=""  # value given to --name
JOB_CMD=()   # command and arguments following "--"

#######################################
# Parse command-line options and validate the requested mode.
# Globals:   MODE, JOB_NAME, JOB_CMD, STALE_THRESHOLD, STATE_DIR (written)
# Arguments: the script's command-line arguments
# Outputs:   error messages on stderr
# Returns:   0 on success; exits 1 on invalid usage. --help is delegated to
#            show_help.
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --wrap)
        MODE="wrap"
        shift
        ;;
      --collect)
        MODE="collect"
        shift
        ;;
      --name | --stale-threshold | --state-dir)
        # Check for the value explicitly; otherwise `set -u` turns a missing
        # argument into a cryptic "unbound variable" failure.
        if (( $# < 2 )); then
          log_error "Option $1 requires an argument"
          exit 1
        fi
        case "$1" in
          --name)
            JOB_NAME="$2"
            ;;
          --stale-threshold)
            # The threshold is later used in shell arithmetic, so only plain
            # non-negative integers are accepted.
            if [[ ! "$2" =~ ^[0-9]+$ ]]; then
              log_error "--stale-threshold must be a non-negative integer: $2"
              exit 1
            fi
            STALE_THRESHOLD="$2"
            ;;
          --state-dir)
            STATE_DIR="$2"
            ;;
        esac
        shift 2
        ;;
      --help)
        show_help
        ;;
      --)
        # Everything after "--" is the command to wrap, kept word-for-word.
        shift
        JOB_CMD=("$@")
        break
        ;;
      *)
        log_error "Unknown option: $1"
        exit 1
        ;;
    esac
  done

  if [[ -z "$MODE" ]]; then
    log_error "Must specify --wrap or --collect"
    printf "Run '%s --help' for usage.\n" "$0" >&2
    exit 1
  fi

  if [[ "$MODE" == "wrap" ]]; then
    if [[ -z "$JOB_NAME" ]]; then
      log_error "--name is required in wrapper mode"
      exit 1
    fi
    if (( ${#JOB_CMD[@]} == 0 )); then
      log_error "No command specified after --"
      exit 1
    fi
  fi
}
|
|
|
|
#########################
|
|
### Sanitize ###
|
|
#########################
|
|
|
|
#######################################
# Turn an arbitrary job name into a safe identifier made only of a-z, 0-9
# and underscores.
# Arguments: $1 - raw job name
# Outputs:   the sanitized name on stdout; empty when the input contains no
#            usable characters
#######################################
sanitize_name() {
  # Match byte-wise so the a-z range means exactly ASCII lowercase regardless
  # of the caller's locale.
  local LC_ALL=C
  local name="${1-}"

  name="${name,,}"
  # Replace every other character (spaces, punctuation, newlines, ...) by "_".
  name="${name//[^a-z0-9_]/_}"
  # Collapse runs of underscores into one.
  while [[ "$name" == *__* ]]; do
    name="${name//__/_}"
  done
  # After collapsing, at most one underscore remains at each end.
  name="${name#_}"
  name="${name%_}"

  # printf rather than echo: a name such as "-n" must not be taken as an option.
  printf '%s\n' "$name"
}
|
|
|
|
#########################
|
|
### Wrapper Mode ###
|
|
#########################
|
|
|
|
# Print the current time in nanoseconds since the epoch. Falls back to
# whole-second resolution when date(1) does not support %N.
_cron_job_now_ns() {
  local ns
  ns=$(date +%s%N 2>/dev/null) || ns=""
  if [[ ! "$ns" =~ ^[0-9]+$ ]]; then
    ns="$(date +%s)000000000"
  fi
  printf '%s\n' "$ns"
}

# Atomically write the state file for one finished run.
# Arguments: $1 state file path, $2 job name, $3 exit code, $4 duration (s)
# Returns non-zero when the state could not be recorded. Every step is checked
# explicitly because errexit is not active when called from an `if`.
_cron_job_write_state() {
  local state_file="$1" job="$2" code="$3" secs="$4"
  local runs=0 key value tmpfile

  mkdir -p -- "${state_file%/*}" || return 1

  # Pick up the previous run counter; accept plain digits only so corrupt
  # file content is never evaluated as an arithmetic expression.
  if [[ -r "$state_file" ]]; then
    while IFS='=' read -r key value || [[ -n "$key" ]]; do
      if [[ "$key" == "runs" && "$value" =~ ^[0-9]+$ ]]; then
        runs=$(( 10#$value ))
      fi
    done < "$state_file"
  fi
  runs=$(( runs + 1 ))

  # Write to a temp file in the same directory, then rename over the target
  # so a reader never sees a half-written state file.
  tmpfile=$(mktemp "${state_file}.XXXXXX") || return 1
  if ! printf 'name=%s\nexit_code=%s\nduration=%s\ntimestamp=%s\nruns=%s\nsuccess=%s\n' \
    "$job" "$code" "$secs" "$(date +%s)" "$runs" "$(( code == 0 ? 1 : 0 ))" \
    > "$tmpfile"; then
    rm -f -- "$tmpfile"
    return 1
  fi
  # mktemp creates the file with mode 0600; open it up so a collector running
  # as a different user can read it. Best effort only.
  chmod 0644 "$tmpfile" 2>/dev/null || true
  if ! mv -f -- "$tmpfile" "$state_file"; then
    rm -f -- "$tmpfile"
    return 1
  fi

  log_debug "State written to $state_file (exit=$code, duration=${secs}s, runs=$runs)" || true
  return 0
}

#######################################
# Wrapper mode: run the wrapped command and record its exit code, duration,
# timestamp and run counter in "<STATE_DIR>/<job>.state".
# Recording is best effort: failure to write the state is reported as a
# warning and never changes the exit status of the wrapped command.
# Globals:   STATE_DIR, JOB_NAME, JOB_CMD (read)
# Arguments: none
# Outputs:   whatever the wrapped command prints; diagnostics on stderr
# Returns:   does not return; exits with the wrapped command's exit code
#            (or 1 when the job name has no usable characters)
#######################################
run_wrapper() {
  local safe_name
  safe_name=$(sanitize_name "$JOB_NAME")
  if [[ -z "$safe_name" ]]; then
    # An empty name would produce a ".state" file that collector mode skips.
    log_error "Job name '$JOB_NAME' contains no usable characters (a-z, 0-9, _)"
    exit 1
  fi
  local state_file="${STATE_DIR}/${safe_name}.state"

  # "|| true": a logger that returns non-zero must not trip `set -e`.
  log_debug "Wrapping command: ${JOB_CMD[*]}" || true
  log_debug "Job name: $safe_name" || true

  local start_ns end_ns elapsed_ns duration exit_code=0
  start_ns=$(_cron_job_now_ns)

  # Run the command; "||" captures its status without tripping `set -e`.
  "${JOB_CMD[@]}" || exit_code=$?

  end_ns=$(_cron_job_now_ns)

  # Seconds with millisecond precision, computed with integer arithmetic so
  # no external calculator is needed. Clamp in case the clock stepped back.
  elapsed_ns=$(( end_ns - start_ns ))
  if (( elapsed_ns < 0 )); then
    elapsed_ns=0
  fi
  printf -v duration '%d.%03d' \
    "$(( elapsed_ns / 1000000000 ))" "$(( elapsed_ns % 1000000000 / 1000000 ))"

  if ! _cron_job_write_state "$state_file" "$safe_name" "$exit_code" "$duration"; then
    log_warn "Could not record state for job '$safe_name' in $STATE_DIR" || true
  fi

  # Exit with the original command's exit code
  exit "$exit_code"
}
|
|
|
|
#########################
|
|
### Collector Mode ###
|
|
#########################
|
|
|
|
# Print one metric family: HELP line, TYPE line, then its sample lines.
# Arguments: $1 metric name, $2 metric type, $3 help text,
#            $4 newline-terminated sample lines (may be empty)
_cron_job_emit_family() {
  printf '# HELP %s %s\n# TYPE %s %s\n%s' "$1" "$3" "$1" "$2" "$4"
}

#######################################
# Collector mode: read every "<STATE_DIR>/*.state" file and write all job
# metrics to PROM_FILE. Samples are grouped per metric family, and state files
# with missing names or malformed numeric fields are skipped.
# Globals:   STATE_DIR, NODE_DIR, PROM_FILE, STALE_THRESHOLD (read)
# Arguments: none
# Outputs:   PROM_FILE (replaced atomically); diagnostics on stderr
# Returns:   0 on success, 1 when the threshold is invalid or the output
#            file cannot be written
#######################################
run_collector() {
  local int_re='^[0-9]+$'
  local num_re='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
  local bs='\' dq='"'

  # The threshold is used in arithmetic below; reject anything but digits.
  if [[ ! "$STALE_THRESHOLD" =~ $int_re ]]; then
    log_error "Stale threshold must be a non-negative integer: $STALE_THRESHOLD"
    return 1
  fi

  mkdir -p -- "$STATE_DIR" "$NODE_DIR"

  local now
  now=$(date +%s)

  # One buffer of sample lines per metric family.
  local exit_lines="" duration_lines="" timestamp_lines=""
  local success_lines="" runs_lines="" stale_lines=""
  local files_seen=0 job_count=0
  local state_file key value label stale
  local name exit_code duration timestamp runs success

  for state_file in "$STATE_DIR"/*.state; do
    # An unmatched glob stays literal; skip it and anything that is not a file.
    [[ -f "$state_file" ]] || continue
    files_seen=$(( files_seen + 1 ))

    if [[ ! -r "$state_file" ]]; then
      log_warn "Skipping unreadable state file: $state_file" || true
      continue
    fi

    # Read state file
    name="" exit_code="0" duration="0" timestamp="0" runs="0" success="0"
    while IFS='=' read -r key value || [[ -n "$key" ]]; do
      case "$key" in
        name) name="$value" ;;
        exit_code) exit_code="$value" ;;
        duration) duration="$value" ;;
        timestamp) timestamp="$value" ;;
        runs) runs="$value" ;;
        success) success="$value" ;;
      esac
    done < "$state_file"

    [[ -n "$name" ]] || continue

    # Never feed unvalidated file content to arithmetic or into the output;
    # one corrupt file must not take down the metrics of every other job.
    if [[ ! "$exit_code" =~ $int_re || ! "$timestamp" =~ $int_re ||
      ! "$runs" =~ $int_re || ! "$success" =~ ^[01]$ ||
      ! "$duration" =~ $num_re ]]; then
      log_warn "Skipping malformed state file: $state_file" || true
      continue
    fi

    stale=0
    # 10# forces base 10 so a leading zero is not read as octal.
    if (( now - 10#$timestamp > 10#$STALE_THRESHOLD )); then
      stale=1
    fi

    # Escape backslash and double quote so the label value stays well-formed.
    label=${name//"$bs"/"$bs$bs"}
    label=${label//"$dq"/"$bs$dq"}

    exit_lines+="cron_job_exit_code{job=\"${label}\"} ${exit_code}"$'\n'
    duration_lines+="cron_job_duration_seconds{job=\"${label}\"} ${duration}"$'\n'
    timestamp_lines+="cron_job_last_run_timestamp{job=\"${label}\"} ${timestamp}"$'\n'
    success_lines+="cron_job_success{job=\"${label}\"} ${success}"$'\n'
    runs_lines+="cron_job_runs_total{job=\"${label}\"} ${runs}"$'\n'
    stale_lines+="cron_job_stale{job=\"${label}\"} ${stale}"$'\n'
    job_count=$(( job_count + 1 ))

    # "|| true": a logger that returns non-zero must not trip `set -e`.
    log_debug "Collected: $name (exit=$exit_code, stale=$stale)" || true
  done

  if (( files_seen == 0 )); then
    log_debug "No state files found in $STATE_DIR" || true
  fi

  # Atomic write: build the file next to its destination, then rename it.
  local tmpfile
  if ! tmpfile=$(mktemp "${PROM_FILE}.XXXXXX"); then
    log_error "Could not create a temporary file next to $PROM_FILE" || true
    return 1
  fi

  if ! {
    _cron_job_emit_family cron_job_exit_code gauge \
      "Exit code of the last cron job run" "$exit_lines" &&
    _cron_job_emit_family cron_job_duration_seconds gauge \
      "Duration of the last cron job run" "$duration_lines" &&
    _cron_job_emit_family cron_job_last_run_timestamp gauge \
      "Unix timestamp of the last cron job run" "$timestamp_lines" &&
    _cron_job_emit_family cron_job_success gauge \
      "Whether the last cron job run was successful (1=yes, 0=no)" "$success_lines" &&
    _cron_job_emit_family cron_job_runs_total counter \
      "Total number of times the cron job has run" "$runs_lines" &&
    _cron_job_emit_family cron_job_stale gauge \
      "Whether the cron job has not run within the stale threshold (1=stale, 0=ok)" "$stale_lines" &&
    _cron_job_emit_family cron_job_collector_last_run_timestamp gauge \
      "Unix timestamp of last collector run" \
      "cron_job_collector_last_run_timestamp ${now}"$'\n'
  } > "$tmpfile"; then
    rm -f -- "$tmpfile"
    log_error "Could not write metrics to $tmpfile" || true
    return 1
  fi

  # mktemp creates the file with mode 0600; make it world-readable so a
  # node_exporter running as a different user can read it.
  if ! chmod 0644 "$tmpfile" || ! mv -f -- "$tmpfile" "$PROM_FILE"; then
    rm -f -- "$tmpfile"
    log_error "Could not install $PROM_FILE" || true
    return 1
  fi

  log_info "Metrics written to $PROM_FILE ($job_count jobs)"
}
|
|
|
|
#########################
|
|
### Main ###
|
|
#########################
|
|
|
|
#######################################
# Entry point: parse the command line, then dispatch to the selected mode.
# Globals:   MODE (read; set by parse_args)
# Arguments: the script's command-line arguments
# Returns:   the status of the selected mode; exits 1 on an unexpected mode
#######################################
main() {
  parse_args "$@"

  case "$MODE" in
    wrap)
      run_wrapper
      ;;
    collect)
      run_collector
      ;;
    *)
      # Fail loudly rather than doing nothing if MODE holds anything else.
      log_error "Unknown mode: $MODE"
      exit 1
      ;;
  esac
}
|
|
|
|
# Run the script: forward every command-line argument, unchanged, to main.
main "$@"
|