Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
This commit is contained in:
@@ -0,0 +1,925 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: ntp-drift-exporter.sh
|
||||
# Version: 1.0
|
||||
# Description: Prometheus exporter for NTP time synchronisation metrics.
|
||||
# Monitors chrony, ntpd, and systemd-timesyncd clock offset,
|
||||
# stratum, sync status, and drift rate. Time drift is a silent
|
||||
# killer for logs, certificates, Kerberos, and distributed systems.
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
#
|
||||
# Prerequisites:
|
||||
# - chronyc (chrony), ntpq/ntpstat (ntpd), or timedatectl (systemd-timesyncd)
|
||||
# - netcat (nc) for HTTP mode
|
||||
# - Standard Unix tools (awk, grep)
|
||||
#
|
||||
# Usage:
|
||||
# # Output to stdout
|
||||
# ./ntp-drift-exporter.sh
|
||||
#
|
||||
# # HTTP server mode
|
||||
# ./ntp-drift-exporter.sh --http -p 9200
|
||||
#
|
||||
# # Textfile collector mode
|
||||
# ./ntp-drift-exporter.sh --textfile
|
||||
#
|
||||
# Metrics Exported:
|
||||
# Core Status:
|
||||
# - ntp_drift_up - Exporter status (1=up, 0=down)
|
||||
# - ntp_drift_exporter_info{version,source} - Exporter version and NTP source
|
||||
#
|
||||
# Sync Status:
|
||||
# - ntp_drift_synchronised - Clock synchronised (1=yes, 0=no)
|
||||
# - ntp_drift_stratum - Current stratum level
|
||||
# - ntp_drift_offset_seconds - Clock offset from upstream in seconds
|
||||
# - ntp_drift_offset_abs_seconds - Absolute clock offset in seconds
|
||||
#
|
||||
# Chrony (if chronyc available):
|
||||
# - ntp_drift_chrony_root_delay_seconds - Root delay
|
||||
# - ntp_drift_chrony_root_dispersion_seconds - Root dispersion
|
||||
# - ntp_drift_chrony_last_offset_seconds - Last measured offset
|
||||
# - ntp_drift_chrony_rms_offset_seconds - RMS offset
|
||||
# - ntp_drift_chrony_frequency_ppm - Frequency error in ppm
|
||||
# - ntp_drift_chrony_residual_freq_ppm - Residual frequency error in ppm
|
||||
# - ntp_drift_chrony_skew_ppm - Estimated skew in ppm
|
||||
# - ntp_drift_chrony_update_interval_seconds - Mean update interval
|
||||
# - ntp_drift_chrony_leap_status - Leap status (0=normal, 1=insert, 2=delete, 3=unsync)
|
||||
# - ntp_drift_chrony_sources_total - Total configured NTP sources
|
||||
# - ntp_drift_chrony_sources_reachable - Reachable NTP sources
|
||||
# - ntp_drift_chrony_source_offset_seconds{source,mode} - Per-source offset
|
||||
#
|
||||
# NTPd (if ntpq available):
|
||||
# - ntp_drift_ntpd_peers_total - Total configured peers
|
||||
# - ntp_drift_ntpd_peers_reachable - Reachable peers
|
||||
# - ntp_drift_ntpd_peer_offset_seconds{peer,type} - Per-peer offset
|
||||
# - ntp_drift_ntpd_peer_delay_seconds{peer} - Per-peer round-trip delay
|
||||
# - ntp_drift_ntpd_peer_jitter_seconds{peer} - Per-peer jitter
|
||||
# - ntp_drift_ntpd_selected_peer_offset_seconds - Selected peer offset
|
||||
#
|
||||
# systemd-timesyncd (if timedatectl available):
|
||||
# - ntp_drift_timesyncd_server_info{server,address} - NTP server info
|
||||
# - ntp_drift_timesyncd_delay_seconds - Round-trip delay
|
||||
# - ntp_drift_timesyncd_jitter_seconds - Jitter
|
||||
# - ntp_drift_timesyncd_frequency_ppm - Frequency error in ppm
|
||||
# - ntp_drift_timesyncd_root_distance_seconds - Root distance
|
||||
# - ntp_drift_timesyncd_poll_interval_seconds - Current poll interval
|
||||
# - ntp_drift_timesyncd_packet_count - NTP packet count
|
||||
# - ntp_drift_timesyncd_leap_status - Leap indicator (0=normal, 1=insert, 2=delete, 3=unsync)
|
||||
#
|
||||
# Alerts:
|
||||
# - ntp_drift_offset_critical - 1 if |offset| > 100ms
|
||||
# - ntp_drift_offset_warning - 1 if |offset| > 10ms
|
||||
# - ntp_drift_unsynchronised - 1 if clock is not synchronised
|
||||
#
|
||||
# Exporter:
|
||||
# - ntp_drift_exporter_duration_seconds - Script execution time
|
||||
# - ntp_drift_exporter_last_run_timestamp - Last run timestamp
|
||||
#
|
||||
# Configuration:
|
||||
# Default HTTP port: 9200
|
||||
# Textfile directory: /var/lib/node_exporter
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# ============================================================================
|
||||
# CONFIGURATION VARIABLES
|
||||
# ============================================================================
|
||||
|
||||
# Directory used for the node_exporter textfile collector output (--textfile).
TEXTFILE_DIR='/var/lib/node_exporter'

# Destination file for the generated metrics; set by --textfile or -o/--output.
OUTPUT_FILE=''

# Set to "true" by --http.
HTTP_MODE='false'

# Port for HTTP mode; overridden by -p/--port.
HTTP_PORT='9200'

# Detected NTP client (chrony, ntpd or timesyncd); set by detect_ntp_source.
NTP_SOURCE=''
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Print the command-line help and terminate the script successfully.
# Globals:  HTTP_PORT (read) - shown as the port used by --http
# Outputs:  help text on stdout
# Returns:  never returns; exits with status 0
show_usage() {
  # Unquoted delimiter on purpose: $0 and $HTTP_PORT are expanded in the text.
  cat <<EOF
Usage: $0 [OPTIONS]

Export NTP time synchronisation statistics as Prometheus metrics (v1.0).

MODES:
    --textfile          Write to node_exporter textfile collector
    --http              Run HTTP server on port $HTTP_PORT

OPTIONS:
    -p, --port          HTTP port (default: 9200)
    -o, --output        Output file path
    -h, --help          Show this help and exit

EXAMPLES:
    $0 --textfile                  # Write to textfile collector
    $0 --http --port 9200          # Run HTTP server
    $0 -o /tmp/ntp_drift.prom      # Write to custom file

METRICS:
    - Clock synchronisation status and stratum
    - Clock offset and absolute offset
    - Chrony tracking stats (root delay, dispersion, frequency, skew)
    - Chrony/ntpd per-source/peer offsets and reachability
    - Alert thresholds (>10ms warning, >100ms critical)

EOF
  exit 0
}
|
||||
|
||||
# Parse command-line options into the script's configuration globals.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Arguments: the script's positional parameters ("$@")
# Returns:   0 when every argument was consumed; exits 1 on an unknown option
#            or when -p/--port or -o/--output is given without a value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        # show_usage exits the script, so no shift is needed here.
        show_usage
        ;;
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/ntp_drift.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p|--port|-o|--output)
        # `shift 2` with a single remaining argument fails WITHOUT shifting,
        # which would leave this loop spinning forever; reject it up front.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires an argument" >&2
          exit 1
        fi
        case "$1" in
          -p|--port) HTTP_PORT="$2" ;;
          *) OUTPUT_FILE="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}
|
||||
|
||||
# Detect the NTP client in use.
# Probes, in order: chrony (`chronyc tracking` succeeds), ntpd (`ntpq -p`
# succeeds), then timedatectl reporting NTP=yes. The first hit wins.
# Globals:  NTP_SOURCE (written) - "chrony", "ntpd" or "timesyncd"
# Returns:  0 if a client was found, 1 if no NTP client is available
detect_ntp_source() {
  if command -v chronyc >/dev/null 2>&1 && chronyc tracking >/dev/null 2>&1; then
    NTP_SOURCE="chrony"
    return 0
  fi

  if command -v ntpq >/dev/null 2>&1 && ntpq -p >/dev/null 2>&1; then
    NTP_SOURCE="ntpd"
    return 0
  fi

  # Match the whole line: a substring match on "NTP=yes" would also accept
  # "CanNTP=yes", which is present even when NTP synchronisation is disabled.
  if command -v timedatectl >/dev/null 2>&1 \
    && timedatectl show 2>/dev/null | grep -qx "NTP=yes"; then
    NTP_SOURCE="timesyncd"
    return 0
  fi

  return 1
}
|
||||
|
||||
# Fetch the raw chrony tracking report.
# Outputs: whatever `chronyc tracking` prints on stdout; its stderr is dropped
# Returns: the exit status of chronyc
get_chrony_tracking() {
  chronyc tracking 2> /dev/null
}
|
||||
|
||||
# Parse a "<number> <unit> [trailing text]" value from chronyc output.
# Time values are converted to seconds; every other unit (seconds, ppm,
# unknown or missing) returns the number unchanged.
# Args:    $1 - raw value string, e.g. "+0.000012345 seconds", "0.123 ppm",
#               "0.000001234 seconds slow of NTP time"
# Outputs: the number in its base unit with any "+" removed; empty for empty
#          input. Direction words such as "slow"/"fast" are NOT interpreted.
parse_chrony_value() {
  local raw="$1"
  local number unit _rest

  # The unit is the word right after the number. Taking the LAST word instead
  # would pick up trailing text ("... slow of NTP time") and skip conversion.
  read -r number unit _rest <<<"$raw"
  number="${number//+/}"

  # The number is passed with -v so it is treated as data, not awk code.
  case "$unit" in
    milliseconds) awk -v n="$number" 'BEGIN {printf "%.9f", n / 1000}' ;;
    microseconds) awk -v n="$number" 'BEGIN {printf "%.12f", n / 1000000}' ;;
    nanoseconds) awk -v n="$number" 'BEGIN {printf "%.15f", n / 1000000000}' ;;
    *) printf '%s\n' "$number" ;;
  esac
}
|
||||
|
||||
# Get chrony synchronisation status from the tracking report.
# The clock counts as synchronised when a Reference ID is present, it does not
# contain "00000000", and the leap status is not "Not synchronised".
# Outputs: "synchronised stratum offset_seconds" on one line. The offset is
#          negative when chrony reports the system time as "slow of NTP time"
#          and positive when it reports "fast". Missing stratum defaults to
#          16, missing offset to 0.
get_chrony_sync_status() {
  local tracking ref_id stratum offset_raw sys_offset leap
  local synchronised=0
  local field_prog='$0 ~ key { gsub(/^[ \t]+/, "", $2); print $2 }'

  tracking=$(get_chrony_tracking)

  ref_id=$(awk -F: -v key='Reference ID' "$field_prog" <<<"$tracking")
  stratum=$(awk -F: -v key='Stratum' "$field_prog" <<<"$tracking")
  offset_raw=$(awk -F: -v key='System time' "$field_prog" <<<"$tracking")
  leap=$(awk -F: -v key='Leap status' "$field_prog" <<<"$tracking")

  # "System time" is printed as a magnitude plus the word fast/slow, so the
  # direction has to be re-applied after the numeric part is parsed.
  sys_offset=$(parse_chrony_value "$offset_raw")
  if [[ "$offset_raw" == *slow* && -n "$sys_offset" && "$sys_offset" != -* ]]; then
    sys_offset="-${sys_offset}"
  fi

  if [[ -n "$ref_id" && "$ref_id" != *00000000* ]]; then
    if ! grep -qi "not synchronised" <<<"$leap"; then
      synchronised=1
    fi
  fi

  echo "$synchronised ${stratum:-16} ${sys_offset:-0}"
}
|
||||
|
||||
# Get chrony detailed tracking metrics.
# Outputs: one line
#   "root_delay root_disp last_offset rms_offset freq resid_freq skew update_interval leap_code"
#   Missing values default to 0 (leap_code to 3). Frequency is negative when
#   chrony labels it "slow". leap_code: 0=Normal, 1=Insert, 2=Delete,
#   3=anything else.
get_chrony_details() {
  local tracking line label value
  local root_delay_raw="" root_disp_raw="" last_offset_raw="" rms_offset_raw=""
  local freq_raw="" resid_freq_raw="" skew_raw="" interval_raw="" leap_raw=""

  tracking=$(get_chrony_tracking)

  # Single pass over the report instead of one awk process per field.
  while IFS= read -r line; do
    [[ "$line" == *:* ]] || continue
    label="${line%%:*}"
    value="${line#*:}"
    value="${value#"${value%%[![:space:]]*}"}" # trim leading whitespace
    case "$label" in
      *"Root delay"*) root_delay_raw="$value" ;;
      *"Root dispersion"*) root_disp_raw="$value" ;;
      *"Last offset"*) last_offset_raw="$value" ;;
      *"RMS offset"*) rms_offset_raw="$value" ;;
      *"Frequency"*) freq_raw="$value" ;;
      *"Residual freq"*) resid_freq_raw="$value" ;;
      *"Skew"*) skew_raw="$value" ;;
      *"Update interval"*) interval_raw="$value" ;;
      *"Leap status"*) leap_raw="$value" ;;
    esac
  done <<<"$tracking"

  local root_delay root_disp last_offset rms_offset freq resid_freq skew interval leap_code
  root_delay=$(parse_chrony_value "$root_delay_raw")
  root_disp=$(parse_chrony_value "$root_disp_raw")
  last_offset=$(parse_chrony_value "$last_offset_raw")
  rms_offset=$(parse_chrony_value "$rms_offset_raw")
  freq=$(parse_chrony_value "$freq_raw")
  resid_freq=$(parse_chrony_value "$resid_freq_raw")
  skew=$(parse_chrony_value "$skew_raw")
  interval=$(parse_chrony_value "$interval_raw")

  # Frequency is printed as "<magnitude> ppm slow|fast"; keep the direction
  # as a sign so the gauge can tell a slow clock from a fast one.
  if [[ "$freq_raw" == *slow* && -n "$freq" && "$freq" != -* ]]; then
    freq="-${freq}"
  fi

  case "$leap_raw" in
    *"Normal"*) leap_code=0 ;;
    *"Insert"*) leap_code=1 ;;
    *"Delete"*) leap_code=2 ;;
    *) leap_code=3 ;;
  esac

  echo "${root_delay:-0} ${root_disp:-0} ${last_offset:-0} ${rms_offset:-0} ${freq:-0} ${resid_freq:-0} ${skew:-0} ${interval:-0} ${leap_code:-3}"
}
|
||||
|
||||
# Get chrony source list with status from `chronyc sources`.
# Outputs: one line per source: "source mode offset reachable"
#   mode:      server (^), peer (=), ref_clock (#)
#   offset:    adjusted offset of the last sample, in seconds (%.9f)
#   reachable: 1 when the state character is *, + or -, otherwise 0
get_chrony_sources() {
  chronyc sources 2>/dev/null | awk '
    # Source rows start with a two-character mode/state column such as "^*".
    # Selecting rows by that column, not by line number, keeps the first
    # source whether the header is two lines long or three.
    length($1) == 2 && $1 ~ /^[=#^]/ && NF >= 8 {
      mode_char = substr($1, 1, 1)
      if (mode_char == "^") mode = "server"
      else if (mode_char == "=") mode = "peer"
      else if (mode_char == "#") mode = "ref_clock"
      else mode = "unknown"

      name = $2
      state_char = substr($1, 2, 1)

      # Reachable if state is * (synced), + (combined), - (not combined)
      reachable = 0
      if (state_char == "*" || state_char == "+" || state_char == "-") reachable = 1

      # Field 7 looks like "+123us[" or "-2500us[-2400us]": the adjusted
      # offset immediately followed by the bracketed measured offset.
      # Cut at the bracket so the unit suffix sits at the end of the string.
      offset_raw = $7
      sub(/\[.*$/, "", offset_raw)
      gsub(/[+]/, "", offset_raw)

      # Convert units: ns, us, ms, s
      if (offset_raw ~ /ns$/) {
        sub(/ns$/, "", offset_raw)
        offset = offset_raw / 1000000000
      } else if (offset_raw ~ /us$/) {
        sub(/us$/, "", offset_raw)
        offset = offset_raw / 1000000
      } else if (offset_raw ~ /ms$/) {
        sub(/ms$/, "", offset_raw)
        offset = offset_raw / 1000
      } else if (offset_raw ~ /s$/) {
        sub(/s$/, "", offset_raw)
        offset = offset_raw + 0
      } else {
        offset = offset_raw + 0
      }

      printf "%s %s %.9f %d\n", name, mode, offset, reachable
    }'
}
|
||||
|
||||
# Get ntpd sync status via ntpstat and ntpq.
# ntpstat (when installed and exiting 0) provides the sync flag and stratum.
# The offset is taken from the peer ntpq marks with "*" whenever there is one,
# because ntpstat only prints "time correct to within N ms", which is an error
# bound, not a signed offset. That bound is used only as a last resort when
# ntpq reports no selected peer. If ntpstat is missing or fails, a selected
# ntpq peer also provides the sync flag and stratum.
# Outputs: "synchronised stratum offset_seconds" (defaults: 0 16 0)
get_ntpd_sync_status() {
  local synchronised=0
  local stratum=16
  local offset=0
  local ntpstat_output bound_ms="" selected_peer="" offset_ms=""

  if command -v ntpstat >/dev/null 2>&1 && ntpstat_output=$(ntpstat 2>/dev/null); then
    synchronised=1
    # First all-digit word on the line mentioning "stratum".
    stratum=$(awk '/stratum/ { for (i = 1; i <= NF; i++) if ($i ~ /^[0-9]+$/) { print $i; exit } }' \
      <<<"$ntpstat_output")
    # First numeric word on the "time correct to within N ms" line.
    bound_ms=$(awk '/time correct/ { for (i = 1; i <= NF; i++) if ($i ~ /^[0-9.]+$/) { print $i; exit } }' \
      <<<"$ntpstat_output")
  fi

  if command -v ntpq >/dev/null 2>&1; then
    selected_peer=$(ntpq -p 2>/dev/null | awk '/^\*/ { print; exit }')
  fi

  if [ -n "$selected_peer" ]; then
    if [ "$synchronised" -eq 0 ]; then
      synchronised=1
      stratum=$(awk '{print $3}' <<<"$selected_peer")
    fi
    # Column 9 of the ntpq peers table is the offset in milliseconds.
    offset_ms=$(awk '{print $9}' <<<"$selected_peer")
  fi

  # Values are handed to awk with -v so they are data, never program text.
  if [ -n "$offset_ms" ]; then
    offset=$(awk -v ms="$offset_ms" 'BEGIN {printf "%.9f", ms / 1000}')
  elif [ -n "$bound_ms" ]; then
    offset=$(awk -v ms="$bound_ms" 'BEGIN {printf "%.9f", ms / 1000}')
  fi

  echo "$synchronised ${stratum:-16} ${offset:-0}"
}
|
||||
|
||||
# Get ntpd peer list with status from `ntpq -p`.
# Outputs: one line per peer: "peer type offset delay jitter reachable"
#   type:      selected (*), candidate (+), outlier (-), selected_distance (#),
#              other (anything else, including a blank tally)
#   offset, delay, jitter: converted from milliseconds to seconds (%.9f)
#   reachable: 1 for tally *, +, - or #, otherwise 0
# Returns: silently produces nothing when ntpq is not installed
get_ntpd_peers() {
  command -v ntpq >/dev/null 2>&1 || return
  ntpq -p 2>/dev/null | awk '
    NR > 2 && NF >= 9 {
      # The tally code occupies column 1 of the raw line and is a blank for
      # peers without one. awk drops leading blanks when splitting fields, so
      # reading the tally from $1 would eat the first letter of such a peer.
      lead = substr($0, 1, 1)
      if (lead == " " || lead == "\t") {
        tally = " "
        peer = $1
      } else {
        tally = lead
        peer = substr($1, 2)
      }
      if (peer == "") next

      # delay in ms (field 8), offset in ms (field 9), jitter in ms (field 10)
      delay_s = ($8 + 0) / 1000
      offset_s = ($9 + 0) / 1000
      jitter_s = ($10 + 0) / 1000

      # Type based on tally code
      if (tally == "*") type = "selected"
      else if (tally == "+") type = "candidate"
      else if (tally == "-") type = "outlier"
      else if (tally == "#") type = "selected_distance"
      else type = "other"

      # Reachable if tally is *, +, -, or #
      reachable = (tally == "*" || tally == "+" || tally == "-" || tally == "#") ? 1 : 0

      printf "%s %s %.9f %.9f %.9f %d\n", peer, type, offset_s, delay_s, jitter_s, reachable
    }'
}
|
||||
|
||||
# Parse a timedatectl timesync-status duration into seconds.
# A leading +/- is dropped; callers that need the sign re-apply it themselves.
# Args:    $1 - raw value, e.g. "+2.764ms", "95.987ms", "500us", "32s",
#               "34min 8s", "1h 8min 16s"
# Outputs: seconds. Compound values (any d/h/min part) are summed over all
#          parts and printed with 9 decimals; single "us" values use 12
#          decimals and "ms" values 9; a plain "<n>s" or unit-less value is
#          returned as written.
parse_timesyncd_duration() {
  local raw="$1"
  local compound_re='(^|[[:space:]])[0-9.]+(d|h|min)([[:space:]]|$)'

  # Remove leading +/-
  raw="${raw#[+-]}"

  # Compound durations: add up every "<number><unit>" part so that hour and
  # day parts are counted as well as minutes and seconds.
  if [[ "$raw" =~ $compound_re ]]; then
    awk -v spec="$raw" 'BEGIN {
      n = split(spec, part, /[ \t]+/)
      total = 0
      for (i = 1; i <= n; i++) {
        p = part[i]
        # Arithmetic on "34min" uses its leading numeric prefix (34).
        if (p ~ /^[0-9.]+us$/) total += p / 1000000
        else if (p ~ /^[0-9.]+ms$/) total += p / 1000
        else if (p ~ /^[0-9.]+min$/) total += p * 60
        else if (p ~ /^[0-9.]+s$/) total += p
        else if (p ~ /^[0-9.]+h$/) total += p * 3600
        else if (p ~ /^[0-9.]+d$/) total += p * 86400
      }
      printf "%.9f", total
    }'
    return
  fi

  # Single-unit values. The number is passed with -v, never spliced into the
  # awk program text.
  case "$raw" in
    *us) awk -v v="${raw%us}" 'BEGIN {printf "%.12f", v / 1000000}' ;;
    *ms) awk -v v="${raw%ms}" 'BEGIN {printf "%.9f", v / 1000}' ;;
    *s) printf '%s\n' "${raw%s}" ;;
    *) printf '%s\n' "$raw" ;;
  esac
}
|
||||
|
||||
# Report systemd-timesyncd synchronisation state.
# The sync flag comes from the NTPSynchronized property of `timedatectl show`;
# stratum and offset come from `timedatectl timesync-status` when it prints
# anything.
# Outputs: "synchronised stratum offset_seconds" (defaults: 0 16 0); the offset
#          keeps the minus sign of the reported value.
get_timesyncd_sync_status() {
  local sync_flag=0
  local level=16
  local drift=0
  local ntp_synced report offset_txt magnitude

  ntp_synced=$(timedatectl show 2>/dev/null | awk -F= '/NTPSynchronized/ {print $2}')
  if [ "$ntp_synced" = "yes" ]; then
    sync_flag=1
  fi

  report=$(timedatectl timesync-status 2>/dev/null)
  if [ -n "$report" ]; then
    level=$(awk -F: '/Stratum/ { gsub(/^[ \t]+/, "", $2); print $2 }' <<<"$report")
    offset_txt=$(awk -F: '/Offset/ { gsub(/^[ \t]+/, "", $2); print $2 }' <<<"$report")

    if [ -n "$offset_txt" ]; then
      # The duration parser returns a magnitude only, so put the sign back.
      magnitude=$(parse_timesyncd_duration "$offset_txt")
      case "$offset_txt" in
        -*) drift="-${magnitude}" ;;
        *) drift="${magnitude}" ;;
      esac
    fi
  fi

  echo "$sync_flag ${level:-16} ${drift:-0}"
}
|
||||
|
||||
# Collect detailed systemd-timesyncd figures.
# Reads `timedatectl timesync-status` for the timing values and
# `timedatectl show-timesync` for the server name and address.
# Outputs: one line
#   "delay jitter frequency root_distance poll_interval packet_count leap_code server address"
#   Durations are in seconds, frequency is the reported value without its
#   "ppm" suffix, leap_code is 0=normal, 1=insert, 2=delete, 3=other.
#   Missing values fall back to 0 (leap_code 3, server/address "unknown").
get_timesyncd_details() {
  local report unit_info
  report=$(timedatectl timesync-status 2>/dev/null)
  unit_info=$(timedatectl show-timesync 2>/dev/null)

  # Shared awk program: print the text between the first and second colon of
  # every line matching `key`, with leading blanks removed.
  local pick='$0 ~ key { gsub(/^[ \t]+/, "", $2); print $2 }'

  local delay_txt jitter_txt freq_txt rootdist_txt poll_txt packets_txt leap_txt
  delay_txt=$(awk -F: -v key='Delay' "$pick" <<<"$report")
  jitter_txt=$(awk -F: -v key='Jitter' "$pick" <<<"$report")
  freq_txt=$(awk -F: -v key='Frequency' "$pick" <<<"$report")
  rootdist_txt=$(awk -F: -v key='Root distance' "$pick" <<<"$report")
  poll_txt=$(awk -F: -v key='Poll interval' "$pick" <<<"$report")
  packets_txt=$(awk -F: -v key='Packet count' "$pick" <<<"$report")
  leap_txt=$(awk -F: -v key='Leap' "$pick" <<<"$report")

  local server address
  server=$(awk -F= '/ServerName/ {print $2}' <<<"$unit_info")
  address=$(awk -F= '/ServerAddress/ {print $2}' <<<"$unit_info")

  local delay jitter root_distance poll_interval
  delay=$(parse_timesyncd_duration "$delay_txt")
  jitter=$(parse_timesyncd_duration "$jitter_txt")
  # Root distance and poll interval carry a parenthetical such as
  # "(max: 5s)"; keep only what precedes " (".
  root_distance=$(parse_timesyncd_duration "${rootdist_txt%% (*}")
  poll_interval=$(parse_timesyncd_duration "${poll_txt%% (*}")

  # Frequency keeps any sign and only loses the "ppm" suffix.
  local frequency="${freq_txt%ppm}"

  local leap_code
  case "$leap_txt" in
    *normal*) leap_code=0 ;;
    *insert*) leap_code=1 ;;
    *delete*) leap_code=2 ;;
    *) leap_code=3 ;;
  esac

  # Numeric coercion also strips surrounding whitespace.
  local packets
  packets=$(awk '{print $1+0}' <<<"$packets_txt")

  echo "${delay:-0} ${jitter:-0} ${frequency:-0} ${root_distance:-0} ${poll_interval:-0} ${packets:-0} ${leap_code:-3} ${server:-unknown} ${address:-unknown}"
}
|
||||
|
||||
# ============================================================================
|
||||
# METRIC GENERATION
|
||||
# ============================================================================
|
||||
|
||||
# Generate all Prometheus metrics.
# Detects the NTP client, then emits the common sync metrics, the
# client-specific detail metrics, the alert flags and the exporter runtime.
# When no client is detected only "ntp_drift_up 0" is written.
# Globals:  NTP_SOURCE (written by detect_ntp_source, read here)
# Outputs:  Prometheus text format metrics on stdout
generate_metrics() {
  local script_start
  script_start=$(date +%s)

  # Detect NTP client
  if ! detect_ntp_source; then
    cat <<EOF
# HELP ntp_drift_up NTP drift exporter status
# TYPE ntp_drift_up gauge
ntp_drift_up 0
EOF
    return
  fi

  cat <<EOF
# HELP ntp_drift_up NTP drift exporter status
# TYPE ntp_drift_up gauge
ntp_drift_up 1

# HELP ntp_drift_exporter_info NTP drift exporter information
# TYPE ntp_drift_exporter_info gauge
ntp_drift_exporter_info{version="1.0",source="$NTP_SOURCE"} 1
EOF

  echo ""

  # ========================================================================
  # Sync Status (common to all NTP sources)
  # ========================================================================
  local sync_status synchronised stratum offset _extra
  case "$NTP_SOURCE" in
    chrony) sync_status=$(get_chrony_sync_status) ;;
    ntpd) sync_status=$(get_ntpd_sync_status) ;;
    *) sync_status=$(get_timesyncd_sync_status) ;;
  esac
  read -r synchronised stratum offset _extra <<<"$sync_status"

  # Validate before use: one non-numeric token here would otherwise end up
  # verbatim in a sample line (or inside an awk program) and produce output
  # that is not valid exposition text.
  local number_re='^[+-]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?$'
  [[ "$synchronised" == 1 ]] || synchronised=0
  [[ "$stratum" =~ ^[0-9]+$ ]] || stratum=16
  [[ "$offset" =~ $number_re ]] || offset=0

  # Absolute offset and both alert flags in one awk call; the offset is passed
  # with -v so it is data, not program text.
  local abs_offset offset_critical offset_warning unsync_alert
  read -r abs_offset offset_critical offset_warning < <(
    awk -v o="$offset" 'BEGIN {
      v = o + 0
      if (v < 0) v = -v
      crit = (v > 0.1) ? 1 : 0
      warn = (v > 0.01) ? 1 : 0
      print v, crit, warn
    }'
  )
  unsync_alert=$((1 - synchronised))

  cat <<EOF
# HELP ntp_drift_synchronised Clock synchronised to NTP source (1=yes, 0=no)
# TYPE ntp_drift_synchronised gauge
ntp_drift_synchronised $synchronised

# HELP ntp_drift_stratum Current NTP stratum level
# TYPE ntp_drift_stratum gauge
ntp_drift_stratum ${stratum:-16}

# HELP ntp_drift_offset_seconds Clock offset from upstream NTP source in seconds (signed)
# TYPE ntp_drift_offset_seconds gauge
ntp_drift_offset_seconds ${offset:-0}

# HELP ntp_drift_offset_abs_seconds Absolute clock offset in seconds
# TYPE ntp_drift_offset_abs_seconds gauge
ntp_drift_offset_abs_seconds ${abs_offset:-0}
EOF

  echo ""

  # ========================================================================
  # Chrony Detailed Metrics (optional)
  # ========================================================================
  if [ "$NTP_SOURCE" = "chrony" ]; then
    local root_delay root_disp last_offset rms_offset freq resid_freq skew interval leap_code
    read -r root_delay root_disp last_offset rms_offset freq resid_freq skew interval leap_code _extra \
      <<<"$(get_chrony_details)"

    cat <<EOF
# HELP ntp_drift_chrony_root_delay_seconds Root delay to stratum-1 source in seconds
# TYPE ntp_drift_chrony_root_delay_seconds gauge
ntp_drift_chrony_root_delay_seconds ${root_delay:-0}

# HELP ntp_drift_chrony_root_dispersion_seconds Root dispersion in seconds
# TYPE ntp_drift_chrony_root_dispersion_seconds gauge
ntp_drift_chrony_root_dispersion_seconds ${root_disp:-0}

# HELP ntp_drift_chrony_last_offset_seconds Last clock offset measurement in seconds
# TYPE ntp_drift_chrony_last_offset_seconds gauge
ntp_drift_chrony_last_offset_seconds ${last_offset:-0}

# HELP ntp_drift_chrony_rms_offset_seconds RMS offset in seconds
# TYPE ntp_drift_chrony_rms_offset_seconds gauge
ntp_drift_chrony_rms_offset_seconds ${rms_offset:-0}

# HELP ntp_drift_chrony_frequency_ppm System clock frequency error in ppm
# TYPE ntp_drift_chrony_frequency_ppm gauge
ntp_drift_chrony_frequency_ppm ${freq:-0}

# HELP ntp_drift_chrony_residual_freq_ppm Residual frequency error in ppm
# TYPE ntp_drift_chrony_residual_freq_ppm gauge
ntp_drift_chrony_residual_freq_ppm ${resid_freq:-0}

# HELP ntp_drift_chrony_skew_ppm Estimated skew (frequency error bound) in ppm
# TYPE ntp_drift_chrony_skew_ppm gauge
ntp_drift_chrony_skew_ppm ${skew:-0}

# HELP ntp_drift_chrony_update_interval_seconds Mean update interval from NTP source
# TYPE ntp_drift_chrony_update_interval_seconds gauge
ntp_drift_chrony_update_interval_seconds ${interval:-0}

# HELP ntp_drift_chrony_leap_status Leap status (0=normal, 1=insert second, 2=delete second, 3=unsynchronised)
# TYPE ntp_drift_chrony_leap_status gauge
ntp_drift_chrony_leap_status ${leap_code:-3}
EOF

    echo ""

    # Chrony source metrics
    local sources_output
    sources_output=$(get_chrony_sources)

    if [ -n "$sources_output" ]; then
      local sources_total sources_reachable
      # Column 4 of each source record is the reachable flag.
      read -r sources_total sources_reachable < <(
        awk 'NF { total++ } $4 == 1 { up++ } END { print total + 0, up + 0 }' <<<"$sources_output"
      )

      cat <<EOF
# HELP ntp_drift_chrony_sources_total Total configured NTP sources
# TYPE ntp_drift_chrony_sources_total gauge
ntp_drift_chrony_sources_total ${sources_total:-0}

# HELP ntp_drift_chrony_sources_reachable Reachable NTP sources
# TYPE ntp_drift_chrony_sources_reachable gauge
ntp_drift_chrony_sources_reachable ${sources_reachable:-0}
EOF

      echo ""

      cat <<EOF
# HELP ntp_drift_chrony_source_offset_seconds Per-source clock offset in seconds
# TYPE ntp_drift_chrony_source_offset_seconds gauge
EOF

      local src_name src_mode src_offset
      while read -r src_name src_mode src_offset _extra; do
        [ -z "$src_name" ] && continue
        echo "ntp_drift_chrony_source_offset_seconds{source=\"$src_name\",mode=\"$src_mode\"} ${src_offset:-0}"
      done <<<"$sources_output"

      echo ""
    fi
  fi

  # ========================================================================
  # NTPd Detailed Metrics (optional)
  # ========================================================================
  if [ "$NTP_SOURCE" = "ntpd" ]; then
    local peers_output
    peers_output=$(get_ntpd_peers)

    if [ -n "$peers_output" ]; then
      local peers_total peers_reachable
      # Column 6 of each peer record is the reachable flag.
      read -r peers_total peers_reachable < <(
        awk 'NF { total++ } $6 == 1 { up++ } END { print total + 0, up + 0 }' <<<"$peers_output"
      )

      cat <<EOF
# HELP ntp_drift_ntpd_peers_total Total configured NTP peers
# TYPE ntp_drift_ntpd_peers_total gauge
ntp_drift_ntpd_peers_total ${peers_total:-0}

# HELP ntp_drift_ntpd_peers_reachable Reachable NTP peers
# TYPE ntp_drift_ntpd_peers_reachable gauge
ntp_drift_ntpd_peers_reachable ${peers_reachable:-0}
EOF

      echo ""

      # Peer records are "peer type offset delay jitter reachable"; each
      # series below is one awk pass over them.
      cat <<EOF
# HELP ntp_drift_ntpd_peer_offset_seconds Per-peer clock offset in seconds
# TYPE ntp_drift_ntpd_peer_offset_seconds gauge
EOF

      awk 'NF { printf "ntp_drift_ntpd_peer_offset_seconds{peer=\"%s\",type=\"%s\"} %s\n", $1, $2, ($3 == "" ? 0 : $3) }' \
        <<<"$peers_output"

      echo ""

      cat <<EOF
# HELP ntp_drift_ntpd_peer_delay_seconds Per-peer round-trip delay in seconds
# TYPE ntp_drift_ntpd_peer_delay_seconds gauge
EOF

      awk 'NF { printf "ntp_drift_ntpd_peer_delay_seconds{peer=\"%s\"} %s\n", $1, ($4 == "" ? 0 : $4) }' \
        <<<"$peers_output"

      echo ""

      cat <<EOF
# HELP ntp_drift_ntpd_peer_jitter_seconds Per-peer jitter in seconds
# TYPE ntp_drift_ntpd_peer_jitter_seconds gauge
EOF

      awk 'NF { printf "ntp_drift_ntpd_peer_jitter_seconds{peer=\"%s\"} %s\n", $1, ($5 == "" ? 0 : $5) }' \
        <<<"$peers_output"

      echo ""

      # Selected peer offset
      local selected_offset
      selected_offset=$(awk '$2 == "selected" {print $3; exit}' <<<"$peers_output")

      cat <<EOF
# HELP ntp_drift_ntpd_selected_peer_offset_seconds Currently selected peer offset in seconds
# TYPE ntp_drift_ntpd_selected_peer_offset_seconds gauge
ntp_drift_ntpd_selected_peer_offset_seconds ${selected_offset:-0}
EOF

      echo ""
    fi
  fi

  # ========================================================================
  # systemd-timesyncd Detailed Metrics (optional)
  # ========================================================================
  if [ "$NTP_SOURCE" = "timesyncd" ]; then
    local td_delay td_jitter td_freq td_rootdist td_poll td_packets td_leap td_server td_address
    read -r td_delay td_jitter td_freq td_rootdist td_poll td_packets td_leap td_server td_address _extra \
      <<<"$(get_timesyncd_details)"

    cat <<EOF
# HELP ntp_drift_timesyncd_server_info NTP server information
# TYPE ntp_drift_timesyncd_server_info gauge
ntp_drift_timesyncd_server_info{server="$td_server",address="$td_address"} 1

# HELP ntp_drift_timesyncd_delay_seconds Round-trip delay to NTP server in seconds
# TYPE ntp_drift_timesyncd_delay_seconds gauge
ntp_drift_timesyncd_delay_seconds ${td_delay:-0}

# HELP ntp_drift_timesyncd_jitter_seconds Jitter in seconds
# TYPE ntp_drift_timesyncd_jitter_seconds gauge
ntp_drift_timesyncd_jitter_seconds ${td_jitter:-0}

# HELP ntp_drift_timesyncd_frequency_ppm System clock frequency error in ppm
# TYPE ntp_drift_timesyncd_frequency_ppm gauge
ntp_drift_timesyncd_frequency_ppm ${td_freq:-0}

# HELP ntp_drift_timesyncd_root_distance_seconds Root distance in seconds
# TYPE ntp_drift_timesyncd_root_distance_seconds gauge
ntp_drift_timesyncd_root_distance_seconds ${td_rootdist:-0}

# HELP ntp_drift_timesyncd_poll_interval_seconds Current poll interval in seconds
# TYPE ntp_drift_timesyncd_poll_interval_seconds gauge
ntp_drift_timesyncd_poll_interval_seconds ${td_poll:-0}

# HELP ntp_drift_timesyncd_packet_count NTP packets exchanged
# TYPE ntp_drift_timesyncd_packet_count gauge
ntp_drift_timesyncd_packet_count ${td_packets:-0}

# HELP ntp_drift_timesyncd_leap_status Leap indicator (0=normal, 1=insert second, 2=delete second, 3=unsynchronised)
# TYPE ntp_drift_timesyncd_leap_status gauge
ntp_drift_timesyncd_leap_status ${td_leap:-3}
EOF

    echo ""
  fi

  # ========================================================================
  # Alert Thresholds
  # ========================================================================
  cat <<EOF
# HELP ntp_drift_offset_critical Clock offset exceeds 100ms (1=critical, 0=ok)
# TYPE ntp_drift_offset_critical gauge
ntp_drift_offset_critical ${offset_critical:-0}

# HELP ntp_drift_offset_warning Clock offset exceeds 10ms (1=warning, 0=ok)
# TYPE ntp_drift_offset_warning gauge
ntp_drift_offset_warning ${offset_warning:-0}

# HELP ntp_drift_unsynchronised Clock is not synchronised (1=unsync, 0=ok)
# TYPE ntp_drift_unsynchronised gauge
ntp_drift_unsynchronised $unsync_alert
EOF

  echo ""

  # ========================================================================
  # Exporter Runtime
  # ========================================================================
  local script_end script_duration
  script_end=$(date +%s)
  script_duration=$((script_end - script_start))

  cat <<EOF
# HELP ntp_drift_exporter_duration_seconds Time to generate all metrics
# TYPE ntp_drift_exporter_duration_seconds gauge
ntp_drift_exporter_duration_seconds $script_duration

# HELP ntp_drift_exporter_last_run_timestamp Unix timestamp of last successful run
# TYPE ntp_drift_exporter_last_run_timestamp gauge
ntp_drift_exporter_last_run_timestamp $script_end
EOF

  echo ""
}
|
||||
|
||||
# ============================================================================
|
||||
# HTTP SERVER MODE
|
||||
# ============================================================================
|
||||
|
||||
# Serve exactly one HTTP request.
# Reads the request line from stdin and writes the full response to stdout:
#   - request line starting with "GET /metrics" -> Prometheus text output
#     produced by generate_metrics
#   - anything else (including an empty/failed read) -> HTML landing page
# Only the first line is inspected; request headers are ignored.
_ntp_drift_serve_request() {
    local request_line
    # A failed read (EOF before any data) leaves the line empty, which falls
    # through to the landing page instead of aborting the handler.
    read -r request_line || true

    if [[ "$request_line" =~ ^GET\ /metrics ]]; then
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r\n'
        generate_metrics
    else
        # Serve HTML landing page for other requests
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n'
        cat <<'EOF'
<!DOCTYPE html>
<html>
<head><title>NTP Drift Exporter v1.0</title></head>
<body>
<h1>NTP Drift Exporter v1.0</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>Metric Categories</h2>
<ul>
<li>Core Status: exporter up/down, NTP source type</li>
<li>Sync Status: synchronised, stratum, offset</li>
<li>Chrony: root delay, dispersion, frequency, skew, per-source offsets</li>
<li>NTPd: peer offsets, delay, jitter, selected peer</li>
<li>Alerts: offset warning (>10ms), critical (>100ms), unsynchronised</li>
</ul>
</body>
</html>
EOF
    fi
}

# Run simple HTTP server using netcat.
# Serves metrics on the /metrics endpoint and a landing page elsewhere.
#
# Globals:   HTTP_PORT (read) - TCP port passed to `nc -l -p`
# Outputs:   startup/error messages on stderr
# Exits:     1 if nc is missing or the request FIFO cannot be created;
#            otherwise loops forever, one nc listener per request.
#
# Why a FIFO: in a plain `handler | nc` pipeline the handler's stdin is the
# script's stdin, not the client connection, so the request could never be
# read. Routing nc's stdout back into the handler through a FIFO lets the
# handler see the real request line and build the response only once a
# client has actually sent something.
run_http_server() {
    echo "Starting NTP drift exporter on port $HTTP_PORT..." >&2

    if ! command -v nc >/dev/null 2>&1; then
        echo "ERROR: netcat (nc) required for HTTP mode" >&2
        exit 1
    fi

    local fifo_dir request_fifo cleanup_cmd
    if ! fifo_dir=$(mktemp -d); then
        echo "ERROR: Cannot create temporary directory for HTTP mode" >&2
        exit 1
    fi
    request_fifo="$fifo_dir/request"
    if ! mkfifo "$request_fifo"; then
        rm -rf -- "$fifo_dir"
        echo "ERROR: Cannot create request FIFO for HTTP mode" >&2
        exit 1
    fi

    # Remove the FIFO directory when the script exits. The path is expanded
    # (and shell-quoted) now, so the trap does not depend on this function's
    # locals still being in scope.
    # NOTE(review): this replaces any EXIT trap installed earlier in the
    # script - confirm none is needed in HTTP mode.
    printf -v cleanup_cmd 'rm -rf -- %q' "$fifo_dir"
    trap "$cleanup_cmd" EXIT

    # Infinite loop accepting HTTP requests (one listener per request)
    while true; do
        # nc writes what the client sent into the FIFO; the handler reads it
        # from there and pipes its response into nc's stdin.
        # If nc itself fails (port in use, unsupported flags) pause briefly
        # so the loop does not spin at full CPU.
        _ntp_drift_serve_request < "$request_fifo" \
            | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null > "$request_fifo" \
            || sleep 1
    done
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN EXECUTION
|
||||
# ============================================================================
|
||||
|
||||
# Main entry point - parses arguments, then routes to one output mode:
#   HTTP_MODE = true       -> run_http_server (blocks until killed)
#   OUTPUT_FILE non-empty  -> textfile collector mode (temp file + rename)
#   otherwise              -> metrics printed to stdout
#
# Globals:   HTTP_MODE, OUTPUT_FILE (read; presumably set by parse_args)
# Arguments: all script arguments, forwarded unchanged to parse_args
# Outputs:   metrics on stdout or in OUTPUT_FILE; status/errors on stderr
# Exits:     1 in textfile mode when the output directory or temp file cannot
#            be created, metric generation fails, the result has fewer than
#            10 lines, or the final rename fails. In each of those cases the
#            previous OUTPUT_FILE is left untouched.
main() {
    parse_args "$@"

    if [ "$HTTP_MODE" = true ]; then
        # Run HTTP server (blocks until killed)
        run_http_server
    elif [ -n "$OUTPUT_FILE" ]; then
        # Textfile collector mode: write atomically using temp file
        local output_dir temp_file file_lines
        output_dir="$(dirname "$OUTPUT_FILE")"
        if ! mkdir -p -- "$output_dir"; then
            echo "ERROR: Cannot create output directory $output_dir" >&2
            exit 1
        fi

        # Create temp file in SAME directory for atomic rename (same filesystem)
        if ! temp_file=$(mktemp "${output_dir}/.ntp_drift_metrics.XXXXXX"); then
            echo "ERROR: Cannot create temp file in $output_dir" >&2
            exit 1
        fi

        # Generate metrics to temp file
        if ! generate_metrics > "$temp_file" 2>/dev/null; then
            rm -f -- "$temp_file"
            echo "ERROR: Failed to generate metrics" >&2
            exit 1
        fi

        # Validate: file must have content
        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)

        if [ "$file_lines" -lt 10 ]; then
            rm -f -- "$temp_file"
            echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
            exit 1
        fi

        # Set permissions before the move so the file is never visible with
        # mktemp's restrictive default mode, then rename over the target.
        # A failure in either step must not be reported as success and must
        # not leave the hidden temp file behind.
        if ! chmod 644 "$temp_file" || ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
            rm -f -- "$temp_file"
            echo "ERROR: Failed to write $OUTPUT_FILE, keeping previous" >&2
            exit 1
        fi

        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
    else
        # Default: output to stdout
        generate_metrics
    fi
}
|
||||
|
||||
# Script entry point: run main, forwarding every command-line argument as its own word ("$@")
|
||||
main "$@"
|
||||
Reference in New Issue
Block a user