88551536e6
Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f Co-authored-by: Amp <amp@ampcode.com>
384 lines
11 KiB
Bash
384 lines
11 KiB
Bash
#!/bin/bash
################################################################################
# Script Name: dns-health-check.sh
# Version: 1.0
# Description: Prometheus textfile collector exporter for DNS resolution health
#              Queries configurable DNS records and reports resolution status
#              and latency via node_exporter textfile collector
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Date: 2026-03-03
#
# Prerequisites:
#   - dig (bind-utils / dnsutils)
#   - node_exporter with textfile collector enabled
#   - /var/lib/node_exporter directory exists
#
# Usage:
#   # Run with default config
#   sudo ./dns-health-check.sh
#
#   # Dry run (output to stdout)
#   ./dns-health-check.sh --dry-run
#
#   # Debug mode
#   DEBUG=1 sudo ./dns-health-check.sh
#
# Config Format (pipe-delimited, one record per line):
#   record_name|record_type|dns_server|expected_value(optional)
#
# Examples:
#   example.com|A|8.8.8.8|
#   mail.example.com|MX|8.8.8.8|
#   _ldap._tcp.example.com|SRV|10.0.0.1|
#   example.com|A|8.8.8.8|93.184.216.34
#
# Metrics Exported:
#   - linux_dns_query_success{record,type,server} - 1=resolved, 0=failed
#   - linux_dns_query_time_seconds{record,type,server} - Resolution time
#   - linux_dns_query_answer_match{record,type,server,expected} - 1=match, 0=mismatch
#
################################################################################

# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

# Script identity; SCRIPT_NAME is $0 with any leading directory stripped.
readonly VERSION="1.0"
readonly SCRIPT_NAME="${0##*/}"

# Paths. TEXTFILE_DIR and CONFIG_FILE honour a value already present in the
# environment and fall back to the defaults shown here.
readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
readonly OUTPUT_FILE="${TEXTFILE_DIR}/dns_health.prom"
readonly CONFIG_FILE="${CONFIG_FILE:-/etc/dns-health-check.conf}"
# Scratch file next to OUTPUT_FILE, suffixed with this shell's PID ($$).
readonly TMP_FILE="${OUTPUT_FILE}.$$"

# Runtime flags
DRY_RUN=false
DEBUG=${DEBUG:-}

# Default DNS records to check if no config file and no env var
readonly DEFAULT_RECORDS="localhost|A|127.0.0.1|"

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

#######################################
# Print a debug message to stderr when debugging is enabled.
# Globals:   DEBUG (read) - any non-empty value enables output
# Arguments: $* - message words; joined by "$*" into one string
# Outputs:   "[DEBUG] <message>" on stderr, nothing on stdout
#######################################
debug_echo() {
  if [[ -n "${DEBUG:-}" ]]; then
    # printf rather than echo so the message text is never treated as an
    # option or escape sequence.
    printf '[DEBUG] %s\n' "$*" >&2
  fi
}

#######################################
# Print an error message to stderr.
# Arguments: $* - message words; joined by "$*" into one string
# Outputs:   "[ERROR] <message>" on stderr, nothing on stdout
#######################################
log_error() {
  printf '[ERROR] %s\n' "$*" >&2
}

#######################################
# Remove the temporary metrics file if it exists.
# Globals:   TMP_FILE (read)
#######################################
cleanup() {
  # "--" ends option parsing so a path beginning with '-' is still treated
  # as a file name.
  rm -f -- "$TMP_FILE"
}

# Run cleanup whenever the shell exits.
trap cleanup EXIT

#######################################
# Print the usage text and exit successfully.
# Globals:   SCRIPT_NAME (read)
# Outputs:   usage text on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
show_help() {
  # Unquoted delimiter: $SCRIPT_NAME is expanded inside the here-document.
  cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

Prometheus textfile collector exporter for DNS resolution health.
Queries DNS records and reports resolution status and latency.

OPTIONS:
    --dry-run         Output metrics to stdout instead of writing to file
    --debug           Enable debug output
    -h, --help        Show this help message
    -v, --version     Show version

CONFIGURATION:
    DNS records are configured in /etc/dns-health-check.conf (or set CONFIG_FILE).
    Each line defines a DNS check in pipe-delimited format:

        record_name|record_type|dns_server|expected_value(optional)

    Example config:
        example.com|A|8.8.8.8|
        mail.example.com|MX|8.8.8.8|
        _ldap._tcp.example.com|SRV|10.0.0.1|
        example.com|A|8.8.8.8|93.184.216.34

    Lines starting with # are comments. Blank lines are ignored.
    If expected_value is set, the script checks whether the DNS answer matches.

    Records can also be supplied via the DNS_RECORDS environment variable
    as a semicolon-separated list using the same pipe-delimited format:

        DNS_RECORDS="example.com|A|8.8.8.8|;google.com|A|8.8.4.4|"

    If neither a config file nor DNS_RECORDS is found, a default check
    queries localhost via 127.0.0.1 as a basic resolution test.

ENVIRONMENT VARIABLES:
    CONFIG_FILE       Path to config file (default: /etc/dns-health-check.conf)
    TEXTFILE_DIR      Textfile collector directory (default: /var/lib/node_exporter)
    DNS_RECORDS       Semicolon-separated DNS records (overrides config file)
    DEBUG             Enable debug output when set to any value

EXAMPLES:
    sudo $SCRIPT_NAME
    $SCRIPT_NAME --dry-run
    DEBUG=1 sudo $SCRIPT_NAME
    DNS_RECORDS="example.com|A|8.8.8.8|93.184.216.34" $SCRIPT_NAME --dry-run

EOF
  exit 0
}

#######################################
# Print the script name and version, then exit successfully.
# Globals:   SCRIPT_NAME (read), VERSION (read)
# Outputs:   "<SCRIPT_NAME> version <VERSION>" on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
show_version() {
  printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
  exit 0
}

# ============================================================================
# DEPENDENCY CHECK
# ============================================================================

#######################################
# Verify that the external commands this script relies on are available.
# Outputs:   an error message via log_error when a command is missing
# Returns:   0 when everything is present; otherwise exits the shell with 1
#######################################
check_dependencies() {
  if ! command -v dig &>/dev/null; then
    log_error "'dig' is not installed. Install bind-utils (RHEL/Rocky) or dnsutils (Debian/Ubuntu)."
    exit 1
  fi

  # awk formats the query duration; without it the time metric would be
  # written with an empty value.
  if ! command -v awk &>/dev/null; then
    log_error "'awk' is not installed. It is required to format DNS query times."
    exit 1
  fi
}

# ============================================================================
# RECORD LOADING
# ============================================================================

#######################################
# Emit the DNS records to check, one pipe-delimited record per line.
# Source priority: DNS_RECORDS env var > config file > built-in default.
# Globals:   DNS_RECORDS (read, optional), CONFIG_FILE (read),
#            DEFAULT_RECORDS (read)
# Outputs:   records on stdout; a summary through debug_echo
#######################################
load_records() {
  local record_count=0
  local source=""
  local entry line
  local -a entries=()

  if [[ -n "${DNS_RECORDS:-}" ]]; then
    source="DNS_RECORDS environment variable"
    # Split on ';' with read -a instead of an unquoted expansion: this
    # performs no pathname expansion (a record such as "*.example.com|..."
    # stays literal) and the IFS change is limited to this one command.
    # read returns non-zero at end of input when -d '' is used; that is
    # expected here.
    IFS=';' read -r -d '' -a entries <<< "$DNS_RECORDS" || true
    for entry in "${entries[@]}"; do
      # Trim leading, then trailing, whitespace.
      entry="${entry#"${entry%%[![:space:]]*}"}"
      entry="${entry%"${entry##*[![:space:]]}"}"
      if [[ -n "$entry" ]]; then
        printf '%s\n' "$entry"
        record_count=$((record_count + 1))
      fi
    done
  elif [[ -f "$CONFIG_FILE" ]]; then
    source="$CONFIG_FILE"
    # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Strip comments and whitespace
      line="${line%%#*}"
      line="${line#"${line%%[![:space:]]*}"}"
      line="${line%"${line##*[![:space:]]}"}"

      if [[ -z "$line" ]]; then
        continue
      fi

      printf '%s\n' "$line"
      record_count=$((record_count + 1))
    done < "$CONFIG_FILE"
  else
    source="defaults"
    printf '%s\n' "$DEFAULT_RECORDS"
    record_count=1
  fi

  debug_echo "Loaded $record_count DNS record(s) from $source"
}

# ============================================================================
# DNS QUERY
# ============================================================================

# Print the current time in nanoseconds since the epoch. Falls back to
# whole seconds padded with nine zeros when `date +%s%N` fails or prints
# anything that is not purely digits.
_dns_now_ns() {
  local now
  now=$(date +%s%N 2>/dev/null)
  if [[ ! "$now" =~ ^[0-9]+$ ]]; then
    now="$(date +%s)000000000"
  fi
  printf '%s\n' "$now"
}

#######################################
# Run one DNS query with dig and report the outcome.
# Arguments:
#   $1 - record name
#   $2 - record type
#   $3 - DNS server to query
#   $4 - expected value (may be empty); matched as a fixed substring of
#        the answer
# Outputs:   "success|seconds|match" on stdout, where success is 1 or 0,
#            seconds has six decimals, and match is 1, 0, or empty when no
#            expected value was given. Progress goes through debug_echo.
#######################################
query_dns() {
  local record="$1"
  local rtype="$2"
  local server="$3"
  local expected="${4:-}"

  debug_echo "Querying $rtype record for $record via $server"

  local dig_output=""
  local dig_ok=false
  local line
  local nl=$'\n'
  local query_start query_end elapsed_ns query_time
  local success=0
  local answer=""
  local match=""

  query_start=$(_dns_now_ns)
  if dig_output=$(dig +short +time=5 +tries=2 "$record" "$rtype" "@${server}" 2>/dev/null); then
    dig_ok=true
  fi
  query_end=$(_dns_now_ns)

  if [[ "$dig_ok" == "true" ]]; then
    # Keep only real answer lines. Lines starting with ';' are dig
    # comments/diagnostics (NOTE(review): some dig builds appear to print
    # ";; communications error ..." on stdout even with +short - confirm);
    # counting them would report a failed lookup as resolved.
    while IFS= read -r line; do
      if [[ -z "$line" || "$line" == ";"* ]]; then
        continue
      fi
      answer+="${answer:+$nl}${line}"
    done <<< "$dig_output"

    if [[ -n "$answer" ]]; then
      success=1
      debug_echo "  Answer: ${answer//$nl/ }"
    else
      debug_echo "  Empty answer (NXDOMAIN or no records)"
    fi
  else
    debug_echo "  Query failed"
  fi

  # Calculate query time in seconds. A negative difference (clock stepped
  # backwards between the two readings) is clamped to zero.
  elapsed_ns=$((query_end - query_start))
  if ((elapsed_ns < 0)); then
    elapsed_ns=0
  fi
  # The value is passed with -v rather than spliced into the awk program.
  # LC_ALL=C is a precaution so the decimal separator is always '.'.
  query_time=$(LC_ALL=C awk -v ns="$elapsed_ns" 'BEGIN { printf "%.6f", ns / 1000000000 }')

  # Check expected value if provided
  if [[ -n "$expected" ]]; then
    # "--" lets the expected value start with '-'; the here-string avoids
    # an echo|grep pipeline whose status pipefail could distort.
    if grep -qF -- "$expected" <<< "$answer"; then
      match=1
      debug_echo "  Expected value matched: $expected"
    else
      match=0
      debug_echo "  Expected value NOT matched: $expected (got: ${answer//$nl/ })"
    fi
  fi

  printf '%s|%s|%s\n' "$success" "$query_time" "$match"
}

# ============================================================================
# METRICS COLLECTION
# ============================================================================

# Escape a string for use as a Prometheus label value: backslash, double
# quote and newline become \\, \" and \n respectively.
_prom_escape_label() {
  local value="$1"
  local escaped=""
  local ch i
  for ((i = 0; i < ${#value}; i++)); do
    ch="${value:i:1}"
    case "$ch" in
      '\') escaped+='\\' ;;
      '"') escaped+='\"' ;;
      $'\n') escaped+='\n' ;;
      *) escaped+="$ch" ;;
    esac
  done
  printf '%s' "$escaped"
}

#######################################
# Query every configured record and print the metrics in Prometheus text
# format.
# Outputs:   HELP/TYPE headers and samples for linux_dns_query_success and
#            linux_dns_query_time_seconds, plus linux_dns_query_answer_match
#            when at least one record has an expected value. Invalid config
#            lines are reported through log_error and skipped.
#######################################
collect_metrics() {
  local -a records=()
  local record_line
  while IFS= read -r record_line; do
    records+=("$record_line")
  done < <(load_records)

  local -a success_lines=()
  local -a time_lines=()
  local -a match_lines=()
  local record rtype server expected rest
  local result qsuccess qtime qmatch
  local labels

  for record_line in "${records[@]}"; do
    # Split record_name|record_type|dns_server|expected_value with the
    # shell itself. "rest" absorbs any fields past the fourth. A line with
    # no '|' leaves rtype and server empty and is rejected below; cut would
    # have returned the whole line for every field.
    record="" rtype="" server="" expected="" rest=""
    IFS='|' read -r record rtype server expected rest <<< "$record_line"

    if [[ -z "$record" ]] || [[ -z "$rtype" ]] || [[ -z "$server" ]]; then
      log_error "Invalid config line: $record_line (expected: record_name|record_type|dns_server|expected_value)"
      continue
    fi

    result=$(query_dns "$record" "$rtype" "$server" "$expected")

    # query_dns prints success|seconds|match.
    qsuccess="" qtime="" qmatch="" rest=""
    IFS='|' read -r qsuccess qtime qmatch rest <<< "$result"

    # Label values are escaped so quotes or backslashes in a config field
    # cannot produce an unparsable metrics file.
    labels="record=\"$(_prom_escape_label "$record")\""
    labels+=",type=\"$(_prom_escape_label "$rtype")\""
    labels+=",server=\"$(_prom_escape_label "$server")\""

    # A sample without a value would be invalid, so treat a missing status
    # as failure and omit a missing duration.
    success_lines+=("linux_dns_query_success{${labels}} ${qsuccess:-0}")
    if [[ -n "$qtime" ]]; then
      time_lines+=("linux_dns_query_time_seconds{${labels}} ${qtime}")
    fi

    if [[ -n "$expected" ]]; then
      labels+=",expected=\"$(_prom_escape_label "$expected")\""
      match_lines+=("linux_dns_query_answer_match{${labels}} ${qmatch:-0}")
    fi
  done

  # Lines are printed with '%s' so backslashes in them are emitted verbatim.
  # Each array is printed only when non-empty: printf '%s\n' with no
  # arguments would still print one blank line.
  printf '%s\n' \
    "# HELP linux_dns_query_success DNS query resolved successfully (1=resolved, 0=failed)" \
    "# TYPE linux_dns_query_success gauge"
  if ((${#success_lines[@]} > 0)); then
    printf '%s\n' "${success_lines[@]}"
  fi

  printf '%s\n' \
    "# HELP linux_dns_query_time_seconds DNS query resolution time in seconds" \
    "# TYPE linux_dns_query_time_seconds gauge"
  if ((${#time_lines[@]} > 0)); then
    printf '%s\n' "${time_lines[@]}"
  fi

  if ((${#match_lines[@]} > 0)); then
    printf '%s\n' \
      "# HELP linux_dns_query_answer_match DNS answer matches expected value (1=match, 0=mismatch)" \
      "# TYPE linux_dns_query_answer_match gauge"
    printf '%s\n' "${match_lines[@]}"
  fi
}

# ============================================================================
# OUTPUT
# ============================================================================

#######################################
# Collect the metrics and deliver them.
# In dry-run mode the metrics go to stdout. Otherwise they are written to
# TMP_FILE and then renamed to OUTPUT_FILE.
# Globals:   DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (all read)
# Outputs:   metrics on stdout (dry run) or in OUTPUT_FILE
# Returns:   0 on success; exits the shell with 1 when TEXTFILE_DIR is
#            missing or the file cannot be written or moved
#######################################
write_metrics() {
  local metrics

  if [[ "$DRY_RUN" == "true" ]]; then
    metrics=$(collect_metrics)
    printf '%s\n' "$metrics"
    return
  fi

  # Checked before collecting so no DNS queries are made when the result
  # could not be stored anyway.
  if [[ ! -d "$TEXTFILE_DIR" ]]; then
    log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
    exit 1
  fi

  metrics=$(collect_metrics)

  # Write to a scratch file first and rename afterwards, so OUTPUT_FILE is
  # only replaced once the new content has been written completely.
  if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
    log_error "Failed to write temporary metrics file: $TMP_FILE"
    exit 1
  fi

  if ! mv -- "$TMP_FILE" "$OUTPUT_FILE"; then
    log_error "Failed to move $TMP_FILE to $OUTPUT_FILE"
    exit 1
  fi

  debug_echo "Metrics written to $OUTPUT_FILE"
}

# ============================================================================
# MAIN
# ============================================================================

#######################################
# Parse command-line options, then check dependencies and write metrics.
# Globals:   DRY_RUN (written by --dry-run), DEBUG (written by --debug)
# Arguments: the script's command-line arguments
# Returns:   status of write_metrics; exits 1 on an unknown option.
#            --help/-h and --version/-v hand over to show_help and
#            show_version.
#######################################
main() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --debug)
        DEBUG=1
        shift
        ;;
      --help | -h)
        show_help
        ;;
      --version | -v)
        show_version
        ;;
      *)
        log_error "Unknown option: $1"
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done

  check_dependencies
  write_metrics
}

# Entry point: pass the script's arguments through to main unchanged.
main "$@"