#!/bin/bash
################################################################################
# Script Name: dns-health-check.sh
# Version: 1.0
# Description: Prometheus textfile collector exporter for DNS resolution health
#              Queries configurable DNS records and reports resolution status
#              and latency via node_exporter textfile collector
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Date: 2026-03-03
#
# Prerequisites:
#   - dig (bind-utils / dnsutils)
#   - node_exporter with textfile collector enabled
#   - /var/lib/node_exporter directory exists
#
# Usage:
#   # Run with default config
#   sudo ./dns-health-check.sh
#
#   # Dry run (output to stdout)
#   ./dns-health-check.sh --dry-run
#
#   # Debug mode
#   DEBUG=1 sudo ./dns-health-check.sh
#
# Config Format (pipe-delimited, one record per line):
#   record_name|record_type|dns_server|expected_value(optional)
#
# Examples:
#   example.com|A|8.8.8.8|
#   mail.example.com|MX|8.8.8.8|
#   _ldap._tcp.example.com|SRV|10.0.0.1|
#   example.com|A|8.8.8.8|93.184.216.34
#
# Metrics Exported:
#   - linux_dns_query_success{record,type,server}               - 1=resolved, 0=failed
#   - linux_dns_query_time_seconds{record,type,server}          - Resolution time
#   - linux_dns_query_answer_match{record,type,server,expected} - 1=match, 0=mismatch
#
################################################################################

set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

readonly VERSION="1.0"
readonly SCRIPT_NAME="${0##*/}"
readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
readonly OUTPUT_FILE="${TEXTFILE_DIR}/dns_health.prom"
readonly CONFIG_FILE="${CONFIG_FILE:-/etc/dns-health-check.conf}"
# Temp file lives next to the output file so the final mv is a same-filesystem
# rename; the .$$ suffix keeps it from ending in .prom while half-written.
readonly TMP_FILE="${OUTPUT_FILE}.$$"

# Runtime flags
DRY_RUN=false
DEBUG=${DEBUG:-}

# Default DNS records to check if no config file and no env var
readonly DEFAULT_RECORDS="localhost|A|127.0.0.1|"

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Print a debug message to stderr when DEBUG is non-empty.
debug_echo() {
    if [[ -n "$DEBUG" ]]; then
        printf '%s\n' "[DEBUG] $*" >&2
    fi
}

# Print an error message to stderr.
log_error() {
    printf '%s\n' "[ERROR] $*" >&2
}

# Remove the temporary metrics file; installed as the EXIT trap.
cleanup() {
    rm -f -- "$TMP_FILE"
}

trap cleanup EXIT

# Print usage information and exit 0.
# NOTE(review): the original help text was lost in the mangled source; this
# text is reconstructed from the file header. main() does not shift after
# calling this function, so it has to exit itself.
show_help() {
    cat <<EOF
Usage: ${SCRIPT_NAME} [OPTIONS]

Prometheus textfile collector exporter for DNS resolution health.

Options:
  --dry-run        Print metrics to stdout instead of writing ${OUTPUT_FILE}
  --debug          Enable debug output on stderr (same as DEBUG=1)
  -h, --help       Show this help message and exit
  -v, --version    Show version information and exit

Environment:
  DNS_RECORDS      Semicolon-separated records; takes priority over the config file
  CONFIG_FILE      Config file path (current: ${CONFIG_FILE})
  TEXTFILE_DIR     Textfile collector directory (current: ${TEXTFILE_DIR})
  DEBUG            Any non-empty value enables debug output

Record format (pipe-delimited, one record per line):
  record_name|record_type|dns_server|expected_value(optional)
EOF
    exit 0
}

# Print the script name and version and exit 0.
# NOTE(review): reconstructed; the original definition was lost.
show_version() {
    printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
    exit 0
}

# Abort with an error when the 'dig' binary is not on PATH.
check_dependencies() {
    if ! command -v dig &>/dev/null; then
        log_error "'dig' is not installed. Install bind-utils (RHEL/Rocky) or dnsutils (Debian/Ubuntu)."
        exit 1
    fi
}

# Print the current time as integer nanoseconds since the epoch.
# Some 'date' implementations do not support %N and print a literal 'N' while
# still exiting 0, so the output is validated instead of trusting the status.
now_ns() {
    local stamp
    stamp=$(date +%s%N 2>/dev/null)
    if [[ ! "$stamp" =~ ^[0-9]+$ ]]; then
        stamp="$(date +%s)000000000"
    fi
    printf '%s\n' "$stamp"
}

# Escape a string for use as a Prometheus label value:
# backslash -> \\, double quote -> \", newline -> \n.
# Arguments: $1 - raw value
# Outputs:   escaped value on stdout (no trailing newline)
escape_label_value() {
    local value="$1"
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\n'/\\n}
    printf '%s' "$value"
}

# ============================================================================
# RECORD LOADING
# ============================================================================

# Print one DNS record definition per line on stdout.
# Priority: DNS_RECORDS env var > config file > defaults.
# Blank entries are skipped; in the config file everything after '#' on a
# line is treated as a comment.
load_records() {
    local record_count=0
    local source=""
    local entry line
    local -a entries=()

    if [[ -n "${DNS_RECORDS:-}" ]]; then
        source="DNS_RECORDS environment variable"
        # read -a splits on ';' without pathname expansion (so '*.example.com'
        # is safe) and without changing IFS for the rest of the function.
        # Newlines are folded into ';' so multi-line values still split.
        IFS=';' read -r -a entries <<< "${DNS_RECORDS//$'\n'/;}"
        for entry in "${entries[@]}"; do
            # Trim leading and trailing whitespace
            entry="${entry#"${entry%%[![:space:]]*}"}"
            entry="${entry%"${entry##*[![:space:]]}"}"
            if [[ -n "$entry" ]]; then
                printf '%s\n' "$entry"
                record_count=$((record_count + 1))
            fi
        done
    elif [[ -f "$CONFIG_FILE" ]]; then
        source="$CONFIG_FILE"
        # '|| [[ -n $line ]]' keeps a final line that lacks a trailing newline
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Strip comments and whitespace
            line="${line%%#*}"
            line="${line#"${line%%[![:space:]]*}"}"
            line="${line%"${line##*[![:space:]]}"}"
            if [[ -z "$line" ]]; then
                continue
            fi
            printf '%s\n' "$line"
            record_count=$((record_count + 1))
        done < "$CONFIG_FILE"
    else
        source="defaults"
        printf '%s\n' "$DEFAULT_RECORDS"
        record_count=1
    fi

    debug_echo "Loaded $record_count DNS record(s) from $source"
}

# ============================================================================
# DNS QUERY
# ============================================================================

# Run one dig query and report the outcome.
# Arguments: $1 - record name
#            $2 - record type
#            $3 - DNS server to query
#            $4 - expected value (optional; substring match against the answer)
# Outputs:   "success|query_time_seconds|match" on stdout, where success is
#            1 or 0 and match is 1, 0, or empty when no expected value is given
query_dns() {
    local record="$1"
    local rtype="$2"
    local server="$3"
    local expected="${4:-}"

    debug_echo "Querying $rtype record for $record via $server"

    local dig_output=""
    local query_start
    local query_end
    local query_time
    local success=0
    local answer=""
    local match=""
    local line

    query_start=$(now_ns)
    if dig_output=$(dig +short +time=5 +tries=2 "$record" "$rtype" "@${server}" 2>/dev/null); then
        query_end=$(now_ns)
        # Keep only real answer lines: dig can print ';;' diagnostics on
        # stdout even with +short, and those must not count as an answer.
        while IFS= read -r line; do
            [[ -z "$line" || "$line" == ';'* ]] && continue
            answer+="${answer:+$'\n'}${line}"
        done <<< "$dig_output"

        if [[ -n "$answer" ]]; then
            success=1
            debug_echo "  Answer: ${answer//$'\n'/ }"
        else
            success=0
            debug_echo "  Empty answer (NXDOMAIN or no records)"
        fi
    else
        query_end=$(now_ns)
        success=0
        debug_echo "  Query failed"
    fi

    # Calculate query time in seconds (microsecond precision, rounded).
    # Clamp to zero in case the wall clock stepped backwards mid-query.
    local elapsed_ns=$((query_end - query_start))
    if ((elapsed_ns < 0)); then
        elapsed_ns=0
    fi
    local elapsed_us=$(((elapsed_ns + 500) / 1000))
    printf -v query_time '%d.%06d' "$((elapsed_us / 1000000))" "$((elapsed_us % 1000000))"

    # Check expected value if provided. Literal substring match done in the
    # shell: no option-injection when the value starts with '-', and no
    # pipeline whose SIGPIPE could turn a match into a failure under pipefail.
    if [[ -n "$expected" ]]; then
        if [[ "$answer" == *"$expected"* ]]; then
            match=1
            debug_echo "  Expected value matched: $expected"
        else
            match=0
            debug_echo "  Expected value NOT matched: $expected (got: ${answer//$'\n'/ })"
        fi
    fi

    printf '%s\n' "${success}|${query_time}|${match}"
}

# ============================================================================
# METRICS COLLECTION
# ============================================================================

# Query every configured record and print the metrics in Prometheus text
# exposition format on stdout. Invalid record lines are logged and skipped.
collect_metrics() {
    local -a records=()
    mapfile -t records < <(load_records)

    local success_metrics=""
    local time_metrics=""
    local match_metrics=""
    local has_match_metric=false

    local record_line record rtype server expected
    local result qsuccess qtime qmatch
    local labels match_labels

    for record_line in "${records[@]}"; do
        # Split on '|' in the shell. Unlike cut, a line without any '|'
        # leaves rtype/server empty and is rejected below instead of
        # repeating the whole line in every field. The trailing '_' soaks up
        # extra fields so 'expected' is exactly field 4.
        record="" rtype="" server="" expected=""
        IFS='|' read -r record rtype server expected _ <<< "$record_line"

        if [[ -z "$record" ]] || [[ -z "$rtype" ]] || [[ -z "$server" ]]; then
            log_error "Invalid config line: $record_line (expected: record_name|record_type|dns_server|expected_value)"
            continue
        fi

        result=$(query_dns "$record" "$rtype" "$server" "$expected")

        qsuccess="" qtime="" qmatch=""
        IFS='|' read -r qsuccess qtime qmatch <<< "$result"

        # Label values are escaped so quotes/backslashes (common in TXT
        # expectations) cannot corrupt the exposition file.
        labels="record=\"$(escape_label_value "$record")\""
        labels+=",type=\"$(escape_label_value "$rtype")\""
        labels+=",server=\"$(escape_label_value "$server")\""

        success_metrics+="linux_dns_query_success{${labels}} ${qsuccess:-0}"$'\n'
        time_metrics+="linux_dns_query_time_seconds{${labels}} ${qtime:-0}"$'\n'

        if [[ -n "$expected" ]]; then
            has_match_metric=true
            match_labels="${labels},expected=\"$(escape_label_value "$expected")\""
            match_metrics+="linux_dns_query_answer_match{${match_labels}} ${qmatch:-0}"$'\n'
        fi
    done

    # Emit with %s so nothing in the collected text is re-interpreted as a
    # backslash escape sequence.
    printf '%s\n' \
        "# HELP linux_dns_query_success DNS query resolved successfully (1=resolved, 0=failed)" \
        "# TYPE linux_dns_query_success gauge"
    printf '%s' "$success_metrics"

    printf '%s\n' \
        "# HELP linux_dns_query_time_seconds DNS query resolution time in seconds" \
        "# TYPE linux_dns_query_time_seconds gauge"
    printf '%s' "$time_metrics"

    if [[ "$has_match_metric" == "true" ]]; then
        printf '%s\n' \
            "# HELP linux_dns_query_answer_match DNS answer matches expected value (1=match, 0=mismatch)" \
            "# TYPE linux_dns_query_answer_match gauge"
        printf '%s' "$match_metrics"
    fi
}

# ============================================================================
# OUTPUT
# ============================================================================

# Collect metrics and either print them (dry run) or write them to
# OUTPUT_FILE via a temp file and rename. Exits 1 when the textfile
# directory is missing or the file cannot be written.
write_metrics() {
    local metrics
    if ! metrics=$(collect_metrics); then
        log_error "Failed to collect DNS metrics"
        exit 1
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
        printf '%s\n' "$metrics"
        return
    fi

    if [[ ! -d "$TEXTFILE_DIR" ]]; then
        log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
        exit 1
    fi

    # Write to the temp file first, then rename, so a reader never sees a
    # partially written file. Both steps are checked so a failed write is
    # not reported as success.
    if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
        log_error "Failed to write temporary metrics file: $TMP_FILE"
        exit 1
    fi

    if ! mv -f -- "$TMP_FILE" "$OUTPUT_FILE"; then
        log_error "Failed to move $TMP_FILE to $OUTPUT_FILE"
        exit 1
    fi

    debug_echo "Metrics written to $OUTPUT_FILE"
}

# ============================================================================
# MAIN
# ============================================================================

# Parse command-line options, verify dependencies, and write the metrics.
main() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --debug)
                DEBUG=1
                shift
                ;;
            --help|-h)
                show_help
                ;;
            --version|-v)
                show_version
                ;;
            *)
                log_error "Unknown option: $1"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done

    check_dependencies
    write_metrics
}

main "$@"