Add all 44 scripts, update CI: error severity baseline, PowerShell validation, multi-distro testing

Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
root
2026-03-07 05:40:51 +01:00
parent db43b8a313
commit 88551536e6
43 changed files with 28906 additions and 23 deletions
+383
View File
@@ -0,0 +1,383 @@
#!/bin/bash
################################################################################
# Script Name: dns-health-check.sh
# Version: 1.0
# Description: Prometheus textfile collector exporter for DNS resolution health
# Queries configurable DNS records and reports resolution status
# and latency via node_exporter textfile collector
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Date: 2026-03-03
#
# Prerequisites:
# - dig (bind-utils / dnsutils)
# - node_exporter with textfile collector enabled
# - /var/lib/node_exporter directory exists
#
# Usage:
# # Run with default config
# sudo ./dns-health-check.sh
#
# # Dry run (output to stdout)
# ./dns-health-check.sh --dry-run
#
# # Debug mode
# DEBUG=1 sudo ./dns-health-check.sh
#
# Config Format (pipe-delimited, one record per line):
# record_name|record_type|dns_server|expected_value(optional)
#
# Examples:
# example.com|A|8.8.8.8|
# mail.example.com|MX|8.8.8.8|
# _ldap._tcp.example.com|SRV|10.0.0.1|
# example.com|A|8.8.8.8|93.184.216.34
#
# Metrics Exported:
# - linux_dns_query_success{record,type,server} - 1=resolved, 0=failed
# - linux_dns_query_time_seconds{record,type,server} - Resolution time
# - linux_dns_query_answer_match{record,type,server,expected} - 1=match, 0=mismatch
#
################################################################################
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

# Script identity.
VERSION="1.0"
SCRIPT_NAME=${0##*/}

# Paths; TEXTFILE_DIR and CONFIG_FILE may be overridden from the environment.
TEXTFILE_DIR=${TEXTFILE_DIR:-/var/lib/node_exporter}
OUTPUT_FILE="${TEXTFILE_DIR}/dns_health.prom"
CONFIG_FILE=${CONFIG_FILE:-/etc/dns-health-check.conf}
# Per-process scratch file (PID suffix) that is renamed onto OUTPUT_FILE.
TMP_FILE="${OUTPUT_FILE}.$$"

# Default DNS records to check if no config file and no env var
DEFAULT_RECORDS='localhost|A|127.0.0.1|'

# Freeze every constant in one statement.
readonly VERSION SCRIPT_NAME TEXTFILE_DIR OUTPUT_FILE CONFIG_FILE TMP_FILE \
  DEFAULT_RECORDS

# Runtime flags (mutable; main() updates them from the command line)
DRY_RUN=false
DEBUG=${DEBUG:-}
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
#######################################
# Emit a diagnostic line on stderr when debugging is enabled.
# Globals:   DEBUG (read) - any non-empty value enables output
# Arguments: message words; joined into a single line
# Outputs:   "[DEBUG] <message>" on stderr
# Returns:   0 when DEBUG is empty (nothing is printed)
#######################################
debug_echo() {
  # Explicit "return 0": this function is sometimes the last command of its
  # caller, so its status must stay 0 when debugging is off.
  [[ -z "$DEBUG" ]] && return 0
  printf '[DEBUG] %s\n' "$*" >&2
}
#######################################
# Report an error on stderr.
# Arguments: message words; joined into a single line
# Outputs:   "[ERROR] <message>" on stderr
#######################################
log_error() {
  printf '[ERROR] %s\n' "$*" >&2
}
#######################################
# Remove the temporary metrics file; a missing file is not an error (-f).
# Globals: TMP_FILE (read)
#######################################
cleanup() { rm -f "$TMP_FILE"; }

# Run cleanup whenever the shell exits.
trap 'cleanup' EXIT
#######################################
# Print the usage/help text to stdout and terminate the script.
# The heredoc delimiter is unquoted, so $SCRIPT_NAME is expanded in the text.
# Globals:   SCRIPT_NAME (read)
# Outputs:   help text on stdout
# Returns:   does not return; exits with status 0
#######################################
show_help() {
cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]
Prometheus textfile collector exporter for DNS resolution health.
Queries DNS records and reports resolution status and latency.
OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version
CONFIGURATION:
DNS records are configured in /etc/dns-health-check.conf (or set CONFIG_FILE).
Each line defines a DNS check in pipe-delimited format:
record_name|record_type|dns_server|expected_value(optional)
Example config:
example.com|A|8.8.8.8|
mail.example.com|MX|8.8.8.8|
_ldap._tcp.example.com|SRV|10.0.0.1|
example.com|A|8.8.8.8|93.184.216.34
Lines starting with # are comments. Blank lines are ignored.
If expected_value is set, the script checks whether the DNS answer matches.
Records can also be supplied via the DNS_RECORDS environment variable
as a semicolon-separated list using the same pipe-delimited format:
DNS_RECORDS="example.com|A|8.8.8.8|;google.com|A|8.8.4.4|"
If neither a config file nor DNS_RECORDS is found, a default check
queries localhost via 127.0.0.1 as a basic resolution test.
ENVIRONMENT VARIABLES:
CONFIG_FILE Path to config file (default: /etc/dns-health-check.conf)
TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter)
DNS_RECORDS Semicolon-separated DNS records (overrides config file)
DEBUG Enable debug output when set to any value
EXAMPLES:
sudo $SCRIPT_NAME
$SCRIPT_NAME --dry-run
DEBUG=1 sudo $SCRIPT_NAME
DNS_RECORDS="example.com|A|8.8.8.8|93.184.216.34" $SCRIPT_NAME --dry-run
EOF
exit 0
}
#######################################
# Print "<script name> version <version>" to stdout and terminate.
# Globals:   SCRIPT_NAME, VERSION (read)
# Returns:   does not return; exits with status 0
#######################################
show_version() {
  printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
  exit 0
}
# ============================================================================
# DEPENDENCY CHECK
# ============================================================================
#######################################
# Verify that the external tool this script relies on (dig) can be found.
# Outputs:   an error message on stderr (via log_error) when dig is missing
# Returns:   0 when dig is available; otherwise exits the script with 1
#######################################
check_dependencies() {
  # Happy path first: nothing to do when dig resolves to a command.
  command -v dig >/dev/null 2>&1 && return 0

  log_error "'dig' is not installed. Install bind-utils (RHEL/Rocky) or dnsutils (Debian/Ubuntu)."
  exit 1
}
# ============================================================================
# RECORD LOADING
# ============================================================================
#######################################
# Print the DNS records to check, one pipe-delimited record per line.
#
# Source priority: DNS_RECORDS env var > config file > built-in default.
# Surrounding whitespace is trimmed and empty entries are skipped. In the
# config file, everything from the first '#' on a line is dropped.
#
# Globals:   DNS_RECORDS, CONFIG_FILE, DEFAULT_RECORDS (read)
# Outputs:   records on stdout; a summary via debug_echo
#######################################
load_records() {
  local record_count=0
  local source=""
  local entry
  local line
  local -a entries=()

  if [[ -n "${DNS_RECORDS:-}" ]]; then
    source="DNS_RECORDS environment variable"
    # Split on ';' with read -a rather than an unquoted expansion, so a
    # wildcard record such as "*.example.com|A|..." is never glob-expanded
    # against the current directory. IFS is changed for this command only.
    # read returns non-zero at end of input (no NUL found); that is expected.
    IFS=';' read -r -d '' -a entries <<<"$DNS_RECORDS" || true
    for entry in "${entries[@]}"; do
      # Trim leading, then trailing whitespace.
      entry="${entry#"${entry%%[![:space:]]*}"}"
      entry="${entry%"${entry##*[![:space:]]}"}"
      if [[ -n "$entry" ]]; then
        printf '%s\n' "$entry"
        record_count=$((record_count + 1))
      fi
    done
  elif [[ -f "$CONFIG_FILE" ]]; then
    source="$CONFIG_FILE"
    # "|| [[ -n $line ]]" keeps the final line when the file does not end
    # with a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Strip comments and whitespace
      line="${line%%#*}"
      line="${line#"${line%%[![:space:]]*}"}"
      line="${line%"${line##*[![:space:]]}"}"
      if [[ -z "$line" ]]; then
        continue
      fi
      printf '%s\n' "$line"
      record_count=$((record_count + 1))
    done < "$CONFIG_FILE"
  else
    source="defaults"
    printf '%s\n' "$DEFAULT_RECORDS"
    record_count=1
  fi

  debug_echo "Loaded $record_count DNS record(s) from $source"
}
# ============================================================================
# DNS QUERY
# ============================================================================
#######################################
# Print the current time as integer nanoseconds since the epoch.
# Falls back to whole seconds (padded with zeros) when `date` does not
# produce a purely numeric value for +%s%N.
#######################################
_dns_now_ns() {
  local stamp
  stamp=$(date +%s%N 2>/dev/null)
  # Validate the output rather than the exit status: a date that does not
  # understand %N may still exit 0 and print a non-numeric string.
  if [[ ! "$stamp" =~ ^[0-9]+$ ]]; then
    stamp="$(date +%s)000000000"
  fi
  printf '%s' "$stamp"
}

#######################################
# Run one DNS lookup with dig and report the outcome.
#
# Arguments:
#   $1 - record name
#   $2 - record type (A, MX, SRV, ...)
#   $3 - DNS server to query
#   $4 - expected value (optional); matched as a literal substring of the
#        answer
# Outputs:
#   One line on stdout: "<success>|<seconds>|<match>"
#     success - 1 if dig succeeded and returned at least one answer line,
#               otherwise 0
#     seconds - elapsed wall-clock time with six decimals
#     match   - 1/0 when an expected value was given, empty otherwise
#   Diagnostics go through debug_echo.
#######################################
query_dns() {
  local record="$1"
  local rtype="$2"
  local server="$3"
  local expected="${4:-}"

  debug_echo "Querying $rtype record for $record via $server"

  local dig_output
  local query_start
  local query_end
  local query_time
  local elapsed_ns
  local success=0
  local answer=""
  local match=""

  query_start=$(_dns_now_ns)
  if dig_output=$(dig +short +time=5 +tries=2 "$record" "$rtype" "@${server}" 2>/dev/null); then
    query_end=$(_dns_now_ns)
    # Lines starting with ';' are dig diagnostics, not answers; drop them so
    # they neither count as a successful resolution nor take part in the
    # expected-value match. grep exits 1 when nothing is left, hence "|| true".
    answer=$(grep -v '^;' <<<"$dig_output" || true)
    if [[ -n "$answer" ]]; then
      success=1
      debug_echo " Answer: ${answer//$'\n'/ }"
    else
      debug_echo " Empty answer (NXDOMAIN or no records)"
    fi
  else
    query_end=$(_dns_now_ns)
    debug_echo " Query failed"
  fi

  # Calculate query time in seconds. The value is passed with -v instead of
  # being spliced into the awk program, and LC_ALL=C keeps '.' as the
  # decimal separator regardless of the caller's locale.
  elapsed_ns=$((query_end - query_start))
  query_time=$(LC_ALL=C awk -v ns="$elapsed_ns" 'BEGIN { printf "%.6f", ns / 1000000000 }')

  # Check expected value if provided. "--" stops option parsing so a value
  # beginning with '-' (e.g. "-all") is treated as the pattern; the
  # here-string avoids a pipeline whose status could be affected by pipefail.
  if [[ -n "$expected" ]]; then
    if grep -qF -- "$expected" <<<"$answer"; then
      match=1
      debug_echo " Expected value matched: $expected"
    else
      match=0
      debug_echo " Expected value NOT matched: $expected (got: ${answer//$'\n'/ })"
    fi
  fi

  printf '%s\n' "${success}|${query_time}|${match}"
}
# ============================================================================
# METRICS COLLECTION
# ============================================================================
#######################################
# Escape a string for use as a Prometheus label value:
# backslash -> \\, double quote -> \", newline -> \n.
# Arguments: $1 - raw value
# Outputs:   escaped value on stdout (no trailing newline)
#######################################
_prom_escape_label() {
  local value=$1
  # Backslashes first, so the escapes added below are not escaped again.
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  value=${value//$'\n'/\\n}
  printf '%s' "$value"
}

#######################################
# Query every configured record and print the metrics in Prometheus text
# exposition format.
#
# Records come from load_records (one "name|type|server|expected" line
# each); every valid record is resolved through query_dns. Lines missing
# the name, type or server are reported via log_error and skipped.
#
# Outputs: HELP/TYPE headers and samples for
#            linux_dns_query_success
#            linux_dns_query_time_seconds
#            linux_dns_query_answer_match (only if some record has an
#                                          expected value)
#######################################
collect_metrics() {
  local -a records=()
  mapfile -t records < <(load_records)

  local success_metrics=""
  local time_metrics=""
  local match_metrics=""
  local has_match_metric=false

  local record_line record rtype server expected _extra
  local result qsuccess qtime qmatch
  local labels match_labels

  for record_line in "${records[@]}"; do
    # Split on '|' in the shell. Fields beyond the fourth land in _extra and
    # are ignored. A line without any '|' yields empty type/server and is
    # therefore rejected below.
    IFS='|' read -r record rtype server expected _extra <<<"$record_line"

    if [[ -z "$record" || -z "$rtype" || -z "$server" ]]; then
      log_error "Invalid config line: $record_line (expected: record_name|record_type|dns_server|expected_value)"
      continue
    fi

    result=$(query_dns "$record" "$rtype" "$server" "$expected")
    IFS='|' read -r qsuccess qtime qmatch <<<"$result"

    # Label values come from user configuration; escape them so a quote or
    # backslash cannot produce an unparsable metrics file.
    labels="record=\"$(_prom_escape_label "$record")\""
    labels+=",type=\"$(_prom_escape_label "$rtype")\""
    labels+=",server=\"$(_prom_escape_label "$server")\""

    success_metrics+="linux_dns_query_success{${labels}} ${qsuccess}"$'\n'
    time_metrics+="linux_dns_query_time_seconds{${labels}} ${qtime}"$'\n'

    if [[ -n "$expected" ]]; then
      has_match_metric=true
      match_labels="${labels},expected=\"$(_prom_escape_label "$expected")\""
      match_metrics+="linux_dns_query_answer_match{${match_labels}} ${qmatch}"$'\n'
    fi
  done

  # Real newlines are used throughout and the result is printed with %s, so
  # backslash sequences inside the data are never interpreted.
  local output=""
  output+="# HELP linux_dns_query_success DNS query resolved successfully (1=resolved, 0=failed)"$'\n'
  output+="# TYPE linux_dns_query_success gauge"$'\n'
  output+="$success_metrics"
  output+="# HELP linux_dns_query_time_seconds DNS query resolution time in seconds"$'\n'
  output+="# TYPE linux_dns_query_time_seconds gauge"$'\n'
  output+="$time_metrics"

  if [[ "$has_match_metric" == "true" ]]; then
    output+="# HELP linux_dns_query_answer_match DNS answer matches expected value (1=match, 0=mismatch)"$'\n'
    output+="# TYPE linux_dns_query_answer_match gauge"$'\n'
    output+="$match_metrics"
  fi

  printf '%s' "$output"
}
# ============================================================================
# OUTPUT
# ============================================================================
#######################################
# Collect the metrics and publish them.
#
# In dry-run mode the metrics are printed to stdout. Otherwise they are
# written to TMP_FILE and then renamed onto OUTPUT_FILE, so a reader never
# sees a partially written file.
#
# Globals:   DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (read)
# Outputs:   metrics on stdout (dry run) or in OUTPUT_FILE
# Returns:   0 on success; exits the script with 1 when the target
#            directory is missing or the file cannot be written or moved
#######################################
write_metrics() {
  local metrics
  metrics=$(collect_metrics)

  if [[ "$DRY_RUN" == "true" ]]; then
    printf '%s\n' "$metrics"
    return
  fi

  if [[ ! -d "$TEXTFILE_DIR" ]]; then
    log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
    exit 1
  fi

  # Fail loudly instead of exiting 0 with stale or missing metrics when the
  # directory is not writable, the disk is full, or the rename fails.
  if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
    log_error "Failed to write temporary metrics file: $TMP_FILE"
    exit 1
  fi
  if ! mv -- "$TMP_FILE" "$OUTPUT_FILE"; then
    log_error "Failed to move metrics into place: $OUTPUT_FILE"
    exit 1
  fi

  debug_echo "Metrics written to $OUTPUT_FILE"
}
# ============================================================================
# MAIN
# ============================================================================
#######################################
# Entry point: interpret command-line options, then check dependencies and
# write the metrics.
# Globals:   DRY_RUN, DEBUG (written)
# Arguments: command-line options (--dry-run, --debug, --help|-h,
#            --version|-v)
# Returns:   exits with 1 on an unknown option
#######################################
main() {
  local arg

  # Options are processed left to right; --help and --version terminate the
  # script as soon as they are seen.
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      --debug)
        DEBUG=1
        ;;
      --help | -h)
        show_help
        ;;
      --version | -v)
        show_version
        ;;
      *)
        log_error "Unknown option: $arg"
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done

  check_dependencies
  write_metrics
}
# Script entry point: pass all command-line arguments through to main.
main "$@"