Files
linux-scripts/textfile-health-exporter.sh
T
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

357 lines
12 KiB
Bash
Executable File

#!/bin/bash
################################################################################
# Script Name: textfile-health-exporter.sh
# Version: 1.0
# Description: Prometheus exporter that monitors node_exporter textfile
# collector .prom files — detects stale files, parse errors,
# file sizes, and missing cron jobs. A meta-exporter that
# watches the health of your other exporters.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
# - awk, find, wc, and GNU stat (the script calls `stat -c`)
# - netcat (nc) for HTTP mode
#
# Usage:
# ./textfile-health-exporter.sh # stdout
# ./textfile-health-exporter.sh --textfile # node_exporter textfile
# ./textfile-health-exporter.sh --http # HTTP server on port 9202
#
# Metrics Exported:
# Per-File:
# - textfile_health_file_age_seconds{file} - Seconds since last modified
# - textfile_health_file_size_bytes{file} - File size in bytes
# - textfile_health_file_lines{file} - Line count
# - textfile_health_parse_ok{file} - 1 if valid, 0 if errors
#
# Summary:
# - textfile_health_files_total - Total .prom files found
# - textfile_health_stale_files_total - Files older than threshold
# - textfile_health_parse_errors_total - Files with parse errors
# - textfile_health_total_size_bytes - Total size of all files
#
# Exporter:
# - textfile_health_up - Exporter status (1=up)
# - textfile_health_stale_threshold_seconds - Configured stale threshold
# - textfile_health_duration_seconds - Script execution time
# - textfile_health_last_run_timestamp - Last run timestamp
#
# Configuration:
# Default HTTP port: 9202
# Textfile directory: /var/lib/node_exporter
# Stale threshold: 600 seconds (10 minutes)
#
################################################################################
# Shell options: referencing an unset variable is an error (-u) and a pipeline
# fails when any of its stages fails (pipefail). errexit (-e) is not enabled.
set -u
set -o pipefail
# ============================================================================
# CONFIGURATION
# ============================================================================
# Defaults. parse_args can override all of these except OWN_OUTPUT_FILE.
OWN_OUTPUT_FILE='textfile_health.prom'  # file name used by --textfile
TEXTFILE_DIR='/var/lib/node_exporter'   # directory scanned for *.prom files
STALE_THRESHOLD=600                     # seconds
HTTP_PORT=9202
HTTP_MODE=false
OUTPUT_FILE=''                          # empty means "print to stdout"
# ============================================================================
# USAGE
# ============================================================================
# Print the command-line help to stdout and terminate the script with
# status 0. Reads HTTP_PORT, TEXTFILE_DIR and STALE_THRESHOLD so the help
# text shows the values currently in effect.
show_usage() {
  # Resolve the program name once and reuse it in the help text.
  local prog
  prog=$(basename "$0")
  cat <<EOF
Usage: ${prog} [OPTIONS]
Monitor node_exporter textfile collector .prom files for staleness and errors.
MODES:
--textfile Write to node_exporter textfile collector
--http Run HTTP server on port ${HTTP_PORT}
(default) Print metrics to stdout
OPTIONS:
-p, --port N HTTP port (default: ${HTTP_PORT})
-o, --output F Output file path
-d, --dir PATH Textfile directory (default: ${TEXTFILE_DIR})
-s, --stale-threshold N Stale threshold in seconds (default: ${STALE_THRESHOLD})
-h, --help Show this help
EXAMPLES:
${prog} --textfile
${prog} --http --port 9202
${prog} --dir /var/lib/node_exporter --stale-threshold 300
EOF
  exit 0
}
# ============================================================================
# ARGUMENT PARSING
# ============================================================================
# Parse command-line options into the global configuration variables.
#
# Globals written: OUTPUT_FILE, HTTP_MODE, HTTP_PORT, TEXTFILE_DIR,
#                  STALE_THRESHOLD
# Globals read:    OWN_OUTPUT_FILE
# Arguments:       the script's command-line arguments ("$@")
# Exits:           0 after printing help (-h/--help, via show_usage);
#                  1 on an unknown option, a missing option argument, or a
#                  non-numeric / out-of-range port or stale threshold.
parse_args() {
  # Set by --textfile and cleared by a later -o/--output, so the last of the
  # two options on the command line wins.
  local textfile_requested=false

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        ;;
      --textfile)
        # The path is resolved after the loop so that a --dir given later on
        # the command line is honoured.
        textfile_requested=true
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p|--port|-o|--output|-d|--dir|-s|--stale-threshold)
        # Check explicitly: under `set -u` a bare "$2" would abort with an
        # "unbound variable" message that does not name the option.
        if [[ $# -lt 2 ]]; then
          echo "# ERROR: Option $1 requires an argument" >&2
          exit 1
        fi
        case "$1" in
          -p|--port) HTTP_PORT="$2" ;;
          -o|--output) OUTPUT_FILE="$2"; textfile_requested=false ;;
          -d|--dir) TEXTFILE_DIR="$2" ;;
          -s|--stale-threshold) STALE_THRESHOLD="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "# ERROR: Unknown option: $1" >&2
        # show_usage always exits 0; run it in a subshell so the script can
        # still report the failure through its own exit status.
        ( show_usage ) >&2
        exit 1
        ;;
    esac
  done

  if [[ "$textfile_requested" == "true" ]]; then
    OUTPUT_FILE="${TEXTFILE_DIR}/${OWN_OUTPUT_FILE}"
  fi

  # Both values are later used in arithmetic and printed as metric values,
  # so reject anything that is not a plain decimal integer.
  if [[ ! "$HTTP_PORT" =~ ^[0-9]{1,5}$ ]] \
    || (( 10#$HTTP_PORT < 1 || 10#$HTTP_PORT > 65535 )); then
    echo "# ERROR: Invalid port: ${HTTP_PORT}" >&2
    exit 1
  fi
  if [[ ! "$STALE_THRESHOLD" =~ ^[0-9]{1,18}$ ]]; then
    echo "# ERROR: Invalid stale threshold: ${STALE_THRESHOLD}" >&2
    exit 1
  fi
  # Force base 10 so a value such as "0600" is not read as octal later on.
  HTTP_PORT=$(( 10#$HTTP_PORT ))
  STALE_THRESHOLD=$(( 10#$STALE_THRESHOLD ))
  return 0
}
# ============================================================================
# PREFLIGHT
# ============================================================================
# Verify that the external commands used by the collector are installed and
# that the directory to scan exists.
#
# Globals read: TEXTFILE_DIR
# Exits:        1 (with a message on stderr) when a required command is
#               missing or TEXTFILE_DIR is not a directory.
preflight() {
  # The loop variable is declared local so it does not leak into the
  # script's global scope.
  local tool
  local missing=()

  for tool in awk stat find wc; do
    command -v "$tool" &>/dev/null || missing+=("$tool")
  done

  if (( ${#missing[@]} > 0 )); then
    echo "# ERROR: Missing required commands: ${missing[*]}" >&2
    exit 1
  fi

  if [[ ! -d "$TEXTFILE_DIR" ]]; then
    echo "# ERROR: Textfile directory does not exist: ${TEXTFILE_DIR}" >&2
    exit 1
  fi
  return 0
}
# ============================================================================
# PARSE VALIDATION
# ============================================================================
# Check that every line of a .prom file looks like valid exposition text.
#
# Accepted lines:
#   - blank lines and lines starting with "#"
#   - samples of the form  name[{labels}] value [timestamp]
#     where value is a decimal float (optional sign, optional exponent,
#     leading-dot form such as .5 allowed), Inf/Infinity (optionally signed)
#     or NaN, and timestamp is an optional integer.
# Unlike a prefix-only check, the whole value token is validated, so lines
# such as "foo 1abc" or "foo 1 2 3" are reported as errors.
#
# Arguments: $1 - path of the file to check
# Returns:   0 when no invalid line was found, non-zero otherwise (including
#            when awk cannot read the file).
validate_prom_file() {
  local file="$1"
  awk '
    # True when v is an acceptable sample value.
    function is_value(v) {
      if (v ~ /^[+-]?[0-9]+\.?[0-9]*([eE][+-]?[0-9]+)?$/) return 1
      if (v ~ /^[+-]?\.[0-9]+([eE][+-]?[0-9]+)?$/) return 1
      if (v ~ /^[+-]?[Ii][Nn][Ff]([Ii][Nn][Ii][Tt][Yy])?$/) return 1
      return (v ~ /^[Nn][Aa][Nn]$/)
    }
    /^[[:space:]]*$/ { next }
    /^#/ { next }
    {
      sample = $0
      # Drop backslash escapes, then empty every quoted string, so that
      # label values containing } or whitespace cannot confuse the match.
      gsub(/\\./, "", sample)
      gsub(/"[^"]*"/, "\"\"", sample)
      # Metric name, optional label block with balanced quotes, whitespace.
      if (!match(sample, /^[a-zA-Z_:][a-zA-Z0-9_:]*(\{[^}"]*(""[^}"]*)*\})?[[:space:]]+/)) {
        errors++
        next
      }
      rest = substr(sample, RLENGTH + 1)
      sub(/[[:space:]]+$/, "", rest)
      n = split(rest, tok, /[[:space:]]+/)
      if (n < 1 || n > 2 || !is_value(tok[1]) || (n == 2 && tok[2] !~ /^-?[0-9]+$/)) {
        errors++
      }
    }
    END { exit (errors > 0) }
  ' "$file" 2>/dev/null
}
# ============================================================================
# METRICS COLLECTION
# ============================================================================
# Scan TEXTFILE_DIR for *.prom files and print health metrics for them to
# stdout in Prometheus text exposition format.
#
# Globals read: TEXTFILE_DIR, OUTPUT_FILE, OWN_OUTPUT_FILE, STALE_THRESHOLD
# Calls:        validate_prom_file (once per file)
# Outputs:      exporter status, per-file age/size/line-count/parse status,
#               summary totals, run duration and run timestamp.
# Notes:
#   - The exporter's own output file (matched by base name) is skipped.
#   - File names are escaped (backslash, double quote, newline) before being
#     used as label values so unusual names cannot break the output.
#   - A file whose stat call fails gets no age/size sample and is not
#     counted as stale.
collect_metrics() {
  local start_time now
  start_time=$(date +%s%N)
  now=$(date +%s)

  echo "# HELP textfile_health_up Exporter status (1=up)"
  echo "# TYPE textfile_health_up gauge"
  echo "textfile_health_up 1"
  echo ""
  echo "# HELP textfile_health_stale_threshold_seconds Configured stale threshold"
  echo "# TYPE textfile_health_stale_threshold_seconds gauge"
  echo "textfile_health_stale_threshold_seconds ${STALE_THRESHOLD}"
  echo ""

  # --- Resolve own output filename for exclusion ---
  local exclude_basename
  if [[ -n "$OUTPUT_FILE" ]]; then
    exclude_basename=$(basename "$OUTPUT_FILE")
  else
    exclude_basename="$OWN_OUTPUT_FILE"
  fi

  # --- Find all .prom files, excluding own output ---
  local prom_files=()
  local f
  while IFS= read -r -d '' f; do
    [[ "${f##*/}" == "$exclude_basename" ]] && continue
    prom_files+=("$f")
  done < <(find "$TEXTFILE_DIR" -maxdepth 1 -name '*.prom' -type f -print0 2>/dev/null)

  local files_total=${#prom_files[@]}
  local stale_total=0
  local parse_errors_total=0
  local total_size=0

  if (( files_total > 0 )); then
    # --- Gather everything in one pass (a single stat call per file) ---
    local -a label=() age=() size=() line_count=() parse_ok=()
    local i name stat_out
    for i in "${!prom_files[@]}"; do
      f=${prom_files[i]}

      # Escape the file name for use inside a double-quoted label value.
      name=${f##*/}
      name=${name//\\/\\\\}
      name=${name//\"/\\\"}
      name=${name//$'\n'/\\n}
      label[i]=$name

      age[i]=""
      size[i]=""
      if stat_out=$(stat -c '%Y %s' "$f" 2>/dev/null); then
        age[i]=$(( now - ${stat_out% *} ))
        size[i]=${stat_out#* }
        total_size=$(( total_size + size[i] ))
        if (( age[i] >= STALE_THRESHOLD )); then
          stale_total=$(( stale_total + 1 ))
        fi
      fi

      # Group the redirection so an unreadable file produces no error text.
      line_count[i]=$( { wc -l < "$f"; } 2>/dev/null ) || line_count[i]=""
      line_count[i]=${line_count[i]//[[:space:]]/}

      if validate_prom_file "$f"; then
        parse_ok[i]=1
      else
        parse_ok[i]=0
        parse_errors_total=$(( parse_errors_total + 1 ))
      fi
    done

    # --- Per-file metrics, one metric family at a time ---
    echo "# HELP textfile_health_file_age_seconds Seconds since file was last modified"
    echo "# TYPE textfile_health_file_age_seconds gauge"
    for i in "${!prom_files[@]}"; do
      [[ -n "${age[i]}" ]] || continue
      printf '%s\n' "textfile_health_file_age_seconds{file=\"${label[i]}\"} ${age[i]}"
    done
    echo ""

    echo "# HELP textfile_health_file_size_bytes File size in bytes"
    echo "# TYPE textfile_health_file_size_bytes gauge"
    for i in "${!prom_files[@]}"; do
      [[ -n "${size[i]}" ]] || continue
      printf '%s\n' "textfile_health_file_size_bytes{file=\"${label[i]}\"} ${size[i]}"
    done
    echo ""

    echo "# HELP textfile_health_file_lines Line count per file"
    echo "# TYPE textfile_health_file_lines gauge"
    for i in "${!prom_files[@]}"; do
      [[ -n "${line_count[i]}" ]] || continue
      printf '%s\n' "textfile_health_file_lines{file=\"${label[i]}\"} ${line_count[i]}"
    done
    echo ""

    echo "# HELP textfile_health_parse_ok Parse validation (1=valid, 0=errors)"
    echo "# TYPE textfile_health_parse_ok gauge"
    for i in "${!prom_files[@]}"; do
      printf '%s\n' "textfile_health_parse_ok{file=\"${label[i]}\"} ${parse_ok[i]}"
    done
    echo ""
  fi

  # --- Summary metrics ---
  echo "# HELP textfile_health_files_total Total .prom files found"
  echo "# TYPE textfile_health_files_total gauge"
  echo "textfile_health_files_total ${files_total}"
  echo ""
  echo "# HELP textfile_health_stale_files_total Files older than stale threshold"
  echo "# TYPE textfile_health_stale_files_total gauge"
  echo "textfile_health_stale_files_total ${stale_total}"
  echo ""
  echo "# HELP textfile_health_parse_errors_total Files with parse errors"
  echo "# TYPE textfile_health_parse_errors_total gauge"
  echo "textfile_health_parse_errors_total ${parse_errors_total}"
  echo ""
  echo "# HELP textfile_health_total_size_bytes Total size of all .prom files"
  echo "# TYPE textfile_health_total_size_bytes gauge"
  echo "textfile_health_total_size_bytes ${total_size}"
  echo ""

  # --- Exporter metadata ---
  # Duration is computed with shell arithmetic (millisecond precision,
  # truncated). It stays "0" when `date +%s%N` did not return a plain
  # nanosecond count or the clock went backwards.
  local end_time elapsed_ns
  local duration="0"
  end_time=$(date +%s%N)
  if [[ "$start_time" =~ ^[0-9]+$ && "$end_time" =~ ^[0-9]+$ ]] \
    && (( end_time >= start_time )); then
    elapsed_ns=$(( end_time - start_time ))
    printf -v duration '%d.%03d' \
      "$(( elapsed_ns / 1000000000 ))" \
      "$(( elapsed_ns % 1000000000 / 1000000 ))"
  fi
  echo "# HELP textfile_health_duration_seconds Script execution time"
  echo "# TYPE textfile_health_duration_seconds gauge"
  echo "textfile_health_duration_seconds ${duration}"
  echo ""
  echo "# HELP textfile_health_last_run_timestamp Last successful run (unix timestamp)"
  echo "# TYPE textfile_health_last_run_timestamp gauge"
  echo "textfile_health_last_run_timestamp ${now}"
}
# ============================================================================
# OUTPUT HANDLING
# ============================================================================
# Collect metrics once and deliver them: to stdout when OUTPUT_FILE is empty,
# otherwise to OUTPUT_FILE via a temporary file in the same directory that is
# renamed into place, so readers never see a partially written file.
#
# Globals read: OUTPUT_FILE
# Calls:        collect_metrics
# Exits:        1 when the output path is a directory, the directory or
#               temporary file cannot be created or written, the generated
#               output has fewer than 5 lines (the previous file is kept),
#               or the file cannot be moved into place. The temporary file
#               is removed on every failure path.
output_metrics() {
  local metrics
  metrics=$(collect_metrics)

  if [[ -z "$OUTPUT_FILE" ]]; then
    printf '%s\n' "$metrics"
    return 0
  fi

  # mv would silently drop the temp file *inside* an existing directory and
  # report success, so refuse that case up front.
  if [[ -d "$OUTPUT_FILE" ]]; then
    echo "# ERROR: Output path is a directory: ${OUTPUT_FILE}" >&2
    exit 1
  fi

  local output_dir temp_file file_lines
  output_dir="$(dirname "$OUTPUT_FILE")"
  if ! mkdir -p "$output_dir"; then
    echo "# ERROR: Cannot create output directory: ${output_dir}" >&2
    exit 1
  fi
  if ! temp_file=$(mktemp "${output_dir}/.textfile_health.XXXXXX"); then
    echo "# ERROR: Cannot create temporary file in ${output_dir}" >&2
    exit 1
  fi
  if ! printf '%s\n' "$metrics" > "$temp_file"; then
    rm -f "$temp_file"
    echo "# ERROR: Cannot write temporary file: ${temp_file}" >&2
    exit 1
  fi

  # Sanity check: a near-empty result means collection went wrong.
  file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
  file_lines=${file_lines//[[:space:]]/}
  if [[ "$file_lines" -lt 5 ]]; then
    rm -f "$temp_file"
    echo "# ERROR: Metrics file too small (${file_lines} lines), keeping previous" >&2
    exit 1
  fi

  if ! chmod 644 "$temp_file" || ! mv -f "$temp_file" "$OUTPUT_FILE"; then
    rm -f "$temp_file"
    echo "# ERROR: Cannot install metrics file: ${OUTPUT_FILE}" >&2
    exit 1
  fi
  echo "# Metrics written to ${OUTPUT_FILE} (${file_lines} lines)" >&2
}
# Serve the metrics over HTTP with netcat, one response per connection,
# looping forever.
#
# Globals read: HTTP_PORT
# Calls:        collect_metrics
# Exits:        1 when neither nc nor ncat is installed.
# Notes:
#   - The response is built before the listener starts, so each client
#     receives the snapshot taken when the previous connection ended.
#   - The response is written with printf '%s' so backslashes inside the
#     metrics (for example escaped label values) are sent unchanged.
#   - Content-Length is the byte length of the body actually sent, which
#     includes the final newline.
#   - Two listener syntaxes are tried ("-l -p PORT -q 1", then "-l PORT");
#     when both fail the loop pauses for a second instead of spinning.
serve_http() {
  echo "# Starting HTTP server on port ${HTTP_PORT}" >&2
  echo "# Metrics endpoint: http://localhost:${HTTP_PORT}/metrics" >&2
  if ! command -v nc &>/dev/null && ! command -v ncat &>/dev/null; then
    echo "# ERROR: netcat (nc/ncat) required for HTTP mode" >&2
    exit 1
  fi

  # Prefer ncat when it is available.
  local nc_cmd="nc"
  if command -v ncat &>/dev/null; then
    nc_cmd="ncat"
  fi

  local metrics body content_length response
  while true; do
    metrics=$(collect_metrics)
    # Command substitution strips the trailing newline; put one back so the
    # last sample line is properly terminated.
    body="${metrics}"$'\n'
    content_length=$(printf '%s' "$body" | wc -c)
    content_length=${content_length//[[:space:]]/}

    response="HTTP/1.1 200 OK"$'\r\n'
    response+="Content-Type: text/plain; charset=utf-8"$'\r\n'
    response+="Content-Length: ${content_length}"$'\r\n'
    response+="Connection: close"$'\r\n\r\n'
    response+="$body"

    if ! printf '%s' "$response" | "$nc_cmd" -l -p "$HTTP_PORT" -q 1 2>/dev/null; then
      if ! printf '%s' "$response" | "$nc_cmd" -l "$HTTP_PORT" 2>/dev/null; then
        sleep 1
      fi
    fi
  done
}
# ============================================================================
# MAIN
# ============================================================================
# Entry point: read the command line, check prerequisites, then either run
# the HTTP server loop or produce one set of metrics.
parse_args "$@"
preflight
case "$HTTP_MODE" in
  true) serve_http ;;
  *) output_metrics ;;
esac