a1a17e81a1
Includes updated JS challenge scripts with Claude-User whitelist, same-site referer bypass, Blackbox-Exporter allowed bot, and all new exporters, cheat sheets, and automation scripts.
358 lines
11 KiB
Bash
358 lines
11 KiB
Bash
#!/bin/bash
################################################################################
# Script Name: systemd-boot-time-exporter.sh
# Version:     1.1
# Description: Prometheus textfile collector exporter for systemd boot timing
#              Exports boot phase durations, per-service startup times, and
#              total boot time using systemd-analyze
#
# Author:      Phil Connor
# Contact:     contact@mylinux.work
# Website:     https://mylinux.work
# License:     MIT
# Date:        2026-03-31
#
# Prerequisites:
#   - systemd-analyze command available
#   - node_exporter with textfile collector enabled
#   - /var/lib/node_exporter directory exists
#
# Usage:
#   ./systemd-boot-time-exporter.sh                    # Output to stdout
#   ./systemd-boot-time-exporter.sh --textfile         # Write to textfile collector
#   ./systemd-boot-time-exporter.sh -o /tmp/boot.prom  # Write to custom file
#   TOP_N=10 ./systemd-boot-time-exporter.sh
#   DEBUG=1 ./systemd-boot-time-exporter.sh
#
# Metrics Exported:
#   - linux_boot_time_seconds{phase}                - Boot phase durations
#   - linux_boot_total_seconds                      - Total boot time
#   - linux_boot_service_time_seconds{service}      - Per-service startup time
#   - linux_boot_timestamp                          - Unix timestamp of last boot
#   - linux_boot_services_total{state}              - Service count by state
#   - linux_boot_service_state_info{service,state}  - Per-service state
#   - linux_boot_exporter_duration_seconds          - Collection runtime
#
################################################################################
|
|
|
|
# Fail a pipeline when any stage fails, not only the last one.
set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

readonly VERSION="1.1"
readonly SCRIPT_NAME="${0##*/}"
readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"

# TOP_N is handed to `head -n`, so it has to be a non-negative integer.
# Anything else falls back to the default instead of silently producing an
# export without any per-service metrics.
TOP_N="${TOP_N:-20}"
if [[ ! "$TOP_N" =~ ^[0-9]+$ ]]; then
    echo "[ERROR] Invalid TOP_N '${TOP_N}', using default of 20" >&2
    TOP_N=20
fi
readonly TOP_N

# Runtime flags
OUTPUT_FILE=""        # Destination file; empty means "print to stdout"
DEBUG=${DEBUG:-}      # Any non-empty value enables debug output
|
|
|
|
# ============================================================================
|
|
# HELPER FUNCTIONS
|
|
# ============================================================================
|
|
|
|
# Print a diagnostic line to stderr, but only when debugging is enabled.
# Globals:   DEBUG (read) - any non-empty value enables output
# Arguments: message words; they are joined with single spaces
# Outputs:   "[DEBUG] <message>" on stderr
# Returns:   0 always
debug_echo() {
    if [[ -n "${DEBUG:-}" ]]; then
        printf '[DEBUG] %s\n' "$*" >&2
    fi
}
|
|
|
|
# Print an error message to stderr.
# Arguments: message words; they are joined with single spaces
# Outputs:   "[ERROR] <message>" on stderr
log_error() {
    printf '[ERROR] %s\n' "$*" >&2
}
|
|
|
|
# Remove the per-process temporary file that sits next to OUTPUT_FILE.
# Globals: OUTPUT_FILE (read)
# Does nothing when OUTPUT_FILE is empty (stdout mode); without that guard the
# path would collapse to ".<pid>" in the current directory and an unrelated
# file could be deleted.
cleanup() {
    if [[ -n "${OUTPUT_FILE:-}" ]]; then
        rm -f -- "${OUTPUT_FILE}.$$" 2>/dev/null
    fi
}

# Run cleanup on every exit path of the script.
trap cleanup EXIT
|
|
|
|
# Print the usage text to stdout and terminate the script successfully.
# Globals: SCRIPT_NAME (read)
# Note:    this function calls `exit 0`; it never returns to its caller.
show_help() {
    cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

Exports systemd boot timing metrics for Prometheus node_exporter textfile collector.

OPTIONS:
    --textfile           Write to node_exporter textfile collector
    -o, --output FILE    Write to a custom file path
    --debug              Enable debug output
    -h, --help           Show this help message
    -v, --version        Show version

ENVIRONMENT:
    TEXTFILE_DIR    Textfile collector directory (default: /var/lib/node_exporter)
    TOP_N           Number of slowest services to export (default: 20)
    DEBUG           Enable debug output when set to any value

EXAMPLES:
    $SCRIPT_NAME                      # Output to stdout
    $SCRIPT_NAME --textfile           # Write to textfile collector
    $SCRIPT_NAME -o /tmp/boot.prom    # Write to custom file
EOF
    exit 0
}
|
|
|
|
# Print "<script name> version <version>" to stdout and exit successfully.
# Globals: SCRIPT_NAME, VERSION (read)
# Note:    this function calls `exit 0`; it never returns to its caller.
show_version() {
    printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
    exit 0
}
|
|
|
|
# ============================================================================
|
|
# BOOT PHASE PARSING
|
|
# ============================================================================
|
|
|
|
# Convert a systemd time span string to seconds.
#
# The input is treated as a sequence of "<number><unit>" tokens, for example
# "1min 2.345s", "57.597s", "234ms", "120us" or "1h 2min 3s". Every token is
# scaled by its unit and the results are summed, so any combination of the
# supported units works. Tokens with an unknown unit are ignored.
#
# Arguments: $1 - time span string (may be empty)
# Outputs:   the total number of seconds on stdout, printed with up to nine
#            significant digits; "0" when nothing could be parsed
parse_time_value() {
    local input="$1"

    awk -v text="$input" '
        BEGIN {
            # Seconds per unit.
            factor["w"]   = 604800
            factor["d"]   = 86400
            factor["h"]   = 3600
            factor["min"] = 60
            factor["s"]   = 1
            factor["ms"]  = 0.001
            factor["us"]  = 0.000001

            total = 0
            # Consume the string one token at a time, left to right.
            while (match(text, /[0-9]+(\.[0-9]+)?[a-z]+/)) {
                token = substr(text, RSTART, RLENGTH)
                text = substr(text, RSTART + RLENGTH)

                unit = token
                sub(/^[0-9.]+/, "", unit)
                amount = substr(token, 1, length(token) - length(unit))

                if (unit in factor) {
                    total += amount * factor[unit]
                }
            }
            # %.9g keeps millisecond detail on long spans; a plain print would
            # round to six significant digits.
            printf "%.9g\n", total
        }'
}
|
|
|
|
# Emit per-phase and total boot durations parsed from `systemd-analyze`.
#
# The summary line looks like
#   Startup finished in 2.5s (kernel) + 1min 2.3s (userspace) = 1min 4.8s
# For each known phase the time span directly in front of "(<phase>)" is
# extracted; a phase that is not present in the output is exported as 0.
#
# Calls:     systemd-analyze, parse_time_value, debug_echo, log_error
# Outputs:   linux_boot_time_seconds{phase=...} and linux_boot_total_seconds
#            in Prometheus text format on stdout
# Returns:   1 (and prints nothing on stdout) when systemd-analyze produced no
#            output, 0 otherwise
collect_boot_phases() {
    local analyze_output
    # Best effort: a failing systemd-analyze shows up as empty output below.
    analyze_output=$(systemd-analyze 2>/dev/null) || true

    if [[ -z "$analyze_output" ]]; then
        log_error "systemd-analyze returned no output"
        return 1
    fi

    debug_echo "systemd-analyze output: $analyze_output"

    # One or more space-separated "<number><unit>" tokens, so that spans such
    # as "120us" or "1h 1min 2.345s" are captured whole.
    local -r span='[0-9]+(\.[0-9]+)?[a-z]+( [0-9]+(\.[0-9]+)?[a-z]+)*'
    local phase phase_re value

    printf '%s\n' \
        "# HELP linux_boot_time_seconds Duration of each boot phase in seconds" \
        "# TYPE linux_boot_time_seconds gauge"

    for phase in firmware loader kernel initrd userspace; do
        value=0
        phase_re="(${span}) \\(${phase}\\)"
        if [[ "$analyze_output" =~ $phase_re ]]; then
            value=$(parse_time_value "${BASH_REMATCH[1]}")
        fi
        debug_echo "${phase} phase: ${value}s"
        printf 'linux_boot_time_seconds{phase="%s"} %s\n' "$phase" "${value:-0}"
    done

    printf '\n%s\n' "# HELP linux_boot_total_seconds Total boot time in seconds"
    printf '%s\n' "# TYPE linux_boot_total_seconds gauge"

    local total_time=0
    local -r total_re="= (${span})"
    if [[ "$analyze_output" =~ $total_re ]]; then
        total_time=$(parse_time_value "${BASH_REMATCH[1]}")
    fi
    debug_echo "Total boot time: ${total_time}s"
    printf 'linux_boot_total_seconds %s\n' "${total_time:-0}"
}
|
|
|
|
# Emit the start-up time of the slowest services from `systemd-analyze blame`.
#
# Each blame line is "<time span> <unit name>". The time span itself may
# contain spaces ("1min 2.345s"), so a line is split at its LAST space: the
# final word is the unit name and everything before it is the time span.
#
# Globals:   TOP_N (read) - number of blame lines to keep
# Calls:     systemd-analyze, parse_time_value, debug_echo
# Outputs:   linux_boot_service_time_seconds{service=...} in Prometheus text
#            format on stdout; nothing when blame produced no output
# Returns:   0
collect_service_times() {
    local blame_output
    # `head` may close the pipe early, which makes the pipeline fail under
    # pipefail; the lines already captured are still valid, hence `|| true`.
    blame_output=$(systemd-analyze blame 2>/dev/null | head -n "${TOP_N:-20}") || true

    if [[ -z "$blame_output" ]]; then
        debug_echo "systemd-analyze blame returned no output"
        return 0
    fi

    printf '%s\n' \
        "# HELP linux_boot_service_time_seconds Time taken by each systemd service to start" \
        "# TYPE linux_boot_service_time_seconds gauge"

    local line time_str service_name seconds label
    # `read` with the default IFS strips the leading padding of each line.
    while read -r line; do
        # A usable line needs at least "<time> <unit>".
        [[ "$line" == *" "* ]] || continue
        service_name=${line##* }
        time_str=${line% *}

        seconds=$(parse_time_value "$time_str")
        debug_echo "Service $service_name: ${seconds}s"

        # Unit names can contain backslashes (e.g. "\x2d"); escape them and any
        # double quote as the Prometheus text format requires.
        label=${service_name//\\/\\\\}
        label=${label//\"/\\\"}
        printf 'linux_boot_service_time_seconds{service="%s"} %s\n' "$label" "${seconds:-0}"
    done <<< "$blame_output"
}
|
|
|
|
# Emit the Unix timestamp of the last boot.
#
# Sources, in order of preference:
#   1. the "btime" line of /proc/stat, which already holds epoch seconds;
#   2. `who -b`, whose date text is handed to `date -d`. Its layout varies
#      with the locale and it carries no seconds, so it is only a fallback.
#      Everything after the first two words ("system boot") is passed on, so
#      the parse does not depend on how many words the date occupies.
# When neither source yields a number the metric is exported as 0.
#
# Calls:     awk, who, date, debug_echo
# Outputs:   linux_boot_timestamp in Prometheus text format on stdout
# Returns:   0
collect_boot_timestamp() {
    local epoch="" boot_ts=""
    local -r numeric='^[0-9]+$'

    printf '%s\n' \
        "# HELP linux_boot_timestamp Unix timestamp of last boot" \
        "# TYPE linux_boot_timestamp gauge"

    if [[ -r /proc/stat ]]; then
        epoch=$(awk '/^btime/ {print $2; exit}' /proc/stat 2>/dev/null) || true
    fi

    if [[ "$epoch" =~ $numeric ]]; then
        debug_echo "Boot timestamp from /proc/stat: $epoch"
    else
        epoch=""
        boot_ts=$(who -b 2>/dev/null \
            | awk '{ $1 = ""; $2 = ""; sub(/^ +/, ""); print; exit }') || true
        if [[ -n "$boot_ts" ]]; then
            epoch=$(date -d "$boot_ts" +%s 2>/dev/null) || true
            if [[ "$epoch" =~ $numeric ]]; then
                debug_echo "Boot timestamp: $boot_ts (epoch: $epoch)"
            fi
        fi
    fi

    [[ "$epoch" =~ $numeric ]] || epoch=0
    printf 'linux_boot_timestamp %s\n' "$epoch"
}
|
|
|
|
# Emit the number of service units per state plus one info sample per unit.
#
# systemctl is queried once per state (failed, active, inactive) and both the
# count and the per-unit samples are derived from that single listing, so the
# two metrics always agree. On each listing line the first word ending in
# ".service" is taken as the unit name; this skips the status bullet that
# systemctl may print in front of a unit.
#
# Calls:     systemctl, debug_echo
# Outputs:   linux_boot_services_total{state=...} and
#            linux_boot_service_state_info{service=...,state=...} in
#            Prometheus text format on stdout
# Returns:   0
collect_service_state_counts() {
    local active=0 inactive=0 failed=0
    local state listing word unit label count
    local -a words=() info_lines=()

    for state in failed active inactive; do
        # Best effort: a failing systemctl simply yields an empty listing.
        listing=$(systemctl list-units --type=service --state="$state" \
            --no-legend 2>/dev/null) || true
        count=0

        while read -r -a words; do
            unit=""
            for word in "${words[@]}"; do
                if [[ "$word" == *.service ]]; then
                    unit=$word
                    break
                fi
            done
            [[ -z "$unit" ]] && continue
            count=$((count + 1))

            # Unit names can contain backslashes (e.g. "\x2d"); escape them
            # and any double quote as the Prometheus text format requires.
            label=${unit//\\/\\\\}
            label=${label//\"/\\\"}
            info_lines+=("linux_boot_service_state_info{service=\"${label}\",state=\"${state}\"} 1")
        done <<< "$listing"

        case "$state" in
            active) active=$count ;;
            inactive) inactive=$count ;;
            failed) failed=$count ;;
        esac
    done

    debug_echo "Service states — active: $active, inactive: $inactive, failed: $failed"

    printf '%s\n' \
        "# HELP linux_boot_services_total Count of services by activation state at boot" \
        "# TYPE linux_boot_services_total gauge" \
        "linux_boot_services_total{state=\"active\"} ${active}" \
        "linux_boot_services_total{state=\"inactive\"} ${inactive}" \
        "linux_boot_services_total{state=\"failed\"} ${failed}" \
        "" \
        "# HELP linux_boot_service_state_info Service state information" \
        "# TYPE linux_boot_service_state_info gauge"

    # printf with no arguments would still print one empty line, so guard it.
    if (( ${#info_lines[@]} > 0 )); then
        printf '%s\n' "${info_lines[@]}"
    fi
}
|
|
|
|
# ============================================================================
|
|
# METRICS COLLECTION
|
|
# ============================================================================
|
|
|
|
# Run every collector in order and append the exporter's own runtime metric.
#
# Collectors are separated by one empty line. A collector that fails does not
# stop the remaining ones. When DEBUG is set, the output of
# `systemd-analyze critical-chain` is additionally logged through debug_echo.
#
# Globals:   DEBUG (read)
# Calls:     collect_boot_phases, collect_service_times,
#            collect_boot_timestamp, collect_service_state_counts, debug_echo
# Outputs:   the complete metric set in Prometheus text format on stdout
collect_metrics() {
    local start_time end_time duration line
    start_time=$(date +%s%N)

    collect_boot_phases
    echo
    collect_service_times
    echo
    collect_boot_timestamp
    echo
    collect_service_state_counts

    end_time=$(date +%s%N)
    # A date implementation without %N support leaves non-digits in the
    # value; report a zero duration then instead of a garbage number.
    if [[ "$start_time" =~ ^[0-9]+$ && "$end_time" =~ ^[0-9]+$ ]]; then
        duration=$(awk -v ns="$(( end_time - start_time ))" \
            'BEGIN {printf "%.4f", ns / 1000000000}')
    else
        duration="0.0000"
    fi

    echo
    echo "# HELP linux_boot_exporter_duration_seconds Time taken to collect all metrics"
    echo "# TYPE linux_boot_exporter_duration_seconds gauge"
    echo "linux_boot_exporter_duration_seconds ${duration}"

    if [[ -n "$DEBUG" ]]; then
        debug_echo "--- critical-chain output (for reference) ---"
        systemd-analyze critical-chain 2>/dev/null | while IFS= read -r line; do
            debug_echo "  $line"
        done
    fi
}
|
|
|
|
# ============================================================================
|
|
# OUTPUT
|
|
# ============================================================================
|
|
|
|
# Collect all metrics and deliver them to stdout or to OUTPUT_FILE.
#
# File mode: the metrics are first written to "<OUTPUT_FILE>.<pid>" in the
# same directory and then renamed over the target, so a reader never sees a
# half-written file. Every step is checked; on failure the temporary file is
# removed and the script exits with status 1 instead of reporting success.
#
# Globals:   OUTPUT_FILE (read) - empty selects stdout mode
# Calls:     collect_metrics, log_error
# Outputs:   metrics on stdout (stdout mode) or a confirmation on stderr
# Exits:     1 when the target directory is missing or the file cannot be
#            written
write_metrics() {
    local metrics
    metrics=$(collect_metrics)

    if [[ -z "$OUTPUT_FILE" ]]; then
        printf '%s\n' "$metrics"
        return 0
    fi

    local output_dir
    output_dir="$(dirname -- "$OUTPUT_FILE")"
    if [[ ! -d "$output_dir" ]]; then
        log_error "Directory does not exist: $output_dir"
        exit 1
    fi

    local temp_file="${OUTPUT_FILE}.$$"
    local failed_step=""
    if ! printf '%s\n' "$metrics" > "$temp_file"; then
        failed_step="write"
    elif ! chmod 644 "$temp_file"; then
        failed_step="chmod"
    elif ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
        failed_step="rename"
    fi

    if [[ -n "$failed_step" ]]; then
        log_error "Failed to write metrics to $OUTPUT_FILE (${failed_step} step failed)"
        rm -f -- "$temp_file" 2>/dev/null
        exit 1
    fi

    echo "Metrics written to $OUTPUT_FILE" >&2
}
|
|
|
|
# ============================================================================
|
|
# MAIN
|
|
# ============================================================================
|
|
|
|
# Parse the command line, verify that systemd-analyze exists and export the
# metrics.
#
# Options:   --textfile          write to TEXTFILE_DIR/systemd_boot_time.prom
#            -o, --output FILE   write to FILE
#            --debug             enable debug output
#            -h, --help          print usage and exit
#            -v, --version       print version and exit
# Globals:   OUTPUT_FILE, DEBUG (written); TEXTFILE_DIR (read)
# Calls:     show_help, show_version, log_error, write_metrics
# Exits:     1 on an unknown option, on -o/--output without a file path, or
#            when systemd-analyze is not available
main() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --textfile)
                OUTPUT_FILE="${TEXTFILE_DIR}/systemd_boot_time.prom"
                shift
                ;;
            -o|--output)
                # Without this check `shift 2` fails when the path is missing,
                # nothing is shifted and the loop would never terminate.
                if [[ $# -lt 2 || -z "$2" ]]; then
                    log_error "Option $1 requires a file path argument"
                    echo "Use --help for usage information" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            --debug)
                DEBUG=1
                shift
                ;;
            --help|-h)
                show_help
                ;;
            --version|-v)
                show_version
                ;;
            *)
                log_error "Unknown option: $1"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done

    if ! command -v systemd-analyze &>/dev/null; then
        log_error "systemd-analyze not found — this script requires systemd"
        exit 1
    fi

    write_metrics
}
|
|
|
|
# Script entry point: pass every command-line argument to main unchanged.
main "$@"
|