Files
linux-scripts/systemd-boot-time-exporter.sh
T
chiefgeek a1a17e81a1 Sync all scripts from website downloads — 352 scripts total
Includes updated JS challenge scripts with Claude-User whitelist,
same-site referer bypass, Blackbox-Exporter allowed bot, and all
new exporters, cheat sheets, and automation scripts.
2026-05-25 03:31:08 +02:00

358 lines
11 KiB
Bash

#!/bin/bash
################################################################################
# Script Name: systemd-boot-time-exporter.sh
# Version: 1.1
# Description: Prometheus textfile collector exporter for systemd boot timing
# Exports boot phase durations, per-service startup times, and
# total boot time using systemd-analyze
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Date: 2026-03-31
#
# Prerequisites:
# - systemd-analyze command available
# - node_exporter with textfile collector enabled
# - /var/lib/node_exporter directory exists
#
# Usage:
# ./systemd-boot-time-exporter.sh # Output to stdout
# ./systemd-boot-time-exporter.sh --textfile # Write to textfile collector
# ./systemd-boot-time-exporter.sh -o /tmp/boot.prom # Write to custom file
# TOP_N=10 ./systemd-boot-time-exporter.sh
# DEBUG=1 ./systemd-boot-time-exporter.sh
#
# Metrics Exported:
# - linux_boot_time_seconds{phase} - Boot phase durations
# - linux_boot_total_seconds - Total boot time
# - linux_boot_service_time_seconds{service} - Per-service startup time
# - linux_boot_timestamp - Unix timestamp of last boot
# - linux_boot_services_total{state} - Service count by state
# - linux_boot_service_state_info{service,state} - Per-service state
# - linux_boot_exporter_duration_seconds - Collection runtime
#
################################################################################
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================
# Constants. TEXTFILE_DIR and TOP_N take their value from the environment
# when set there, otherwise the defaults below apply.
VERSION="1.1"
SCRIPT_NAME="${0##*/}"
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
TOP_N="${TOP_N:-20}"
readonly VERSION SCRIPT_NAME TEXTFILE_DIR TOP_N

# Runtime flags (changed later by command-line parsing)
OUTPUT_FILE=""
DEBUG="${DEBUG:-}"
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print a diagnostic line on stderr, prefixed with "[DEBUG] ".
# Does nothing (and succeeds) when the global DEBUG is empty.
# Arguments: the message words, joined via "$*".
debug_echo() {
    [[ -n "$DEBUG" ]] || return 0
    printf '%s\n' "[DEBUG] $*" >&2
}
# Print an error line on stderr, prefixed with "[ERROR] ".
# Arguments: the message words, joined via "$*".
log_error() {
    printf '%s\n' "[ERROR] $*" >&2
}
# Remove the temporary file "<OUTPUT_FILE>.<pid>" that write_metrics creates.
# Installed as the EXIT trap so it runs on every exit path.
# Globals: OUTPUT_FILE (read)
cleanup() {
    # In stdout mode OUTPUT_FILE is empty; without this guard the path would
    # collapse to ".<pid>" and an unrelated file in the current directory
    # could be deleted.
    [[ -n "$OUTPUT_FILE" ]] || return 0
    rm -f -- "${OUTPUT_FILE}.$$" 2>/dev/null
}
trap cleanup EXIT
# Print the usage text on stdout and terminate the script with status 0.
# Globals: SCRIPT_NAME (read)
show_help() {
    local prog="$SCRIPT_NAME"
    cat <<EOF
Usage: ${prog} [OPTIONS]
Exports systemd boot timing metrics for Prometheus node_exporter textfile collector.
OPTIONS:
--textfile Write to node_exporter textfile collector
-o, --output FILE Write to a custom file path
--debug Enable debug output
--help Show this help message
--version Show version
ENVIRONMENT:
TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter)
TOP_N Number of slowest services to export (default: 20)
DEBUG Enable debug output when set to any value
EXAMPLES:
${prog} # Output to stdout
${prog} --textfile # Write to textfile collector
${prog} -o /tmp/boot.prom # Write to custom file
EOF
    exit 0
}
# Print "<script name> version <version>" on stdout and terminate the
# script with status 0.
# Globals: SCRIPT_NAME, VERSION (read)
show_version() {
    printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
    exit 0
}
# ============================================================================
# BOOT PHASE PARSING
# ============================================================================
# Convert a systemd-analyze time string to seconds.
# Handles: "1h 2min 3.456s", "1min 2.345s", "1min 345ms", "57.597s",
#          "234ms", "120us" (day "d" and hour "h" components are included).
# Arguments: $1 - the time string
# Outputs:   the duration in seconds on stdout (0 for unrecognised input)
parse_time_value() {
    local input="$1"
    local day_val=0 hour_val=0 min_val=0 sec_val=0 ms_val=0 us_val=0

    [[ "$input" =~ ([0-9]+)d ]] && day_val="${BASH_REMATCH[1]}"
    [[ "$input" =~ ([0-9]+)h ]] && hour_val="${BASH_REMATCH[1]}"
    [[ "$input" =~ ([0-9]+)min ]] && min_val="${BASH_REMATCH[1]}"
    [[ "$input" =~ ([0-9]+\.?[0-9]*)ms ]] && ms_val="${BASH_REMATCH[1]}"
    [[ "$input" =~ ([0-9]+)us ]] && us_val="${BASH_REMATCH[1]}"

    # Only look for a seconds component when no ms/us component was found:
    # the seconds pattern is tried solely on strings that carry neither.
    if [[ "$ms_val" == "0" && "$us_val" == "0" ]]; then
        [[ "$input" =~ ([0-9]+\.?[0-9]*)s ]] && sec_val="${BASH_REMATCH[1]}"
    fi

    # %.9g keeps millisecond precision for large values; awk's default
    # number formatting (%.6g) would print 1000.123 as 1000.12.
    awk -v d="$day_val" -v h="$hour_val" -v m="$min_val" -v s="$sec_val" \
        -v ms="$ms_val" -v us="$us_val" \
        'BEGIN {printf "%.9g\n", d * 86400 + h * 3600 + m * 60 + s + ms / 1000 + us / 1000000}'
}
# Emit linux_boot_time_seconds{phase} for the firmware, loader, kernel,
# initrd and userspace phases plus linux_boot_total_seconds, all parsed from
# the output of `systemd-analyze`. A phase that is not present in that
# output is reported as 0.
# Outputs: metrics in Prometheus text format on stdout
# Returns: 1 (after logging an error) when systemd-analyze prints nothing
collect_boot_phases() {
    local analyze_output phase val total_time regex
    # One or more "<number><unit>" tokens, e.g. "1min 2.345s", "345ms",
    # "120us" or "1h 2min 3.456s". Each token may be followed by one space.
    local -r duration_re='(([0-9]+(\.[0-9]+)?(y|month|w|d|h|min|ms|us|s) ?)+)'

    analyze_output=$(systemd-analyze 2>/dev/null) || true
    if [[ -z "$analyze_output" ]]; then
        log_error "systemd-analyze returned no output"
        return 1
    fi
    debug_echo "systemd-analyze output: $analyze_output"

    printf '%s\n' \
        "# HELP linux_boot_time_seconds Duration of each boot phase in seconds" \
        "# TYPE linux_boot_time_seconds gauge"

    for phase in firmware loader kernel initrd userspace; do
        val=0
        # The duration must be immediately followed by "(<phase>)".
        regex="${duration_re}\\(${phase}\\)"
        if [[ "$analyze_output" =~ $regex ]]; then
            # Drop the single trailing space captured before the parenthesis.
            val=$(parse_time_value "${BASH_REMATCH[1]% }")
        fi
        debug_echo "${phase} phase: ${val}s"
        printf 'linux_boot_time_seconds{phase="%s"} %s\n' "$phase" "$val"
    done

    printf '\n%s\n%s\n' \
        "# HELP linux_boot_total_seconds Total boot time in seconds" \
        "# TYPE linux_boot_total_seconds gauge"

    total_time=0
    # The total is the duration that follows "= ".
    regex="= ${duration_re}"
    if [[ "$analyze_output" =~ $regex ]]; then
        total_time=$(parse_time_value "${BASH_REMATCH[1]% }")
    fi
    debug_echo "Total boot time: ${total_time}s"
    printf 'linux_boot_total_seconds %s\n' "$total_time"
}
# Emit linux_boot_service_time_seconds{service} for the first TOP_N lines of
# `systemd-analyze blame`.
# Globals: TOP_N (read; 20 is used when it is unset)
# Outputs: metrics in Prometheus text format on stdout
# Returns: 0, also when blame produced no output (nothing is printed then)
collect_service_times() {
    local blame_output line field time_str service_name seconds
    local -a fields
    # A single duration token such as "1min", "2.345s", "234ms" or "120us".
    local -r token_re='^[0-9]+(\.[0-9]+)?(y|month|w|d|h|min|ms|us|s)$'

    # head closing the pipe early makes the pipeline fail under pipefail;
    # that is expected, hence "|| true".
    blame_output=$(systemd-analyze blame 2>/dev/null | head -n "${TOP_N:-20}") || true
    if [[ -z "$blame_output" ]]; then
        debug_echo "systemd-analyze blame returned no output"
        return 0
    fi

    printf '%s\n' \
        "# HELP linux_boot_service_time_seconds Time taken by each systemd service to start" \
        "# TYPE linux_boot_service_time_seconds gauge"

    while IFS= read -r line; do
        read -ra fields <<< "$line"
        time_str=""
        service_name=""
        # A duration may span several tokens ("1min 2.345s"), so gather every
        # leading duration token; the first other token is the unit name.
        for field in "${fields[@]}"; do
            if [[ "$field" =~ $token_re ]]; then
                time_str+="${time_str:+ }${field}"
            else
                service_name="$field"
                break
            fi
        done
        [[ -n "$time_str" && -n "$service_name" ]] || continue

        seconds=$(parse_time_value "$time_str")
        debug_echo "Service $service_name: ${seconds}s"

        # Escape backslash and double quote for the exposition format so that
        # escaped unit names (e.g. "...by\x2duuid...") stay intact.
        service_name=${service_name//\\/\\\\}
        service_name=${service_name//\"/\\\"}
        printf 'linux_boot_service_time_seconds{service="%s"} %s\n' "$service_name" "$seconds"
    done <<< "$blame_output"
}
# Emit linux_boot_timestamp, the Unix timestamp of the last boot.
# Sources, in order: the date/time printed by `who -b` converted with
# `date -d`; the btime line of /proc/stat; 0 when neither yields a number.
# Outputs: metric in Prometheus text format on stdout
collect_boot_timestamp() {
    local boot_ts epoch=""

    # Fields 3 and 4 of the `who -b` line are taken as date and time; lines
    # with fewer than four fields are ignored.
    boot_ts=$(who -b 2>/dev/null | awk 'NF >= 4 {print $3, $4}') || true
    if [[ -n "$boot_ts" ]]; then
        epoch=$(date -d "$boot_ts" +%s 2>/dev/null) || epoch=""
        [[ "$epoch" =~ ^[0-9]+$ ]] || epoch=""
        [[ -n "$epoch" ]] && debug_echo "Boot timestamp: $boot_ts (epoch: $epoch)"
    fi

    # Fallback to /proc/stat btime, used both when `who -b` printed nothing
    # and when its output could not be converted to an epoch.
    if [[ -z "$epoch" ]]; then
        epoch=$(awk '/^btime/ {print $2}' /proc/stat 2>/dev/null) || epoch=""
        [[ "$epoch" =~ ^[0-9]+$ ]] || epoch=""
        [[ -n "$epoch" ]] && debug_echo "Boot timestamp from /proc/stat: $epoch"
    fi

    printf '%s\n' \
        "# HELP linux_boot_timestamp Unix timestamp of last boot" \
        "# TYPE linux_boot_timestamp gauge" \
        "linux_boot_timestamp ${epoch:-0}"
}
# Emit linux_boot_services_total{state} (number of lines systemctl lists for
# the active, inactive and failed states) and one
# linux_boot_service_state_info{service,state} sample per listed unit.
# systemctl is queried once per state, so counts and per-unit samples come
# from the same listing.
# Outputs: metrics in Prometheus text format on stdout
collect_service_state_counts() {
    local state units line field svc count info
    local active=0 inactive=0 failed=0
    local info_failed="" info_active="" info_inactive=""
    local -a fields

    for state in failed active inactive; do
        units=$(systemctl list-units --type=service --state="$state" --no-legend 2>/dev/null) || true
        count=0
        info=""
        if [[ -n "$units" ]]; then
            while IFS= read -r line; do
                count=$((count + 1))
                # The unit name is the first field ending in ".service";
                # other fields (such as a leading status marker) are skipped.
                read -ra fields <<< "$line"
                svc=""
                for field in "${fields[@]}"; do
                    if [[ "$field" == *.service ]]; then
                        svc="$field"
                        break
                    fi
                done
                [[ -n "$svc" ]] || continue
                # Escape backslash and double quote for the exposition format
                # so escaped unit names (e.g. "...by\x2duuid...") stay intact.
                svc=${svc//\\/\\\\}
                svc=${svc//\"/\\\"}
                info+="linux_boot_service_state_info{service=\"${svc}\",state=\"${state}\"} 1"$'\n'
            done <<< "$units"
        fi
        case "$state" in
            failed)
                failed=$count
                info_failed=$info
                ;;
            active)
                active=$count
                info_active=$info
                ;;
            inactive)
                inactive=$count
                info_inactive=$info
                ;;
        esac
    done

    debug_echo "Service states — active: $active, inactive: $inactive, failed: $failed"

    printf '%s\n' \
        "# HELP linux_boot_services_total Count of services by activation state at boot" \
        "# TYPE linux_boot_services_total gauge" \
        "linux_boot_services_total{state=\"active\"} ${active}" \
        "linux_boot_services_total{state=\"inactive\"} ${inactive}" \
        "linux_boot_services_total{state=\"failed\"} ${failed}"

    printf '\n%s\n%s\n' \
        "# HELP linux_boot_service_state_info Service state information" \
        "# TYPE linux_boot_service_state_info gauge"

    # Per-unit samples, ordered failed, active, inactive.
    printf '%s%s%s' "$info_failed" "$info_active" "$info_inactive"
}
# ============================================================================
# METRICS COLLECTION
# ============================================================================
# Run every collector in order, separating their output with a blank line,
# then append linux_boot_exporter_duration_seconds with the measured runtime.
# When DEBUG is non-empty, the `systemd-analyze critical-chain` output is
# additionally passed line by line to debug_echo.
# Outputs: all metrics on stdout
collect_metrics() {
    local began_ns finished_ns elapsed collector chain_line

    began_ns=$(date +%s%N)

    collect_boot_phases
    for collector in collect_service_times collect_boot_timestamp collect_service_state_counts; do
        echo
        "$collector"
    done

    finished_ns=$(date +%s%N)
    # Nanosecond difference converted to seconds with four decimals.
    elapsed=$(awk -v s="$began_ns" -v e="$finished_ns" 'BEGIN {printf "%.4f", (e - s) / 1000000000}')

    printf '\n%s\n%s\n%s\n' \
        "# HELP linux_boot_exporter_duration_seconds Time taken to collect all metrics" \
        "# TYPE linux_boot_exporter_duration_seconds gauge" \
        "linux_boot_exporter_duration_seconds ${elapsed}"

    [[ -n "$DEBUG" ]] || return 0

    debug_echo "--- critical-chain output (for reference) ---"
    systemd-analyze critical-chain 2>/dev/null | while IFS= read -r chain_line; do
        debug_echo " $chain_line"
    done
}
# ============================================================================
# OUTPUT
# ============================================================================
# Collect all metrics and deliver them.
# With OUTPUT_FILE empty the metrics go to stdout. Otherwise they are written
# to "<OUTPUT_FILE>.<pid>" (the path the cleanup trap removes), made
# world-readable and renamed over OUTPUT_FILE, so a reader never sees a
# half-written file.
# Globals: OUTPUT_FILE (read)
# Exits:   1 when the target directory is missing or any step of writing and
#          installing the file fails
write_metrics() {
    local metrics output_dir temp_file

    metrics=$(collect_metrics)

    if [[ -z "$OUTPUT_FILE" ]]; then
        printf '%s\n' "$metrics"
        return 0
    fi

    output_dir="$(dirname "$OUTPUT_FILE")"
    if [[ ! -d "$output_dir" ]]; then
        log_error "Directory does not exist: $output_dir"
        exit 1
    fi

    temp_file="${OUTPUT_FILE}.$$"
    # Each step is checked: reporting success after a failed write would
    # leave a stale or missing metrics file unnoticed.
    if ! printf '%s\n' "$metrics" > "$temp_file"; then
        log_error "Failed to write temporary file: $temp_file"
        exit 1
    fi
    if ! chmod 644 "$temp_file"; then
        log_error "Failed to set permissions on: $temp_file"
        exit 1
    fi
    if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
        log_error "Failed to move $temp_file to $OUTPUT_FILE"
        exit 1
    fi
    echo "Metrics written to $OUTPUT_FILE" >&2
}
# ============================================================================
# MAIN
# ============================================================================
# Parse the command line, verify that systemd-analyze is available and
# write the metrics.
# Arguments: the script's command-line arguments
# Globals:   OUTPUT_FILE, DEBUG (written); TEXTFILE_DIR (read)
# Exits:     1 on an unknown option, on -o/--output without a FILE, or when
#            systemd-analyze cannot be found
main() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --textfile)
                OUTPUT_FILE="${TEXTFILE_DIR}/systemd_boot_time.prom"
                shift
                ;;
            -o|--output)
                # With the option as the last argument "shift 2" would fail
                # without shifting and this loop would never terminate.
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a FILE argument"
                    echo "Use --help for usage information" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            --debug)
                DEBUG=1
                shift
                ;;
            --help|-h)
                show_help
                ;;
            --version|-v)
                show_version
                ;;
            *)
                log_error "Unknown option: $1"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done

    if ! command -v systemd-analyze &>/dev/null; then
        log_error "systemd-analyze not found — this script requires systemd"
        exit 1
    fi

    write_metrics
}
# Script entry point: pass every command-line argument to main unchanged.
main "$@"