Add all 44 scripts, update CI: error severity baseline, PowerShell validation, multi-distro testing
Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
@@ -0,0 +1,347 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: systemd-service-exporter.sh
|
||||
# Version: 1.0
|
||||
# Description: Prometheus textfile collector exporter for systemd service status
|
||||
# Monitors service state, uptime, restart count, and enabled status
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
# Date: 2026-03-03
|
||||
#
|
||||
# Prerequisites:
|
||||
# - systemctl command available (systemd)
|
||||
# - node_exporter with textfile collector enabled
|
||||
# - /var/lib/node_exporter directory exists
|
||||
#
|
||||
# Usage:
|
||||
# # Configure services via environment variable
|
||||
# SERVICE_LIST="nginx,sshd,cron" ./systemd-service-exporter.sh
|
||||
#
|
||||
# # Configure services via config file
|
||||
# echo -e "nginx\nsshd\ncron" > /etc/systemd-service-exporter.conf
|
||||
# ./systemd-service-exporter.sh
|
||||
#
|
||||
# # Debug mode
|
||||
# DEBUG=1 SERVICE_LIST="nginx" ./systemd-service-exporter.sh
|
||||
#
|
||||
# # Dry run (output to stdout)
|
||||
# ./systemd-service-exporter.sh --dry-run
|
||||
#
|
||||
# Metrics Exported:
|
||||
# - linux_systemd_service_state{service,state} - Service state (1=current, 0=other)
|
||||
# - linux_systemd_service_uptime_seconds{service} - Seconds since service became active
|
||||
# - linux_systemd_service_restarts_total{service} - Number of times the service restarted
|
||||
# - linux_systemd_service_enabled{service} - Whether the service is enabled (1/0)
|
||||
#
|
||||
# Configuration:
|
||||
# Environment: SERVICE_LIST (comma-separated)
|
||||
# Config file: /etc/systemd-service-exporter.conf (one per line)
|
||||
# Textfile directory: /var/lib/node_exporter
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

# Script identity.
VERSION="1.0"
SCRIPT_NAME="${0##*/}"

# Paths. TEXTFILE_DIR and CONFIG_FILE honour a value already present in the
# environment and otherwise fall back to the defaults below.
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
OUTPUT_FILE="${TEXTFILE_DIR}/systemd_services.prom"
CONFIG_FILE="${CONFIG_FILE:-/etc/systemd-service-exporter.conf}"

# Staging file: the output path suffixed with this shell's PID.
TMP_FILE="${OUTPUT_FILE}.$$"

# Freeze everything above for the remainder of the run.
readonly VERSION SCRIPT_NAME TEXTFILE_DIR OUTPUT_FILE CONFIG_FILE TMP_FILE

# Runtime flags
DRY_RUN=false
DEBUG="${DEBUG:-}"
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Emit a "[DEBUG] "-prefixed message on stderr, but only while DEBUG is
# non-empty; otherwise do nothing and succeed.
# Globals:   DEBUG (read)
# Arguments: message words (joined into one line)
# Outputs:   one line on stderr when debugging is enabled
debug_echo() {
  [[ -z "$DEBUG" ]] && return 0
  printf '%s\n' "[DEBUG] $*" >&2
}
|
||||
|
||||
# Report an error: writes the arguments, prefixed with "[ERROR] ", as a single
# line on stderr. Nothing is written to stdout.
# Arguments: message words (joined into one line)
log_error() {
  printf '[ERROR] %s\n' "$*" >&2
}
|
||||
|
||||
# Delete the staging file named by TMP_FILE. A missing file is not an error
# because rm is invoked with -f.
# Globals: TMP_FILE (read)
cleanup() {
  local staging_file="$TMP_FILE"
  rm -f "$staging_file"
}

# Run cleanup on every exit path of the script.
trap 'cleanup' EXIT
|
||||
|
||||
# Print the usage text on stdout and terminate the shell with status 0.
# Globals: SCRIPT_NAME (read; substituted into the usage text)
# Outputs: usage text on stdout
# Returns: never returns; always exits 0
show_help() {
  cat <<HELP_TEXT
Usage: ${SCRIPT_NAME} [OPTIONS]

Prometheus textfile collector exporter for systemd service status.
Monitors service state, uptime, restart count, and enabled status.

OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version

CONFIGURATION:
Services can be configured in two ways:

1. Environment variable (comma-separated):
SERVICE_LIST="nginx,sshd,cron" ${SCRIPT_NAME}

2. Config file (one service per line):
/etc/systemd-service-exporter.conf

The environment variable takes precedence over the config file.

ENVIRONMENT VARIABLES:
SERVICE_LIST Comma-separated list of services to monitor
CONFIG_FILE Path to config file (default: /etc/systemd-service-exporter.conf)
TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter)
DEBUG Enable debug output when set to any value

EXAMPLES:
SERVICE_LIST="nginx,sshd,cron" ${SCRIPT_NAME}
SERVICE_LIST="docker" ${SCRIPT_NAME} --dry-run
DEBUG=1 ${SCRIPT_NAME}

HELP_TEXT
  exit 0
}
|
||||
|
||||
# Print "<script name> version <version>" on stdout and terminate the shell
# with status 0.
# Globals: SCRIPT_NAME, VERSION (read)
# Returns: never returns; always exits 0
show_version() {
  printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
  exit 0
}
|
||||
|
||||
# ============================================================================
|
||||
# SERVICE DISCOVERY
|
||||
# ============================================================================
|
||||
|
||||
# Resolve the list of services to monitor and print it, one name per line.
#
# Source precedence:
#   1. SERVICE_LIST (comma-separated), when set and non-empty
#   2. CONFIG_FILE (one service per line, "#" starts a comment)
#
# Every candidate entry is normalised the same way regardless of source: all
# whitespace (spaces, tabs, carriage returns) is removed and entries that end
# up empty are skipped. This makes 'nginx, sshd,,cron' and config files with
# tabs, CRLF line endings or a missing final newline behave as expected.
#
# Globals:   SERVICE_LIST, CONFIG_FILE (read)
# Outputs:   service names on stdout; diagnostics on stderr
# Returns:   exits the (sub)shell with status 1 when no source is configured
#            or when the source yields no service names
load_services() {
  local -a services=()
  local -a candidates=()
  local entry line

  if [[ -n "${SERVICE_LIST:-}" ]]; then
    debug_echo "Loading services from SERVICE_LIST environment variable"
    IFS=',' read -ra candidates <<< "$SERVICE_LIST"
  elif [[ -f "$CONFIG_FILE" ]]; then
    debug_echo "Loading services from config file: $CONFIG_FILE"
    # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Drop everything from the first "#" onwards (comments).
      candidates+=("${line%%#*}")
    done < "$CONFIG_FILE"
  else
    log_error "No services configured. Set SERVICE_LIST or create $CONFIG_FILE"
    exit 1
  fi

  for entry in "${candidates[@]}"; do
    # Strip all whitespace, not just spaces, so tabs and CR do not end up
    # inside a service name.
    entry="${entry//[[:space:]]/}"
    if [[ -n "$entry" ]]; then
      services+=("$entry")
    fi
  done

  if [[ ${#services[@]} -eq 0 ]]; then
    log_error "No services found in configuration"
    exit 1
  fi

  debug_echo "Monitoring ${#services[@]} services: ${services[*]}"
  printf '%s\n' "${services[@]}"
}
|
||||
|
||||
# ============================================================================
|
||||
# METRICS COLLECTION
|
||||
# ============================================================================
|
||||
|
||||
# Print the state string that "systemctl is-active" reports for a service,
# or "unknown" when it reports nothing.
# Arguments: $1 - service name
# Outputs:   one line on stdout
get_service_state() {
  local unit="$1"
  local reported=""

  # The exit status is deliberately ignored; only the printed text is used.
  reported="$(systemctl is-active "$unit" 2>/dev/null)" || :

  printf '%s\n' "${reported:-unknown}"
}
|
||||
|
||||
# Print the number of seconds elapsed since the ActiveEnterTimestamp that
# systemctl reports for a service.
# Prints 0 when no timestamp is reported, when "date -d" cannot convert it,
# or when the computed difference is negative.
# Arguments: $1 - service name
# Outputs:   a non-negative integer on stdout
get_service_uptime() {
  local unit="$1"
  local entered_at="" entered_epoch=""
  local current_epoch elapsed

  entered_at="$(systemctl show "$unit" --property=ActiveEnterTimestamp --value 2>/dev/null)" || :

  # Only attempt the conversion when systemctl actually printed something.
  if [[ -n "$entered_at" ]]; then
    entered_epoch="$(date -d "$entered_at" +%s 2>/dev/null)" || :
  fi

  # Covers both "no timestamp" and "timestamp could not be parsed".
  if [[ -z "$entered_epoch" ]]; then
    echo "0"
    return
  fi

  current_epoch="$(date +%s)"
  elapsed=$(( current_epoch - entered_epoch ))

  # Clamp a timestamp that lies in the future to zero.
  if (( elapsed < 0 )); then
    elapsed=0
  fi
  printf '%s\n' "$elapsed"
}
|
||||
|
||||
# Print the NRestarts value that systemctl reports for a service.
#
# The value ends up verbatim as a sample value in the metrics output, so it
# is validated here: anything that is not a plain unsigned integer (empty
# output, an error string, an unexpected format) is reported as 0 rather
# than being passed through.
# Arguments: $1 - service name
# Outputs:   a non-negative integer on stdout
get_restart_count() {
  local service="$1"
  local count=""

  # Exit status is ignored on purpose; an empty result is handled below.
  count=$(systemctl show "$service" --property=NRestarts --value 2>/dev/null) || true

  if [[ "$count" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$count"
  else
    printf '0\n'
  fi
}
|
||||
|
||||
# Print 1 when "systemctl is-enabled" reports exactly "enabled" for a
# service, and 0 for any other output (including none at all).
# Arguments: $1 - service name
# Outputs:   "1" or "0" on stdout
get_enabled_status() {
  local unit="$1"
  local enablement=""

  # Only the printed word matters; the exit status is ignored.
  enablement="$(systemctl is-enabled "$unit" 2>/dev/null)" || :

  case "$enablement" in
    enabled) printf '1\n' ;;
    *) printf '0\n' ;;
  esac
}
|
||||
|
||||
# Print 1 when the two given state strings are identical, otherwise 0.
# The comparison is a literal string match (no glob interpretation).
# Arguments: $1 - state the service is currently in
#            $2 - state being tested for
# Outputs:   "1" or "0" on stdout
state_to_value() {
  local actual="$1"
  local wanted="$2"
  local flag=0

  if [[ "$actual" == "$wanted" ]]; then
    flag=1
  fi
  printf '%s\n' "$flag"
}
|
||||
|
||||
# Collect all metrics for the configured services and print them on stdout
# in Prometheus text exposition format.
#
# Four metric families are emitted, each preceded by its HELP/TYPE header:
#   linux_systemd_service_state{service,state}   (active / inactive / failed)
#   linux_systemd_service_uptime_seconds{service}
#   linux_systemd_service_restarts_total{service}
#   linux_systemd_service_enabled{service}
#
# Notes on behaviour:
#   * load_services is run through a command substitution whose status is
#     checked, so a failure there (it exits 1 when nothing is configured)
#     makes this function return 1 without printing anything, instead of
#     silently producing header-only output.
#   * Lines are written with printf '%s', never '%b', so backslashes in unit
#     names (for example the "\x2d" of escaped unit names) are not
#     re-interpreted.
#   * Backslashes and double quotes in service names are escaped in the
#     label value, as the exposition format requires.
#
# Outputs: metrics on stdout; debug messages via debug_echo
# Returns: 0 on success, 1 when the service list could not be loaded
collect_metrics() {
  local -a services=()
  local -a labels=()
  local service_lines svc escaped
  local i s state val uptime restarts enabled

  service_lines="$(load_services)" || return 1

  while IFS= read -r svc; do
    if [[ -z "$svc" ]]; then
      continue
    fi
    services+=("$svc")
    # Escape for use inside a quoted label value: "\" first, then '"'.
    escaped=${svc//\\/\\\\}
    escaped=${escaped//\"/\\\"}
    labels+=("$escaped")
  done <<< "$service_lines"

  printf '%s\n' "# HELP linux_systemd_service_state Current state of the systemd service"
  printf '%s\n' "# TYPE linux_systemd_service_state gauge"
  for i in "${!services[@]}"; do
    state="$(get_service_state "${services[i]}")"
    debug_echo "Service ${services[i]}: state=$state"
    # One sample per known state; the one matching the current state is 1.
    for s in active inactive failed; do
      val="$(state_to_value "$state" "$s")"
      printf 'linux_systemd_service_state{service="%s",state="%s"} %s\n' \
        "${labels[i]}" "$s" "$val"
    done
  done

  printf '%s\n' "# HELP linux_systemd_service_uptime_seconds Time in seconds since the service became active"
  printf '%s\n' "# TYPE linux_systemd_service_uptime_seconds gauge"
  for i in "${!services[@]}"; do
    uptime="$(get_service_uptime "${services[i]}")"
    debug_echo "Service ${services[i]}: uptime=${uptime}s"
    printf 'linux_systemd_service_uptime_seconds{service="%s"} %s\n' \
      "${labels[i]}" "$uptime"
  done

  printf '%s\n' "# HELP linux_systemd_service_restarts_total Total number of service restarts"
  printf '%s\n' "# TYPE linux_systemd_service_restarts_total counter"
  for i in "${!services[@]}"; do
    restarts="$(get_restart_count "${services[i]}")"
    debug_echo "Service ${services[i]}: restarts=$restarts"
    printf 'linux_systemd_service_restarts_total{service="%s"} %s\n' \
      "${labels[i]}" "$restarts"
  done

  printf '%s\n' "# HELP linux_systemd_service_enabled Whether the service is enabled to start at boot"
  printf '%s\n' "# TYPE linux_systemd_service_enabled gauge"
  for i in "${!services[@]}"; do
    enabled="$(get_enabled_status "${services[i]}")"
    debug_echo "Service ${services[i]}: enabled=$enabled"
    printf 'linux_systemd_service_enabled{service="%s"} %s\n' \
      "${labels[i]}" "$enabled"
  done

  return 0
}
|
||||
|
||||
# ============================================================================
|
||||
# OUTPUT
|
||||
# ============================================================================
|
||||
|
||||
# Collect the metrics and deliver them.
#
# In dry-run mode the metrics are printed on stdout. Otherwise they are
# written to TMP_FILE and then renamed onto OUTPUT_FILE, so a reader of the
# output file never sees a half-written file.
#
# Every step is checked: if collection, the temp-file write or the rename
# fails, an error is logged and the shell exits with status 1, leaving any
# previously published OUTPUT_FILE untouched.
#
# Globals: DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (read)
# Outputs: metrics on stdout (dry run only); diagnostics on stderr
# Returns: 0 on success; exits 1 on any failure
write_metrics() {
  local metrics

  if ! metrics="$(collect_metrics)"; then
    log_error "Failed to collect metrics"
    exit 1
  fi

  if [[ "$DRY_RUN" == "true" ]]; then
    printf '%s\n' "$metrics"
    return 0
  fi

  if [[ ! -d "$TEXTFILE_DIR" ]]; then
    log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
    exit 1
  fi

  # A failed redirection (permissions, full disk) also lands in this branch.
  if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
    log_error "Failed to write temporary file: $TMP_FILE"
    exit 1
  fi

  if ! mv -f -- "$TMP_FILE" "$OUTPUT_FILE"; then
    log_error "Failed to move $TMP_FILE to $OUTPUT_FILE"
    exit 1
  fi

  debug_echo "Metrics written to $OUTPUT_FILE"
  return 0
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
# Entry point: interpret the command-line options, make sure systemctl is
# available, then produce the metrics via write_metrics.
#
# Recognised options:
#   --dry-run       set DRY_RUN=true
#   --debug         set DEBUG=1
#   --help, -h      show_help (which exits)
#   --version, -v   show_version (which exits)
# Any other argument is reported as an error and the shell exits with 1.
#
# Globals:   DRY_RUN, DEBUG (written)
# Arguments: the script's command-line arguments
main() {
  local arg

  # No option takes a value, so the arguments can be handled one by one.
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      --debug)
        DEBUG=1
        ;;
      -h | --help)
        show_help
        ;;
      -v | --version)
        show_version
        ;;
      *)
        log_error "Unknown option: $arg"
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done

  if ! command -v systemctl > /dev/null 2>&1; then
    log_error "systemctl not found — this script requires systemd"
    exit 1
  fi

  write_metrics
}
|
||||
|
||||
# Script entry: hand every command-line argument to main unchanged.
main "$@"
|
||||
Reference in New Issue
Block a user