#!/bin/bash
################################################################################
# Script Name: systemd-service-exporter.sh
# Version: 1.0
# Description: Prometheus textfile collector exporter for systemd service status
#              Monitors service state, uptime, restart count, and enabled status
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Date: 2026-03-03
#
# Prerequisites:
#   - systemctl command available (systemd)
#   - node_exporter with textfile collector enabled
#   - /var/lib/node_exporter directory exists
#
# Usage:
#   # Configure services via environment variable
#   SERVICE_LIST="nginx,sshd,cron" ./systemd-service-exporter.sh
#
#   # Configure services via config file
#   echo -e "nginx\nsshd\ncron" > /etc/systemd-service-exporter.conf
#   ./systemd-service-exporter.sh
#
#   # Debug mode
#   DEBUG=1 SERVICE_LIST="nginx" ./systemd-service-exporter.sh
#
#   # Dry run (output to stdout)
#   ./systemd-service-exporter.sh --dry-run
#
# Metrics Exported:
#   - linux_systemd_service_state{service,state}    - Service state (1=current, 0=other)
#   - linux_systemd_service_uptime_seconds{service} - Seconds since service became active
#                                                     (0 while the service is not active)
#   - linux_systemd_service_restarts_total{service} - Number of times the service restarted
#   - linux_systemd_service_enabled{service}        - Whether the service is enabled (1/0)
#
# Configuration:
#   Environment: SERVICE_LIST (comma-separated)
#   Config file: /etc/systemd-service-exporter.conf (one per line)
#   Textfile directory: /var/lib/node_exporter
#
################################################################################

set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

readonly VERSION="1.0"
readonly SCRIPT_NAME="${0##*/}"
readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
readonly OUTPUT_FILE="${TEXTFILE_DIR}/systemd_services.prom"
readonly CONFIG_FILE="${CONFIG_FILE:-/etc/systemd-service-exporter.conf}"
# The temp file lives next to the final file so the later mv is a same-directory
# rename; its name is OUTPUT_FILE plus the PID, so it does not end in ".prom".
readonly TMP_FILE="${OUTPUT_FILE}.$$"

# Runtime flags
DRY_RUN=false
DEBUG=${DEBUG:-}

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

# Print a debug message to stderr when DEBUG is non-empty.
# Arguments: $* - message text
debug_echo() {
    if [[ -n "$DEBUG" ]]; then
        echo "[DEBUG] $*" >&2
    fi
}

# Print an error message to stderr.
# Arguments: $* - message text
log_error() {
    echo "[ERROR] $*" >&2
}

# Remove the temporary output file; installed as the EXIT trap below.
cleanup() {
    rm -f -- "$TMP_FILE"
}
trap cleanup EXIT

# NOTE(review): the bodies of show_help, show_version and load_services, and the
# first lines of get_service_state, were missing from the reviewed copy of this
# file (text between a "<" and a ">" had been stripped). They are reconstructed
# below from the header documentation and from how main/collect_metrics call
# them. Confirm wording and behaviour against the upstream script.

# Print usage information to stdout and exit 0.
# main calls this without shifting arguments, so it must terminate the script.
show_help() {
    cat <<EOF
Usage: ${SCRIPT_NAME} [OPTIONS]

Prometheus textfile collector exporter for systemd service status.

Options:
  --dry-run       Print metrics to stdout instead of writing ${OUTPUT_FILE}
  --debug         Enable debug output on stderr
  -h, --help      Show this help and exit
  -v, --version   Show version and exit

Configuration:
  SERVICE_LIST    Comma-separated list of services (environment variable)
  CONFIG_FILE     File listing one service per line
                  (current: ${CONFIG_FILE})
  TEXTFILE_DIR    node_exporter textfile collector directory
                  (current: ${TEXTFILE_DIR})
EOF
    exit 0
}

# Print the script name and version to stdout and exit 0.
show_version() {
    echo "${SCRIPT_NAME} version ${VERSION}"
    exit 0
}

# ============================================================================
# SERVICE DISCOVERY
# ============================================================================

# Print the configured service names, one per line, on stdout.
# Globals:  SERVICE_LIST (read), CONFIG_FILE (read)
# Outputs:  one trimmed, non-empty service name per line
# NOTE(review): SERVICE_LIST taking precedence over CONFIG_FILE, and "#" comment
# support in the config file, are assumptions of this reconstruction.
load_services() {
    local -a names=()
    local entry

    if [[ -n "${SERVICE_LIST:-}" ]]; then
        IFS=',' read -r -a names <<<"$SERVICE_LIST"
    elif [[ -r "$CONFIG_FILE" ]]; then
        # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
        while IFS= read -r entry || [[ -n "$entry" ]]; do
            names+=("$entry")
        done <"$CONFIG_FILE"
    fi

    for entry in "${names[@]}"; do
        entry="${entry%%#*}"                          # drop trailing comment
        entry="${entry#"${entry%%[![:space:]]*}"}"    # trim leading whitespace
        entry="${entry%"${entry##*[![:space:]]}"}"    # trim trailing whitespace
        if [[ -n "$entry" ]]; then
            printf '%s\n' "$entry"
        fi
    done
    return 0
}

# ============================================================================
# METRIC COLLECTION
# ============================================================================

# Print the state reported by systemctl for a service, or "unknown" when
# systemctl prints nothing.
# Arguments: $1 - service name
get_service_state() {
    local service="$1"
    local state
    # is-active exits non-zero for anything but "active"; only its text is used.
    state=$(systemctl is-active -- "$service" 2>/dev/null) || true
    echo "${state:-unknown}"
}

# Print the number of seconds the service has been active, or 0.
# 0 is printed when the unit is not currently active/reloading, when no start
# timestamp is available, when the timestamp cannot be parsed by date(1), or
# when the computed difference is negative.
# Arguments: $1 - service name
get_service_uptime() {
    local service="$1"
    local key value
    local active_state="" timestamp=""

    # One systemctl call, parsed as Key=Value lines.
    while IFS='=' read -r key value; do
        case "$key" in
            ActiveState) active_state="$value" ;;
            ActiveEnterTimestamp) timestamp="$value" ;;
        esac
    done < <(systemctl show --property=ActiveState,ActiveEnterTimestamp \
        -- "$service" 2>/dev/null)

    # ActiveEnterTimestamp keeps the time of the last start after the unit has
    # stopped, so the timestamp alone would report uptime for a dead service.
    if [[ "$active_state" != "active" && "$active_state" != "reloading" ]]; then
        echo "0"
        return
    fi

    if [[ -z "$timestamp" ]]; then
        echo "0"
        return
    fi

    local active_epoch
    active_epoch=$(date -d "$timestamp" +%s 2>/dev/null) || true
    if [[ ! "$active_epoch" =~ ^[0-9]+$ ]]; then
        echo "0"
        return
    fi

    local now uptime
    now=$(date +%s)
    uptime=$((now - active_epoch))
    if ((uptime < 0)); then
        echo "0"
    else
        echo "$uptime"
    fi
}

# Print the unit's NRestarts property, or 0 when it is missing or not a
# non-negative integer (so the emitted sample is always a valid number).
# Arguments: $1 - service name
get_restart_count() {
    local service="$1"
    local count
    count=$(systemctl show --property=NRestarts --value -- "$service" 2>/dev/null) || true
    if [[ ! "$count" =~ ^[0-9]+$ ]]; then
        count=0
    fi
    echo "$count"
}

# Print 1 when "systemctl is-enabled" reports exactly "enabled", else 0.
# Arguments: $1 - service name
get_enabled_status() {
    local service="$1"
    local status
    status=$(systemctl is-enabled -- "$service" 2>/dev/null) || true
    if [[ "$status" == "enabled" ]]; then
        echo "1"
    else
        echo "0"
    fi
}

# Print 1 when the two states are equal, else 0.
# Arguments: $1 - current state, $2 - state being checked
state_to_value() {
    local current_state="$1"
    local check_state="$2"
    if [[ "$current_state" == "$check_state" ]]; then
        echo "1"
    else
        echo "0"
    fi
}

# Escape a string for use as a Prometheus label value: backslash, double quote
# and newline become \\, \" and \n respectively.
# Arguments: $1 - raw value
# Outputs:   escaped value on stdout, without a trailing newline
escape_label_value() {
    local value="$1"
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\n'/\\n}
    printf '%s' "$value"
}

# Print all metrics in Prometheus text exposition format on stdout.
# Returns: 0 on success, 1 when no services are configured.
collect_metrics() {
    local -a services=()
    local -a labels=()
    local svc
    while IFS= read -r svc; do
        if [[ -n "$svc" ]]; then
            services+=("$svc")
            labels+=("$(escape_label_value "$svc")")
        fi
    done < <(load_services)

    if ((${#services[@]} == 0)); then
        log_error "No services configured: set SERVICE_LIST or populate $CONFIG_FILE"
        return 1
    fi

    # Lines are emitted with '%s' formats so backslashes in unit names are
    # written as data rather than interpreted as escape sequences.
    local i service state candidate val uptime restarts enabled

    printf '%s\n' "# HELP linux_systemd_service_state Current state of the systemd service"
    printf '%s\n' "# TYPE linux_systemd_service_state gauge"
    for i in "${!services[@]}"; do
        service="${services[$i]}"
        state=$(get_service_state "$service")
        debug_echo "Service $service: state=$state"
        for candidate in active inactive failed; do
            val=$(state_to_value "$state" "$candidate")
            printf 'linux_systemd_service_state{service="%s",state="%s"} %s\n' \
                "${labels[$i]}" "$candidate" "$val"
        done
    done

    printf '%s\n' "# HELP linux_systemd_service_uptime_seconds Time in seconds since the service became active"
    printf '%s\n' "# TYPE linux_systemd_service_uptime_seconds gauge"
    for i in "${!services[@]}"; do
        service="${services[$i]}"
        uptime=$(get_service_uptime "$service")
        debug_echo "Service $service: uptime=${uptime}s"
        printf 'linux_systemd_service_uptime_seconds{service="%s"} %s\n' \
            "${labels[$i]}" "$uptime"
    done

    printf '%s\n' "# HELP linux_systemd_service_restarts_total Total number of service restarts"
    printf '%s\n' "# TYPE linux_systemd_service_restarts_total counter"
    for i in "${!services[@]}"; do
        service="${services[$i]}"
        restarts=$(get_restart_count "$service")
        debug_echo "Service $service: restarts=$restarts"
        printf 'linux_systemd_service_restarts_total{service="%s"} %s\n' \
            "${labels[$i]}" "$restarts"
    done

    printf '%s\n' "# HELP linux_systemd_service_enabled Whether the service is enabled to start at boot"
    printf '%s\n' "# TYPE linux_systemd_service_enabled gauge"
    for i in "${!services[@]}"; do
        service="${services[$i]}"
        enabled=$(get_enabled_status "$service")
        debug_echo "Service $service: enabled=$enabled"
        printf 'linux_systemd_service_enabled{service="%s"} %s\n' \
            "${labels[$i]}" "$enabled"
    done
}

# ============================================================================
# OUTPUT
# ============================================================================

# Collect metrics and either print them (dry run) or write them to OUTPUT_FILE
# through TMP_FILE followed by a rename.
# Globals: DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (all read)
# Exits 1 when collection fails, the target directory is missing, or the file
# cannot be written or renamed.
write_metrics() {
    local metrics
    metrics=$(collect_metrics) || exit 1

    if [[ "$DRY_RUN" == "true" ]]; then
        printf '%s\n' "$metrics"
        return
    fi

    if [[ ! -d "$TEXTFILE_DIR" ]]; then
        log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
        exit 1
    fi

    if ! printf '%s\n' "$metrics" >"$TMP_FILE"; then
        log_error "Failed to write temporary file: $TMP_FILE"
        exit 1
    fi

    if ! mv -f -- "$TMP_FILE" "$OUTPUT_FILE"; then
        log_error "Failed to move $TMP_FILE to $OUTPUT_FILE"
        exit 1
    fi

    debug_echo "Metrics written to $OUTPUT_FILE"
}

# ============================================================================
# MAIN
# ============================================================================

# Parse command-line options, verify systemctl is available, write metrics.
# Arguments: "$@" - command-line arguments
main() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --debug)
                DEBUG=1
                shift
                ;;
            --help | -h)
                show_help
                ;;
            --version | -v)
                show_version
                ;;
            *)
                log_error "Unknown option: $1"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done

    if ! command -v systemctl &>/dev/null; then
        log_error "systemctl not found — this script requires systemd"
        exit 1
    fi

    write_metrics
}

main "$@"