Add all 44 scripts, update CI: error severity baseline, PowerShell validation, multi-distro testing
Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
+74
-23
@@ -1,9 +1,11 @@
|
||||
###############################################################################
|
||||
# .gitlab-ci.yml — CI pipeline for bash script testing
|
||||
# .gitlab-ci.yml — CI pipeline for linux-scripts repository
|
||||
#
|
||||
# Stages:
|
||||
# 1. lint — ShellCheck static analysis + bash syntax check
|
||||
# 2. test — Run --help and --dry-run in Ubuntu and RHEL containers
|
||||
# All scripts are tested on every push:
|
||||
# 1. lint — ShellCheck + bash syntax + PowerShell syntax
|
||||
# 2. test — --help and --dry-run validation on Ubuntu and Rocky Linux
|
||||
#
|
||||
# On success on master, scripts are ready to sync to the website.
|
||||
###############################################################################
|
||||
|
||||
stages:
|
||||
@@ -11,7 +13,8 @@ stages:
|
||||
- test
|
||||
|
||||
variables:
|
||||
SHELLCHECK_SEVERITY: "warning"
|
||||
# Start at "error" for a clean baseline, tighten to "warning" as scripts are cleaned up
|
||||
SHELLCHECK_SEVERITY: "error"
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# Lint Stage
|
||||
@@ -21,30 +24,57 @@ shellcheck:
|
||||
stage: lint
|
||||
image: koalaman/shellcheck-alpine:stable
|
||||
script:
|
||||
- echo "Running ShellCheck on all .sh files..."
|
||||
- echo "Running ShellCheck on $(find . -name '*.sh' -not -path './.git/*' | wc -l) scripts..."
|
||||
- find . -name "*.sh" -not -path "./.git/*" -print0 |
|
||||
xargs -0 -r shellcheck --severity="$SHELLCHECK_SEVERITY" --format=tty
|
||||
- echo "ShellCheck passed"
|
||||
|
||||
bash-syntax:
|
||||
stage: lint
|
||||
image: bash:5
|
||||
script:
|
||||
- echo "Checking bash syntax (bash -n)..."
|
||||
- echo "Checking bash syntax..."
|
||||
- |
|
||||
errors=0
|
||||
total=0
|
||||
for script in $(find . -name "*.sh" -not -path "./.git/*"); do
|
||||
total=$((total + 1))
|
||||
if ! bash -n "$script" 2>&1; then
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
done
|
||||
if [ "$errors" -gt 0 ]; then
|
||||
echo "FAILED: $errors script(s) have syntax errors"
|
||||
echo "FAILED: $errors/$total script(s) have syntax errors"
|
||||
exit 1
|
||||
fi
|
||||
echo "All scripts pass syntax check"
|
||||
echo "All $total scripts pass syntax check"
|
||||
|
||||
powershell-syntax:
|
||||
stage: lint
|
||||
image: mcr.microsoft.com/powershell:lts-ubuntu-24.04
|
||||
script:
|
||||
- echo "Checking PowerShell syntax..."
|
||||
- |
|
||||
errors=0
|
||||
total=0
|
||||
for script in $(find . -name "*.ps1" -not -path "./.git/*"); do
|
||||
total=$((total + 1))
|
||||
echo "Checking: $script"
|
||||
if ! pwsh -Command "try { \$null = [System.Management.Automation.Language.Parser]::ParseFile('$script', [ref]\$null, [ref]\$null); Write-Host 'OK: $script' } catch { Write-Error \$_; exit 1 }" 2>&1; then
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
done
|
||||
if [ "$errors" -gt 0 ]; then
|
||||
echo "FAILED: $errors/$total PowerShell script(s) have syntax errors"
|
||||
exit 1
|
||||
fi
|
||||
echo "All $total PowerShell scripts pass syntax check"
|
||||
rules:
|
||||
- exists:
|
||||
- "*.ps1"
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# Test Stage — Ubuntu
|
||||
# Test Stage — Ubuntu 24.04
|
||||
# ─────────────────────────────────────────────
|
||||
|
||||
test-ubuntu:
|
||||
@@ -54,19 +84,30 @@ test-ubuntu:
|
||||
- apt-get update -qq
|
||||
- apt-get install -y -qq procps iproute2 kmod >/dev/null 2>&1
|
||||
script:
|
||||
- echo "=== Testing on Ubuntu 24.04 ==="
|
||||
- echo "=== Testing --help flags on Ubuntu 24.04 ==="
|
||||
- |
|
||||
for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*"); do
|
||||
echo ""
|
||||
echo "--- $(basename "$script") --help ---"
|
||||
bash "$script" --help 2>&1 || true
|
||||
passed=0
|
||||
failed=0
|
||||
for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*" | sort); do
|
||||
name=$(basename "$script")
|
||||
if bash "$script" --help >/dev/null 2>&1; then
|
||||
echo "✓ $name --help"
|
||||
passed=$((passed + 1))
|
||||
elif bash "$script" -h >/dev/null 2>&1; then
|
||||
echo "✓ $name -h"
|
||||
passed=$((passed + 1))
|
||||
else
|
||||
echo "○ $name (no --help flag)"
|
||||
fi
|
||||
done
|
||||
echo ""
|
||||
echo "$passed scripts have working --help"
|
||||
- echo ""
|
||||
- echo "--- networktuning.sh --dry-run ---"
|
||||
- echo "=== Testing networktuning.sh --dry-run ==="
|
||||
- bash networktuning.sh --dry-run 2>&1 || true
|
||||
|
||||
# ─────────────────────────────────────────────
|
||||
# Test Stage — RHEL
|
||||
# Test Stage — Rocky Linux 9
|
||||
# ─────────────────────────────────────────────
|
||||
|
||||
test-rhel:
|
||||
@@ -75,13 +116,23 @@ test-rhel:
|
||||
before_script:
|
||||
- dnf install -y -q procps iproute kmod >/dev/null 2>&1
|
||||
script:
|
||||
- echo "=== Testing on Rocky Linux 9 ==="
|
||||
- echo "=== Testing --help flags on Rocky Linux 9 ==="
|
||||
- |
|
||||
for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*"); do
|
||||
echo ""
|
||||
echo "--- $(basename "$script") --help ---"
|
||||
bash "$script" --help 2>&1 || true
|
||||
passed=0
|
||||
for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*" | sort); do
|
||||
name=$(basename "$script")
|
||||
if bash "$script" --help >/dev/null 2>&1; then
|
||||
echo "✓ $name --help"
|
||||
passed=$((passed + 1))
|
||||
elif bash "$script" -h >/dev/null 2>&1; then
|
||||
echo "✓ $name -h"
|
||||
passed=$((passed + 1))
|
||||
else
|
||||
echo "○ $name (no --help flag)"
|
||||
fi
|
||||
done
|
||||
echo ""
|
||||
echo "$passed scripts have working --help"
|
||||
- echo ""
|
||||
- echo "--- networktuning.sh --dry-run ---"
|
||||
- echo "=== Testing networktuning.sh --dry-run ==="
|
||||
- bash networktuning.sh --dry-run 2>&1 || true
|
||||
|
||||
@@ -0,0 +1,287 @@
|
||||
#!/bin/bash
|
||||
|
||||
######################################################################################
|
||||
#### Version 2.2 ####
|
||||
#### For questions or comments contact@mylinux.work ####
|
||||
#### Author : Phil Connor ####
|
||||
#### ####
|
||||
#### Notes : ####
|
||||
####   This script is a simple "helper" to install and configure Nagios 4,        ####
####   the NRPE server and plugins behind Apache on Ubuntu/RedHat based servers.  ####
|
||||
#### There is no silver bullet. Don't expect the perfect setup, ####
|
||||
#### review comments and adapt the parameters to your application usage. ####
|
||||
#### ####
|
||||
#### Use this script at your OWN risk. There is no guarantee whatsoever. ####
|
||||
#### ####
|
||||
#### Usage chmod 755 then ./PdnsInstall.sh or bash PdnsInstall.sh ####
|
||||
######################################################################################
|
||||
|
||||
############################
|
||||
#### User Configurables ####
|
||||
############################
|
||||
# HTTP=apache
# Nagios web UI account. Both values can be supplied through the environment
# (e.g. NAGADPASS=... bash script.sh) so the real password does not have to be
# stored in the script; the literals are only fallbacks and should be changed.
NAGAD="${NAGAD:-nagiosadmin}"
NAGADPASS="${NAGADPASS:-MyPaSsWoRd}"
|
||||
|
||||
|
||||
##########################
|
||||
#### System Variables ####
|
||||
##########################
|
||||
# IPADD=$(ifconfig | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1')
|
||||
# OS: first word of PRETTY_NAME from /etc/os-release, lower-cased.
OS=$(
  grep PRETTY_NAME /etc/os-release \
    | sed 's/PRETTY_NAME=//g' \
    | tr -d '="' \
    | awk '{print $1}' \
    | tr '[:upper:]' '[:lower:]'
)
# OSVER: VERSION_ID from /etc/os-release, cut at the first dot.
OSVER=$(
  grep VERSION_ID /etc/os-release \
    | sed 's/VERSION_ID=//g' \
    | tr -d '="' \
    | awk -F. '{print $1}'
)
|
||||
# SAEMAIL=
|
||||
|
||||
###########################################################
|
||||
#### Detect Package Manager from OS and OSVer Variables ####
|
||||
###########################################################
|
||||
# Pick the package manager command line for the detected distro.
# PAKMGR is left untouched for any OS/version combination not listed here.
case "${OS}" in
  ubuntu)
    PAKMGR="apt -y"
    ;;
  centos | red | oracle | rocky | alma)
    case "${OSVER}" in
      8 | 9) PAKMGR="dnf -y" ;;
    esac
    ;;
esac
|
||||
|
||||
###########################
|
||||
#### Install Net-Utils ####
|
||||
###########################
|
||||
# ifconfig ships in the "net-tools" package on both Ubuntu and the RHEL
# family; install it only when the command is missing.
if ! command -v ifconfig >/dev/null 2>&1; then
  if [ -z "${PAKMGR:-}" ]; then
    # Without this guard an empty PAKMGR would run coreutils "install".
    echo "Unsupported OS '${OS:-unknown}': cannot install net-tools automatically" >&2
  else
    if [ "${OS}" = ubuntu ]; then
      # PAKMGR is intentionally unquoted: it holds "command -y".
      ${PAKMGR} update
    fi
    ${PAKMGR} install net-tools
  fi
fi
|
||||
|
||||
########################
|
||||
#### Nagios Install ####
|
||||
########################
|
||||
# Install Nagios 4 from distro packages, adjust the "Require" directives in the
# Apache Nagios config, create the digest-auth admin user and start services.
# Globals:   OS, PAKMGR, NAGAD, NAGADPASS (read)
# Arguments: none
# NOTE(review): the package names and a2enmod below look Debian/Ubuntu
# specific — confirm before relying on the non-Ubuntu branch.
function nagios_install() {
  local htpath

  if [ "${OS}" = ubuntu ]; then
    htpath=/etc/apache2/conf-enabled/nagios4-cgi.conf
  else
    # The non-Ubuntu branch manages the "httpd" service below, whose
    # configuration lives under /etc/httpd rather than /etc/apache2.
    htpath=/etc/httpd/conf.d/nagios.conf
  fi

  # PAKMGR is intentionally unquoted: it holds "command -y".
  ${PAKMGR} update
  DEBIAN_FRONTEND=noninteractive ${PAKMGR} install nagios4 nagios-nrpe-server nagios-plugins nagios-plugins-contrib expect
  a2enmod authz_groupfile auth_digest

  # Comment out the private-network "Require ip" allow-list ("&" is the match).
  sed -i 's/Require ip ::1\/128 fc00::\/7 fe80::\/10 10\.0\.0\.0\/8 127\.0\.0\.0\/8 169\.254\.0\.0\/16 172\.16\.0\.0\/12 192\.168\.0\.0\/16/# &/g' "$htpath"
  # These two passes together leave every "#Require " line uncommented
  # (the "# Require ip" line above keeps its comment because of the space).
  sed -i 's/Require all/#Require all/g' "$htpath"
  sed -i 's/#Require /Require /g' "$htpath"

  # Drive the interactive htdigest prompts; the shell expands $NAGAD and
  # $NAGADPASS, while "\r" is passed through literally for expect.
  expect -f - <<EOF
set timeout 5
spawn htdigest -c /etc/nagios4/htdigest.users Nagios4 $NAGAD
expect "New password:"
send -- "$NAGADPASS\r"
expect "Re-type new password:"
send -- "$NAGADPASS\r"
expect eof
EOF

  systemctl enable --now nagios
  systemctl status nagios
  if [ "${OS}" = ubuntu ]; then
    systemctl enable apache2
    systemctl restart apache2
  else
    systemctl enable httpd
    systemctl restart httpd
  fi
}
|
||||
|
||||
# nagios_install is invoked once, further down in this script; calling it here
# as well ran the whole installation (package install, sed edits, htdigest)
# twice.
|
||||
|
||||
|
||||
|
||||
|
||||
# OUTFILE1="$nagdir/nrpe_rule.te"
|
||||
# # TITLE="nrpe_rule"
|
||||
# define NRPE_RULE << 'EOF'
|
||||
# module nrpe_rule 1.0;
|
||||
|
||||
# require {
|
||||
# type nrpe_t;
|
||||
# type proc_net_t;
|
||||
# class file { open read };
|
||||
# class file { ioctl open read getattr };
|
||||
# }
|
||||
|
||||
# #=================== nrpe_t =========================
|
||||
# allow nrpe_t proc_net_t:file open;
|
||||
# allow nrpe_t proc_net_t:file read;
|
||||
# allow nrpe_t proc_net_t:file { getattr ioctl };
|
||||
# EOF
|
||||
# {
|
||||
# printf "%s\n" "$NRPE_RULE" | cut -c 3-
|
||||
# } > "$OUTFILE1"
|
||||
# checkmodule -M -m -o $nagdir/nrpe_rule.mod $nagdir/nrpe_rule.te
|
||||
# semodule_package -o $nagdir/nrpe_rule.pp -m $nagdir/nrpe_rule.mod
|
||||
# semodule -i $nagdir/nrpe_rule.pp
|
||||
# semanage permissive -a nrpe_t
|
||||
# ${PAKMGR} install nrpe nrpe-selinux nagios-plugins nagios-plugins-all nagios-plugins-uptime nagios-plugins-oracle nagios-plugins-check-updates
|
||||
|
||||
|
||||
# sed -i "/^allowed_hosts/c\allowed_hosts=127.0.0.1,::1,$IPADD" $nrpecfg
|
||||
# sed -i "/^#command\[check_load\]/c\command[check_uptime]=$nagdir/check_uptime" $nrpecfg
|
||||
# sed -i "/^command\[check_load\]/c\command\[check_load\]=$nagdir/check_load -r -w 6,4,2 -c 12,10,7" $nrpecfg
|
||||
# sed -i "/^command\[check_hda1\]/c\command[check_hda1]=$nagdir/check_disk -w 15% -c 10% -p /dev/sda3" $nrpecfg
|
||||
# sed -i "/^command\[check_zombie_procs\]/c\# command[check_zombie_procs]=$nagdir/check_procs -w 5 -c 10 -s Z" $nrpecfg
|
||||
# sed -i "/^command\[check_total_procs\]/c\command[check_total_procs]=$nagdir/check_procs -w 250 -c 300 -s RSZDT" $nrpecfg
|
||||
# sed -i "/^#command\[check_users\]/c\command[check_net]=$nagdir/check_net" $nrpecfg
|
||||
# sed -i "/^#command\[check_swap\]/c\command[check_swap]=$nagdir/check_swap -w 20% -c 10%" $nrpecfg
|
||||
# sed -i "/^#command\[check_mem\]/c\command[check_mem]=$nagdir/check_mem" $nrpecfg
|
||||
# if [ "${OS}" = ubuntu ]; then
|
||||
# sed -i "/^#command\[check_apt\]/c\command[check_apt]=$nagdir/check_apt/" $nrpecfg
|
||||
# else
|
||||
# sed -i "/^#command\[check_yum\]/c\command[check_yum]=$nagdir/check_updates" $nrpecfg
|
||||
# fi
|
||||
# sed -i "/^#command\[check_all_procs\]/c\command[check_logic]=$nagdir/check_http -p 7011" $nrpecfg
|
||||
# sed -i "/^#command\[check_procs\]/c\command[check_oracle]=$nagdir/check_http -p 8010" $nrpecfg
|
||||
# sed -i "/^#command\[check_disk\]/c\command[check_ping]=$nagdir/check_ping 127.0.0.1 -w 100.0,20% -c 500.0,60%" $nrpecfg
|
||||
# sed -i "/^#command\[check_cpu_stats\]/c\command[check_ssh]=$nagdir/check_ssh" $nrpecfg
|
||||
|
||||
# }
|
||||
# }
|
||||
|
||||
######################
|
||||
#### HTTP Install ####
|
||||
######################
|
||||
# function install_http() {
|
||||
# {
|
||||
# if [ "${OS}" = ubuntu ]; then
|
||||
# if [ $HTTP = apache ]; then
|
||||
# echo "Apache"
|
||||
# else
|
||||
# echo "Nginx"
|
||||
# fi
|
||||
# echo "something"
|
||||
# else
|
||||
# if [ $HTTP = apache ]; then
|
||||
# echo "Apache"
|
||||
# else
|
||||
# echo "Nginx"
|
||||
# fi
|
||||
# fi
|
||||
# }
|
||||
# }
|
||||
|
||||
nagios_install
|
||||
# install_http
|
||||
|
||||
|
||||
# # SAMPLE CONFIG SNIPPETS FOR APACHE WEB SERVER
|
||||
# #
|
||||
# # This file contains examples of entries that need
|
||||
# # to be incorporated into your Apache web server
|
||||
# # configuration file. Customize the paths, etc. as
|
||||
# # needed to fit your system.
|
||||
|
||||
# ScriptAlias /nagios/cgi-bin "/usr/local/nagios/sbin"
|
||||
|
||||
# <Directory "/usr/local/nagios/sbin">
|
||||
# # SSLRequireSSL
|
||||
# Options ExecCGI
|
||||
# AllowOverride None
|
||||
# <IfVersion >= 2.3>
|
||||
# <RequireAll>
|
||||
# Require all granted
|
||||
# # Require host 127.0.0.1
|
||||
|
||||
# AuthName "Nagios Access"
|
||||
# AuthType Basic
|
||||
# AuthUserFile /usr/local/nagios/etc/htpasswd.users
|
||||
# Require valid-user
|
||||
# </RequireAll>
|
||||
# </IfVersion>
|
||||
# <IfVersion < 2.3>
|
||||
# Order allow,deny
|
||||
# Allow from all
|
||||
# # Order deny,allow
|
||||
# # Deny from all
|
||||
# # Allow from 127.0.0.1
|
||||
|
||||
# AuthName "Nagios Access"
|
||||
# AuthType Basic
|
||||
# AuthUserFile /usr/local/nagios/etc/htpasswd.users
|
||||
# Require valid-user
|
||||
# </IfVersion>
|
||||
# </Directory>
|
||||
|
||||
# Alias /nagios "/usr/local/nagios/share"
|
||||
|
||||
# <Directory "/usr/local/nagios/share">
|
||||
# # SSLRequireSSL
|
||||
# Options None
|
||||
# AllowOverride None
|
||||
# <IfVersion >= 2.3>
|
||||
# <RequireAll>
|
||||
# Require all granted
|
||||
# # Require host 127.0.0.1
|
||||
|
||||
# AuthName "Nagios Access"
|
||||
# AuthType Basic
|
||||
# AuthUserFile /usr/local/nagios/etc/htpasswd.users
|
||||
# Require valid-user
|
||||
# </RequireAll>
|
||||
# </IfVersion>
|
||||
# <IfVersion < 2.3>
|
||||
# Order allow,deny
|
||||
# Allow from all
|
||||
# # Order deny,allow
|
||||
# # Deny from all
|
||||
# # Allow from 127.0.0.1
|
||||
|
||||
# AuthName "Nagios Access"
|
||||
# AuthType Basic
|
||||
# AuthUserFile /usr/local/nagios/etc/htpasswd.users
|
||||
# Require valid-user
|
||||
# </IfVersion>
|
||||
# </Directory>
|
||||
# wget https://assets.nagios.com/downloads/nagioscore/releases/nagios-4.4.7.tar.gz
|
||||
# tar xzf nagios-4.4.7.tar.gz
|
||||
# cd nagios-4.4.7 || exit
|
||||
# if [ "${OS}" = ubuntu ]; then
|
||||
# ./configure --with-httpd-conf=/etc/apache2/sites-enabled
|
||||
# else
|
||||
# ./configure --with-httpd-conf=/etc/httpd/conf.d
|
||||
# fi
|
||||
# make all
|
||||
# make install-groups-users
|
||||
# if [ "${OS}" = ubuntu ]; then
|
||||
# usermod -aG nagios www-data
|
||||
# else
|
||||
# usermod -aG nagios apache
|
||||
# fi
|
||||
# make install
|
||||
# make install-init
|
||||
# make install-daemoninit
|
||||
# make install-commandmode
|
||||
# make install-config
|
||||
# make install-webconf
|
||||
# if [ "${OS}" = ubuntu ]; then
|
||||
# a2enmod rewrite cgi
|
||||
# fi
|
||||
|
||||
# fi
|
||||
# if [ ! "$(command -v wget)" ]; then
|
||||
# ${PAKMGR} install wget
|
||||
# fi
|
||||
# ndir1=/usr/lib/nagios/plugins
|
||||
# ndir2=/usr/lib64/nagios/plugins
|
||||
# #nrpecfg=/etc/nagios/nrpe.cfg
|
||||
# if [ -d $ndir1 ]; then
|
||||
# nagdir=$ndir1
|
||||
# elif [ -d $ndir2 ]; then
|
||||
# nagdir=$ndir2
|
||||
# fi
|
||||
# define () {
|
||||
# IFS=$'\n' read -r -d '' "$1"
|
||||
# }
|
||||
+1298
File diff suppressed because it is too large
Load Diff
+3953
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,809 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: add-http-auth.sh
|
||||
# Version: 3.0
|
||||
# Description: Add HTTP Basic Auth to Prometheus stack reverse proxies
|
||||
# Supports both nginx and Apache — auto-detects which is in use.
|
||||
# Uses non-destructive include snippets to preserve existing
|
||||
# HTTPS/certbot configs.
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
#
|
||||
# Supported Services:
|
||||
# - Prometheus (port 9090)
|
||||
# - Alertmanager (port 9093)
|
||||
# - Mimir (port 9009) — optionally protects /api/v1/push
|
||||
# - Loki (port 3100) — optionally protects /loki/api/v1/push
|
||||
#
|
||||
# Supported Web Servers:
|
||||
# - nginx — inserts 'include' snippets into location blocks
|
||||
# - Apache — inserts 'Include' snippets into <Location> blocks
|
||||
#
|
||||
# Usage:
|
||||
# sudo ./add-http-auth.sh
|
||||
# sudo ./add-http-auth.sh --remove
|
||||
# sudo ./add-http-auth.sh --status
|
||||
#
|
||||
################################################################################
|
||||
|
||||
set -o errexit -o nounset -o pipefail

SCRIPT_VERSION="3.0"
BACKUP_DIR="/var/backups/http-auth"

# Runtime state, filled in after the web server has been detected:
#   WEB_SERVER    "nginx" or "apache"
#   CONFIG_DIR    where vhost configs live
#   SNIPPET_DIR   where auth snippets go
#   AUTH_DIR      where htpasswd files go
#   WEB_USER      www-data, nginx, apache, etc.
#   SERVICE_NAME  systemd service name
WEB_SERVER=""
CONFIG_DIR=""
SNIPPET_DIR=""
AUTH_DIR=""
WEB_USER=""
SERVICE_NAME=""

# One entry per service, fields separated by "|":
#   name|nginx_config|apache_config|port
SERVICES=(
  "prometheus|prometheus.conf|prometheus.conf|9090"
  "alertmanager|alerts.conf|alerts.conf|9093"
  "mimir|mimir.conf|mimir.conf|9009"
  "loki|loki.conf|loki.conf|3100"
)
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Print the help text to stdout and terminate the script with status 0.
# Globals: SCRIPT_VERSION (read)
show_usage() {
  local prog="$0"

  printf '%s\n' \
    "Usage: ${prog} [OPTIONS]" \
    "" \
    "Add HTTP Basic Auth to Prometheus stack reverse proxies (v${SCRIPT_VERSION})." \
    "Supports both nginx and Apache — auto-detects which is in use." \
    "" \
    "OPTIONS:" \
    "--remove Remove auth from all services (restore backups)" \
    "--status Show current auth status for each service" \
    "-h, --help Show this help" \
    "" \
    "EXAMPLES:" \
    "${prog} # Interactive setup" \
    "${prog} --status # Check which services have auth enabled" \
    "${prog} --remove # Remove auth and restore original configs" \
    ""
  exit 0
}
|
||||
|
||||
# Report a fatal error on stderr and terminate the script with status 1.
# Arguments: $1 - message
die() {
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}
|
||||
|
||||
# Print a non-fatal warning on stderr.
# Arguments: $1 - message
warn() {
  printf 'WARNING: %s\n' "$1" >&2
}
|
||||
|
||||
# Get the config filename for the current web server
|
||||
# Print the config filename of a SERVICES entry for the active web server.
# Globals:   WEB_SERVER (read)
# Arguments: $1 - entry in the form name|nginx_config|apache_config|port
# Outputs:   field 2 when WEB_SERVER is "nginx", field 3 otherwise
get_config_file() {
  local entry="$1"
  local -a fields=()

  IFS='|' read -r -a fields <<< "$entry"
  case "$WEB_SERVER" in
    nginx) echo "${fields[1]-}" ;;
    *) echo "${fields[2]-}" ;;
  esac
}
|
||||
|
||||
# Print the service name (first "|"-separated field) of a SERVICES entry.
# Arguments: $1 - entry in the form name|nginx_config|apache_config|port
# Outputs:   the name on stdout
get_service_name() {
  local entry="$1"
  # Declared local so a direct call cannot overwrite a caller's "name".
  local name

  IFS='|' read -r name _ <<< "$entry"
  echo "$name"
}
|
||||
|
||||
# Print the port (fourth "|"-separated field) of a SERVICES entry.
# Arguments: $1 - entry in the form name|nginx_config|apache_config|port
# Outputs:   the port on stdout
get_service_port() {
  local entry="$1"
  # Declared local so a direct call cannot overwrite a caller's "port".
  local port

  IFS='|' read -r _ _ _ port <<< "$entry"
  echo "$port"
}
|
||||
|
||||
# ============================================================================
|
||||
# WEB SERVER DETECTION
|
||||
# ============================================================================
|
||||
|
||||
# Decide which web server fronts the services and store it in WEB_SERVER.
# A server counts only if its binary is on PATH and its systemd unit is
# active. When both qualify the user is prompted (nginx is the default);
# when neither does, the script dies.
# Globals: WEB_SERVER (written), choice (written when prompting)
detect_web_server() {
  local nginx_up=false
  local apache_up=false

  if command -v nginx &>/dev/null && systemctl is-active --quiet nginx 2>/dev/null; then
    nginx_up=true
  fi

  # Debian-style apache2 is probed first, then RHEL-style httpd.
  if command -v apache2ctl &>/dev/null && systemctl is-active --quiet apache2 2>/dev/null; then
    apache_up=true
  elif command -v httpd &>/dev/null && systemctl is-active --quiet httpd 2>/dev/null; then
    apache_up=true
  fi

  case "${nginx_up}:${apache_up}" in
    true:true)
      echo ""
      echo "Both nginx and Apache detected. Which are you using for reverse proxies?"
      echo " 1) nginx"
      echo " 2) Apache"
      read -r -p "Select [1]: " choice
      if [ "${choice:-1}" = "2" ]; then
        WEB_SERVER="apache"
      else
        WEB_SERVER="nginx"
      fi
      ;;
    true:false)
      WEB_SERVER="nginx"
      ;;
    false:true)
      WEB_SERVER="apache"
      ;;
    *)
      die "Neither nginx nor Apache detected as running"
      ;;
  esac

  echo " Detected web server: ${WEB_SERVER}"
}
|
||||
|
||||
# Set paths based on detected web server
|
||||
# Derive directories, service name and web user from WEB_SERVER.
# Globals: WEB_SERVER (read); CONFIG_DIR, SNIPPET_DIR, AUTH_DIR,
#          SERVICE_NAME, WEB_USER (written)
# Dies when no known config directory exists for the selected server.
configure_paths() {
  local candidate

  case "$WEB_SERVER" in
    nginx)
      if [ -d "/etc/nginx/sites-available" ]; then
        CONFIG_DIR="/etc/nginx/sites-available"
      elif [ -d "/etc/nginx/conf.d" ]; then
        CONFIG_DIR="/etc/nginx/conf.d"
      else
        die "nginx config directory not found"
      fi
      SNIPPET_DIR="/etc/nginx/snippets"
      AUTH_DIR="/etc/nginx/auth"
      SERVICE_NAME="nginx"

      # First existing account wins; root is the fallback.
      WEB_USER="root"
      for candidate in www-data nginx; do
        if id "$candidate" &>/dev/null; then
          WEB_USER="$candidate"
          break
        fi
      done
      ;;
    *)
      # Anything other than nginx is treated as Apache.
      if [ -d "/etc/apache2/sites-available" ]; then
        CONFIG_DIR="/etc/apache2/sites-available"
        SNIPPET_DIR="/etc/apache2/conf-available"
        SERVICE_NAME="apache2"
      elif [ -d "/etc/httpd/conf.d" ]; then
        CONFIG_DIR="/etc/httpd/conf.d"
        SNIPPET_DIR="/etc/httpd/conf.d"
        SERVICE_NAME="httpd"
      else
        die "Apache config directory not found"
      fi

      if [ -d "/etc/apache2" ]; then
        AUTH_DIR="/etc/apache2/auth"
      else
        AUTH_DIR="/etc/httpd/auth"
      fi

      WEB_USER="root"
      for candidate in www-data apache; do
        if id "$candidate" &>/dev/null; then
          WEB_USER="$candidate"
          break
        fi
      done
      ;;
  esac
}
|
||||
|
||||
# ============================================================================
|
||||
# HTTPS DETECTION
|
||||
# ============================================================================
|
||||
|
||||
# Succeed when the given vhost config appears to serve HTTPS.
# Globals:   WEB_SERVER (read)
# Arguments: $1 - path to the config file
# Returns:   grep's status (non-zero when no match or file unreadable)
has_https() {
  local config_file="$1"
  local pattern='SSLEngine\s+on|<VirtualHost\s+\*:443'

  if [ "$WEB_SERVER" = "nginx" ]; then
    pattern='listen\s+.*443\s+ssl'
  fi
  grep -qE "$pattern" "$config_file" 2>/dev/null
}
|
||||
|
||||
# ============================================================================
|
||||
# AUTH SNIPPET CHECK
|
||||
# ============================================================================
|
||||
|
||||
# Succeed when the config already includes the auth snippet of a service.
# Globals:   WEB_SERVER, SNIPPET_DIR (read)
# Arguments: $1 - path to the config file, $2 - service name
has_auth_snippet() {
  local config_file="$1"
  local service="$2"
  # nginx spells the directive in lower case, Apache capitalises it.
  local directive="Include"

  if [ "$WEB_SERVER" = "nginx" ]; then
    directive="include"
  fi
  grep -qF "${directive} ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" 2>/dev/null
}
|
||||
|
||||
# ============================================================================
|
||||
# SETUP FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Make sure the htpasswd tool is available, installing it if necessary.
# Tries apt-get, then dnf, then yum; dies when none of them exists.
install_htpasswd() {
  local pair mgr pkg

  if command -v htpasswd &>/dev/null; then
    return 0
  fi

  echo "Installing htpasswd..."
  # Each pair is "<package manager> <package that provides htpasswd>".
  for pair in "apt-get apache2-utils" "dnf httpd-tools" "yum httpd-tools"; do
    mgr=${pair%% *}
    pkg=${pair#* }
    if command -v "$mgr" &>/dev/null; then
      "$mgr" -y install "$pkg"
      return
    fi
  done
  die "Cannot install htpasswd — install apache2-utils or httpd-tools manually"
}
|
||||
|
||||
# Copy a config file into a fresh timestamped directory below BACKUP_DIR.
# Globals:   BACKUP_DIR (read)
# Arguments: $1 - path of the file to back up
# Outputs:   the backup location on stdout
backup_config() {
  local config_file="$1"
  local stamp dest

  stamp=$(date +%F_%H%M%S)
  dest="${BACKUP_DIR}/${stamp}"

  mkdir -p "$dest"
  cp "$config_file" "$dest/"
  printf '%s\n' " Backed up to ${dest}/$(basename "$config_file")"
}
|
||||
|
||||
# ============================================================================
|
||||
# NGINX-SPECIFIC FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Write the nginx basic-auth snippet for one service.
# Globals:   SNIPPET_DIR, AUTH_DIR (read)
# Arguments: $1 - service name (used in file names), $2 - display name
# Outputs:   creates ${SNIPPET_DIR}/auth-<service>.conf, reports it on stdout
nginx_create_snippet() {
  local service="$1"
  local display_name="$2"
  local target="${SNIPPET_DIR}/auth-${service}.conf"

  printf '%s\n' \
    "# Auth snippet for ${display_name} — managed by add-http-auth.sh" \
    "auth_basic \"${display_name} - Authentication Required\";" \
    "auth_basic_user_file ${AUTH_DIR}/.htpasswd-${service};" \
    > "$target"

  echo " Created ${target}"
}
|
||||
|
||||
# Insert the auth snippet include right after the first "location / {" line.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - nginx config file, $2 - service name
# Outputs:   progress on stdout, a warning on stderr when nothing was inserted
# Returns:   0 (also when already configured or no location block was found)
nginx_insert_auth() {
  local config_file="$1"
  local service="$2"
  local include_line="        include ${SNIPPET_DIR}/auth-${service}.conf;"
  local temp_file

  if has_auth_snippet "$config_file" "$service"; then
    echo " Auth already configured in $(basename "$config_file") — skipping"
    return 0
  fi

  backup_config "$config_file"

  temp_file=$(mktemp)

  # awk exits 0 only when the include line was really inserted, so success is
  # not reported for a config without a "location / {" block.
  if awk -v inc="$include_line" '
    /location \/ \{/ && !inserted {
      print
      print inc
      inserted = 1
      next
    }
    { print }
    END { exit(inserted ? 0 : 1) }
  ' "$config_file" > "$temp_file"; then
    # Write through the existing file instead of mv-ing the mktemp file over
    # it: this keeps the config's mode, owner, security label and symlink.
    cat "$temp_file" > "$config_file"
    rm -f "$temp_file"
    echo " Inserted auth include into $(basename "$config_file")"
  else
    rm -f "$temp_file"
    echo "WARNING: no 'location / {' block in $(basename "$config_file") — auth not inserted" >&2
  fi
}
|
||||
|
||||
# Protect the push endpoint: insert the auth include after the first
# "location ... /api/v1/push" line, unless the snippet already appears on that
# line or within the two lines that follow it.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - nginx config file, $2 - service name
# Returns:   0 when nothing needed to be done or the include was inserted
nginx_insert_push_auth() {
  local config_file="$1"
  local service="$2"
  local include_line="        include ${SNIPPET_DIR}/auth-${service}.conf;"
  local temp_file

  # No push location at all: nothing to protect.
  grep -q "location.*/api/v1/push" "$config_file" || return 0
  # Already protected.
  if grep -A2 "location.*/api/v1/push" "$config_file" | grep -qF "auth-${service}.conf"; then
    return 0
  fi

  temp_file=$(mktemp)
  awk -v inc="$include_line" '
    /location.*\/api\/v1\/push/ && !push_done {
      print
      print inc
      push_done = 1
      next
    }
    { print }
  ' "$config_file" > "$temp_file"

  # Write through the existing file instead of mv-ing the mktemp file over it:
  # this keeps the config's mode, owner, security label and symlink.
  cat "$temp_file" > "$config_file"
  rm -f "$temp_file"
  echo " Protected push endpoint with auth"
}
|
||||
|
||||
# Remove every line that includes the service's auth snippet from the config.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - nginx config file, $2 - service name
# Returns:   0 on success, grep's status (>1) when the file cannot be read
nginx_remove_auth() {
  local config_file="$1"
  local service="$2"
  local temp_file
  local rc=0

  temp_file=$(mktemp)
  # grep -v exits 1 when it outputs no lines; that is not an error here and
  # must not abort a caller running under "set -e".
  grep -vF "include ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" > "$temp_file" || rc=$?
  if [ "$rc" -gt 1 ]; then
    rm -f "$temp_file"
    return "$rc"
  fi

  # Write through the existing file instead of mv-ing the mktemp file over it:
  # this keeps the config's mode, owner, security label and symlink.
  cat "$temp_file" > "$config_file"
  rm -f "$temp_file"
}
|
||||
|
||||
# Validate the nginx configuration; diagnostics are merged into stdout.
# Returns: the exit status of "nginx -t"
nginx_test_config() {
  {
    nginx -t
  } 2>&1
}
|
||||
|
||||
# ============================================================================
|
||||
# APACHE-SPECIFIC FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Write the Apache basic-auth snippet for one service.
# Globals:   SNIPPET_DIR, AUTH_DIR (read)
# Arguments: $1 - service name (used in file names), $2 - display name
# Outputs:   creates ${SNIPPET_DIR}/auth-<service>.conf, reports it on stdout
apache_create_snippet() {
  local service="$1"
  local display_name="$2"
  local target="${SNIPPET_DIR}/auth-${service}.conf"

  printf '%s\n' \
    "# Auth snippet for ${display_name} — managed by add-http-auth.sh" \
    "AuthType Basic" \
    "AuthName \"${display_name} - Authentication Required\"" \
    "AuthUserFile ${AUTH_DIR}/.htpasswd-${service}" \
    "Require valid-user" \
    > "$target"

  echo " Created ${target}"
}
|
||||
|
||||
# Insert the auth snippet Include into an Apache vhost config.
# Placement, in order of preference:
#   1. directly after an existing "<Location />" opening tag;
#   2. if the config has a <Proxy ...> block: a new <Location /> block in
#      front of the first ProxyPass line;
#   3. otherwise: a new <Location /> block in front of </VirtualHost>.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - Apache config file, $2 - service name
# Outputs:   progress on stdout, a warning on stderr when nothing was inserted
# Returns:   0 (also when already configured or no insertion point was found)
apache_insert_auth() {
  local config_file="$1"
  local service="$2"
  local include_line="        Include ${SNIPPET_DIR}/auth-${service}.conf"
  local temp_file mode

  if has_auth_snippet "$config_file" "$service"; then
    echo " Auth already configured in $(basename "$config_file") — skipping"
    return 0
  fi

  backup_config "$config_file"

  if grep -qE '<Location[[:space:]]+/[[:space:]]*>' "$config_file"; then
    mode="location"
  elif grep -qE '<Proxy[[:space:]]+' "$config_file"; then
    mode="proxy"
  else
    mode="vhost"
  fi

  temp_file=$(mktemp)

  # The awk patterns use "[ \t]" rather than "\s": "\s" is a gawk extension
  # and may not match under other awk implementations such as mawk.
  # awk exits 0 only when the Include was really inserted.
  if awk -v inc="$include_line" -v mode="$mode" '
    function location_block() {
      print "    <Location />"
      print inc
      print "    </Location>"
      print ""
    }
    !inserted && mode == "location" && /<Location[ \t]+\/[ \t]*>/ {
      print
      print inc
      inserted = 1
      next
    }
    !inserted && mode == "proxy" && /ProxyPass[ \t]/ {
      location_block()
      inserted = 1
    }
    !inserted && mode == "vhost" && /<\/VirtualHost>/ {
      print ""
      location_block()
      inserted = 1
    }
    { print }
    END { exit(inserted ? 0 : 1) }
  ' "$config_file" > "$temp_file"; then
    # Write through the existing file instead of mv-ing the mktemp file over
    # it: this keeps the config's mode, owner, security label and symlink.
    cat "$temp_file" > "$config_file"
    rm -f "$temp_file"
    echo " Inserted auth into $(basename "$config_file")"
  else
    rm -f "$temp_file"
    echo "WARNING: no insertion point found in $(basename "$config_file") — auth not inserted" >&2
  fi
}
|
||||
|
||||
# Protect the push endpoint of mimir or loki: insert the auth Include after
# the first Location line that mentions the push path. Other services are a
# no-op.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - Apache config file, $2 - service name
# Returns:   0 when nothing needed to be done or the Include was inserted
apache_insert_push_auth() {
  local config_file="$1"
  local service="$2"
  local push_path=""
  local include_line="        Include ${SNIPPET_DIR}/auth-${service}.conf"
  local temp_file

  case "$service" in
    mimir) push_path="/api/v1/push" ;;
    loki) push_path="/loki/api/v1/push" ;;
    *) return 0 ;;
  esac

  # Only act when a Location line for the push path exists; a path that is
  # merely mentioned elsewhere (e.g. in ProxyPass) gives no place to insert,
  # so no backup is taken and no success is reported for it.
  if ! awk -v path="$push_path" '
    index($0, path) && /Location/ { found = 1 }
    END { exit(found ? 0 : 1) }
  ' "$config_file"; then
    return 0
  fi

  # Already protected (snippet on the matching line or the three after it).
  if grep -F -A3 -- "$push_path" "$config_file" | grep -qF "auth-${service}.conf"; then
    return 0
  fi

  backup_config "$config_file"
  temp_file=$(mktemp)
  # index() compares the path literally instead of treating it as a regex.
  awk -v path="$push_path" -v inc="$include_line" '
    index($0, path) && /Location/ && !push_done {
      print
      print inc
      push_done = 1
      next
    }
    { print }
  ' "$config_file" > "$temp_file"

  # Write through the existing file instead of mv-ing the mktemp file over it:
  # this keeps the config's mode, owner, security label and symlink.
  cat "$temp_file" > "$config_file"
  rm -f "$temp_file"
  echo " Protected push endpoint with auth"
}
|
||||
|
||||
# Remove every line that includes the service's auth snippet from the config.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - Apache config file, $2 - service name
# Returns:   0 on success, grep's status (>1) when the file cannot be read
apache_remove_auth() {
  local config_file="$1"
  local service="$2"
  local temp_file
  local rc=0

  temp_file=$(mktemp)
  # grep -v exits 1 when it outputs no lines; that is not an error here and
  # must not abort a caller running under "set -e".
  grep -vF "Include ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" > "$temp_file" || rc=$?
  if [ "$rc" -gt 1 ]; then
    rm -f "$temp_file"
    return "$rc"
  fi

  # Write through the existing file instead of mv-ing the mktemp file over it:
  # this keeps the config's mode, owner, security label and symlink.
  cat "$temp_file" > "$config_file"
  rm -f "$temp_file"
}
|
||||
|
||||
# Validate the Apache configuration; diagnostics are merged into stdout.
# Uses apache2ctl when it is on PATH, plain httpd otherwise.
# Returns: the exit status of the config test command
apache_test_config() {
  local -a checker=(httpd -t)

  if command -v apache2ctl &>/dev/null; then
    checker=(apache2ctl configtest)
  fi
  "${checker[@]}" 2>&1
}
|
||||
|
||||
# ============================================================================
|
||||
# GENERIC WRAPPERS (dispatch to nginx or apache functions)
|
||||
# ============================================================================
|
||||
|
||||
# Create the auth snippet with the implementation matching WEB_SERVER.
# Arguments are passed through unchanged.
create_snippet() {
  case "$WEB_SERVER" in
    nginx) nginx_create_snippet "$@" ;;
    *) apache_create_snippet "$@" ;;
  esac
}
|
||||
|
||||
# Insert the auth include into a config file. Dispatches to nginx_insert_auth
# when WEB_SERVER is "nginx", otherwise to apache_insert_auth; arguments are
# passed through as given.
insert_auth() {
    if [[ "$WEB_SERVER" != "nginx" ]]; then
        apache_insert_auth "$@"
        return
    fi

    nginx_insert_auth "$@"
}
|
||||
|
||||
# Protect a service's push endpoint using the implementation that matches
# WEB_SERVER ("nginx" selects the nginx variant, anything else Apache).
# Arguments are forwarded untouched.
insert_push_auth() {
    local backend="apache"

    if [ "$WEB_SERVER" = "nginx" ]; then
        backend="nginx"
    fi

    "${backend}_insert_push_auth" "$@"
}
|
||||
|
||||
# Strip the auth include from a config file, delegating to nginx_remove_auth
# for WEB_SERVER="nginx" and to apache_remove_auth for every other value.
# The caller's arguments are handed over as-is.
remove_auth_from_config() {
    case "$WEB_SERVER" in
        nginx)
            nginx_remove_auth "$@"
            ;;
        *)
            apache_remove_auth "$@"
            ;;
    esac
}
|
||||
|
||||
# Validate the web server configuration with the checker that belongs to
# WEB_SERVER. No arguments are passed on to the backend function; its exit
# status is returned.
test_config() {
    if [[ "$WEB_SERVER" == "nginx" ]]; then
        nginx_test_config
        return
    fi

    apache_test_config
}
|
||||
|
||||
# ============================================================================
|
||||
# STATUS & REMOVE
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Print the basic-auth state of every entry in SERVICES, followed by the
# directories in use.
#
# Per service one of these states is shown:
#   no config found  - the service's config file does not exist in CONFIG_DIR
#   not configured   - has_auth_snippet reports no auth include in the config
#   ENABLED          - include present and the htpasswd file exists
#   BROKEN           - include present but the htpasswd file is missing
#
# Globals:   SERVICES, WEB_SERVER, CONFIG_DIR, SNIPPET_DIR, AUTH_DIR,
#            BACKUP_DIR (read; detect_web_server and configure_paths are
#            called first)
# Outputs:   the status report on stdout
#######################################
show_status() {
    # Declared up front so neither the loop variable nor the per-service
    # values leak into the caller's scope.
    local entry name config_file display_name full_path state

    detect_web_server
    configure_paths

    echo ""
    echo "=========================================="
    echo "HTTP Basic Auth Status (${WEB_SERVER})"
    echo "=========================================="
    echo ""

    for entry in "${SERVICES[@]}"; do
        name=$(get_service_name "$entry")
        config_file=$(get_config_file "$entry")
        display_name="${name^}"
        full_path="${CONFIG_DIR}/${config_file}"

        if [ ! -f "$full_path" ]; then
            state="no config found"
        elif ! has_auth_snippet "$full_path" "$name"; then
            state="not configured"
        elif [ -f "${AUTH_DIR}/.htpasswd-${name}" ]; then
            state="ENABLED (htpasswd + snippet)"
        else
            state="BROKEN (snippet exists but htpasswd file missing)"
        fi

        printf " %-14s %s\n" "${display_name}:" "$state"
    done

    echo ""
    echo "Web server: ${WEB_SERVER}"
    echo "Config dir: ${CONFIG_DIR}"
    echo "Snippet dir: ${SNIPPET_DIR}"
    echo "Auth dir: ${AUTH_DIR}"
    echo "Backup dir: ${BACKUP_DIR}"
    echo ""
}
|
||||
|
||||
#######################################
# Remove HTTP basic auth from every service listed in SERVICES.
#
# For each service whose config file exists: if has_auth_snippet reports an
# auth include, the config is backed up and the include removed; the
# service's snippet file is deleted either way. Afterwards the web server
# configuration is tested and, if valid, the service is reloaded.
#
# Globals:   SERVICES, WEB_SERVER, CONFIG_DIR, SNIPPET_DIR, SERVICE_NAME
#            (read; detect_web_server and configure_paths are called first)
# Outputs:   progress messages on stdout
# Returns:   0 when auth was removed and the web server reloaded; 1 when the
#            config test or the reload failed
#######################################
do_remove() {
    # Keep the loop state out of the global scope.
    local entry name config_file full_path

    detect_web_server
    configure_paths

    echo ""
    echo "Removing HTTP Basic Auth from all services (${WEB_SERVER})..."
    echo ""

    for entry in "${SERVICES[@]}"; do
        name=$(get_service_name "$entry")
        config_file=$(get_config_file "$entry")
        full_path="${CONFIG_DIR}/${config_file}"

        if [ ! -f "$full_path" ]; then
            continue
        fi

        if has_auth_snippet "$full_path" "$name"; then
            backup_config "$full_path"
            remove_auth_from_config "$full_path" "$name"
            echo " Removed auth from ${config_file}"
        fi

        rm -f "${SNIPPET_DIR}/auth-${name}.conf"
    done

    echo ""
    echo "Testing ${WEB_SERVER} configuration..."
    if ! test_config; then
        warn "${WEB_SERVER} config test failed — check your config manually"
        return 1
    fi

    # Only claim success when the reload really happened.
    if ! systemctl reload "$SERVICE_NAME"; then
        warn "${WEB_SERVER} config is valid but reloading ${SERVICE_NAME} failed"
        return 1
    fi

    echo ""
    echo "Auth removed and ${WEB_SERVER} reloaded."
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN SETUP
|
||||
# ============================================================================
|
||||
|
||||
# Prompt for a username (default "admin") and create a fresh htpasswd file.
# $1 - htpasswd file to create, $2 - prompt text.
# Returns htpasswd's exit status so the caller can react to a failed or
# aborted password entry.
_setup_auth_new_htpasswd() {
    local target="$1"
    local prompt="$2"
    local username

    read -r -p "$prompt" username
    username=${username:-admin}
    htpasswd -c "$target" "$username"
}

#######################################
# Interactive setup of HTTP basic auth for every service in SERVICES that
# has a config file in CONFIG_DIR.
#
# Steps: warn when no config has HTTPS, list the detected services, ask
# whether push endpoints should be protected, ask for shared or per-service
# credentials, create the htpasswd files, create and insert the auth
# snippets, then test the configuration and reload the web server.
#
# Globals:   SERVICES, SCRIPT_VERSION, WEB_SERVER, WEB_USER, SERVICE_NAME,
#            CONFIG_DIR, SNIPPET_DIR, AUTH_DIR, BACKUP_DIR (read;
#            detect_web_server and configure_paths are called first)
# Inputs:    answers to the prompts on stdin
# Outputs:   progress and summary on stdout
# Exits:     0 when the user declines to continue without HTTPS;
#            1 (directly or through die) when no service config exists,
#            the shared credentials cannot be created, the configuration
#            test fails, or the reload fails
#######################################
setup_auth() {
    # Everything the loops and prompts assign is local so that nothing
    # leaks into the rest of the script.
    local entry name config_file full_path display_name https_status htpasswd_file
    local confirm push_confirm cred_mode overwrite recreate

    detect_web_server
    configure_paths

    echo ""
    echo "=========================================="
    echo "Add HTTP Basic Auth to Prometheus Stack"
    echo "Version: ${SCRIPT_VERSION} (${WEB_SERVER})"
    echo "=========================================="

    # Check for HTTPS
    local has_any_https=false
    for entry in "${SERVICES[@]}"; do
        config_file=$(get_config_file "$entry")
        full_path="${CONFIG_DIR}/${config_file}"
        if [ -f "$full_path" ] && has_https "$full_path"; then
            has_any_https=true
            break
        fi
    done

    if [ "$has_any_https" = false ]; then
        echo ""
        warn "No HTTPS configuration detected!"
        echo " Basic Auth over HTTP sends credentials in cleartext."
        echo " Strongly recommended: run certbot first to enable HTTPS."
        echo ""
        read -r -p "Continue without HTTPS? [y/N]: " confirm
        if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
            echo "Aborted. Run certbot first, then re-run this script."
            exit 0
        fi
    fi

    # Detect which services have configs
    echo ""
    echo "Detected services:"
    local found_any=false
    for entry in "${SERVICES[@]}"; do
        name=$(get_service_name "$entry")
        config_file=$(get_config_file "$entry")
        full_path="${CONFIG_DIR}/${config_file}"
        if [ -f "$full_path" ]; then
            https_status="HTTP"
            if has_https "$full_path"; then
                https_status="HTTPS"
            fi
            echo " ✓ ${name} (${config_file}) [${https_status}]"
            found_any=true
        fi
    done

    if [ "$found_any" = false ]; then
        die "No service configs found in ${CONFIG_DIR}. Set up ${WEB_SERVER} reverse proxies first."
    fi

    echo ""

    # Ask about push endpoint protection
    local protect_push=false
    echo "Mimir and Loki have push endpoints used by remote agents."
    echo "Protecting them requires configuring credentials in Prometheus/Alloy."
    read -r -p "Protect push endpoints with auth too? [y/N]: " push_confirm
    if [[ "$push_confirm" =~ ^[Yy]$ ]]; then
        protect_push=true
    fi

    # Ask about shared vs per-service credentials
    local shared_creds=false
    local shared_htpasswd=""
    echo ""
    echo "Credential mode:"
    echo " 1) Same username/password for all services"
    echo " 2) Different credentials per service"
    read -r -p "Select [1]: " cred_mode
    if [[ "${cred_mode:-1}" != "2" ]]; then
        shared_creds=true
        # Temporary shared htpasswd file, copied to each service below.
        shared_htpasswd=$(mktemp) || die "Could not create a temporary htpasswd file"
        if ! _setup_auth_new_htpasswd "$shared_htpasswd" "Username for all services [admin]: "; then
            # Without this check an empty file would be copied to every
            # service, enabling auth that nobody can pass.
            rm -f "$shared_htpasswd"
            die "Creating the shared credentials failed; no configuration was changed."
        fi
    fi

    # Create directories
    mkdir -p "$AUTH_DIR" "$SNIPPET_DIR" "$BACKUP_DIR"

    echo ""

    # Set up auth for each detected service
    for entry in "${SERVICES[@]}"; do
        name=$(get_service_name "$entry")
        config_file=$(get_config_file "$entry")
        full_path="${CONFIG_DIR}/${config_file}"

        if [ ! -f "$full_path" ]; then
            continue
        fi

        display_name="${name^}"
        htpasswd_file="${AUTH_DIR}/.htpasswd-${name}"

        echo "--- ${display_name} ---"

        # Create htpasswd file
        if [ "$shared_creds" = true ]; then
            overwrite=""
            if [ -f "$htpasswd_file" ]; then
                read -r -p " htpasswd file exists. Overwrite with shared credentials? [Y/n]: " overwrite
            fi
            if [[ "$overwrite" =~ ^[Nn]$ ]]; then
                echo " Keeping existing htpasswd"
            elif cp "$shared_htpasswd" "$htpasswd_file"; then
                echo " Using shared credentials"
            else
                warn "Could not write ${htpasswd_file}; skipping ${display_name}"
                echo ""
                continue
            fi
        else
            # A missing file is always created; an existing one only on request.
            recreate="y"
            if [ -f "$htpasswd_file" ]; then
                read -r -p " htpasswd file exists. Recreate? [y/N]: " recreate
            fi
            if [[ ! "$recreate" =~ ^[Yy]$ ]]; then
                echo " Keeping existing htpasswd"
            elif ! _setup_auth_new_htpasswd "$htpasswd_file" " Username [admin]: "; then
                # Do not wire auth into the config when there are no usable
                # credentials for it.
                warn "Creating credentials for ${display_name} failed; skipping this service"
                echo ""
                continue
            fi
        fi

        # Create auth snippet
        create_snippet "$name" "$display_name"

        # Insert into main location/proxy block
        insert_auth "$full_path" "$name"

        # Handle push endpoints for Mimir and Loki
        if [[ "$name" == "mimir" ]] || [[ "$name" == "loki" ]]; then
            if [ "$protect_push" = true ]; then
                insert_push_auth "$full_path" "$name"
            else
                echo " ⚠ Push endpoint left open — consider IP restrictions"
            fi
        fi

        echo ""
    done

    # Clean up shared temp file
    if [ -n "$shared_htpasswd" ]; then
        rm -f "$shared_htpasswd"
    fi

    # Set permissions on htpasswd files (best effort: the glob may match nothing)
    chmod 640 "${AUTH_DIR}"/.htpasswd-* 2>/dev/null || true
    chown "root:${WEB_USER}" "${AUTH_DIR}"/.htpasswd-* 2>/dev/null || true

    # Test and reload
    echo "Testing ${WEB_SERVER} configuration..."
    if ! test_config; then
        echo ""
        echo "${WEB_SERVER} configuration test FAILED!"
        echo "Your backups are in ${BACKUP_DIR} — restore manually if needed."
        exit 1
    fi

    if ! systemctl reload "$SERVICE_NAME"; then
        die "${WEB_SERVER} configuration is valid but reloading ${SERVICE_NAME} failed."
    fi

    echo ""
    echo "=========================================="
    echo "HTTP Basic Auth Successfully Configured!"
    echo "=========================================="
    echo ""
    echo "Web server: ${WEB_SERVER}"
    echo "Backups: ${BACKUP_DIR}"
    echo ""
    echo "To remove auth later: $0 --remove"
    echo "To check status: $0 --status"
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Command-line entry point.
#
# Arguments: $1 - optional mode:
#              -h | --help   show usage (works without root)
#              --remove      remove auth from all services
#              --status      show the current auth status
#              anything else install htpasswd if needed and run the setup
# Exits:     through die when a mode other than help is requested by a
#            non-root user
#######################################
main() {
    # Usage text changes nothing on the system, so it must not require root.
    case "${1:-}" in
        -h|--help)
            show_usage
            return
            ;;
    esac

    if [[ $EUID -ne 0 ]]; then
        die "This script must be run as root"
    fi

    case "${1:-}" in
        --remove) do_remove ;;
        --status) show_status ;;
        *)
            install_htpasswd
            setup_auth
            ;;
    esac
}
|
||||
|
||||
# Script entry point: hand every command-line argument to main unchanged.
main "$@"
|
||||
@@ -0,0 +1,94 @@
|
||||
#!/bin/bash
|
||||
|
||||
######################################################################################
|
||||
#### Version 2.01 ####
|
||||
#### For questions or comments contact@mylinux.work ####
|
||||
#### Author : Phil Connor ####
|
||||
#### ####
|
||||
#### Notes : ####
|
||||
#### This script is a simple "helper" to configure Auto Updates on linux ####
|
||||
#### servers. ####
|
||||
#### ####
|
||||
#### Use this script at your OWN risk. There is no guarantee whatsoever. ####
|
||||
#### ####
|
||||
#### Usage: run this script as root with no arguments; the OS is auto-detected ####
|
||||
######################################################################################
|
||||
|
||||
###########################
|
||||
#### System Variables ####
|
||||
###########################
|
||||
# OS: first word of PRETTY_NAME from /etc/os-release, lower-cased
# (for example "ubuntu", "centos", "rocky"; Red Hat yields "red").
OS=$(
    grep PRETTY_NAME /etc/os-release \
        | sed 's/PRETTY_NAME=//g' \
        | tr -d '="' \
        | awk '{print $1}' \
        | tr '[:upper:]' '[:lower:]'
)

# OSVER: major version, i.e. VERSION_ID up to the first dot.
OSVER=$(
    grep VERSION_ID /etc/os-release \
        | sed 's/VERSION_ID=//g' \
        | tr -d '="' \
        | awk -F. '{print $1}'
)

# Locations of the auto-update configuration per package manager.
aptcnf="/etc/apt/apt.conf.d"
dnfcnf="/etc/dnf/automatic.conf"
yum6cnf="/etc/sysconfig/yum-cron"
yum7cnf="/etc/yum/yum-cron.conf"
|
||||
|
||||
###################################
|
||||
#### Copy to EOF file function ####
|
||||
###################################
|
||||
# Filter for indented here-documents: reads text on stdin, expands tabs to
# spaces, measures the leading spaces of the FIRST line and cuts that many
# characters off the front of every line. Result goes to stdout.
function no_show() {
    expand \
        | awk '
            NR == 1 {
                match($0, /^ */)
                l = RLENGTH + 1
            }
            { print substr($0, l) }
        '
}
|
||||
|
||||
###########################################################
|
||||
#### Detect Package Manager from OS and OSVer Variables ####
|
||||
###########################################################
|
||||
#######################################
# Choose the package manager command for the detected distribution.
#
# Globals:   OS, OSVER (read); PAKMGR (written)
#   ubuntu                  -> "apt-get -y"
#   EL family, version 6/7  -> "yum -y"
#   EL family, version 8/9  -> "dnf -y"
# PAKMGR is left untouched for any other OS or version.
#
# "almalinux" is accepted next to "alma" because AlmaLinux spells its
# PRETTY_NAME as one word, which is what the OS detection picks up.
#######################################
detect_pakmgr() {
    case "${OS}" in
        ubuntu)
            PAKMGR="apt-get -y"
            ;;
        centos|red|oracle|rocky|alma|almalinux)
            case "${OSVER}" in
                6|7) PAKMGR="yum -y" ;;
                8|9) PAKMGR="dnf -y" ;;
            esac
            ;;
    esac
}

detect_pakmgr
|
||||
|
||||
#####################################
|
||||
#### Install Auto Update Service ####
|
||||
#####################################
|
||||
#######################################
# Install and enable the automatic-update service for the detected OS.
#
#   EL family 6/7 -> yum-cron        (config: $yum6cnf / $yum7cnf)
#   EL family 8/9 -> dnf-automatic   (config: $dnfcnf)
#   ubuntu        -> unattended-upgrades + apticron (config under $aptcnf)
# Any other OS or version is left alone.
#
# Globals:   OS, OSVER, PAKMGR, aptcnf, dnfcnf, yum6cnf, yum7cnf (read)
# Returns:   0 on success or when the OS is not handled; 1 when the OS is
#            handled but PAKMGR is empty
#######################################
install_auto_updates() {
    local flavor=""
    local -a pak=()

    case "${OS}" in
        centos|red|oracle|rocky|alma|almalinux)
            case "${OSVER}" in
                6|7) flavor="yum-cron" ;;
                8|9) flavor="dnf-automatic" ;;
            esac
            ;;
        ubuntu)
            flavor="unattended-upgrades"
            ;;
    esac

    if [[ -z "${flavor}" ]]; then
        return 0
    fi

    # PAKMGR holds a command plus its flags ("yum -y"); split it into an
    # array so it can be run without relying on unquoted expansion.
    read -r -a pak <<< "${PAKMGR:-}"
    if (( ${#pak[@]} == 0 )); then
        # With an empty PAKMGR the lines below would run "update" and
        # "install <pkg>" as commands of their own; the latter is install(1).
        echo "No package manager set for ${OS} ${OSVER}; cannot install ${flavor}." >&2
        return 1
    fi

    case "${flavor}" in
        yum-cron)
            "${pak[@]}" update
            "${pak[@]}" install yum-cron
            if [[ "${OSVER}" = 6 ]]; then
                chkconfig yum-cron on
                chkconfig yum-updatesd off
                service yum-updatesd stop
                #echo 'exclude= http php* kernel*' >> /etc/yum.conf # <-- If you need to add exclude package from updating
                #sed -i 's/YUM_PARAMETER=""/YUM_PARAMETER="-x http -x php* -x kernel*"/g' >> $yum6cnf # <-- If you need to add exclude package from updating
                sed -i 's/CHECK_ONLY=yes/CHECK_ONLY=no/g' "$yum6cnf"
                sed -i 's/DOWNLOAD_ONLY=yes/DOWNLOAD_ONLY=no/g' "$yum6cnf"
                sed -i 's/MAILTO=/MAILTO=root/g' "$yum6cnf"
                service yum-cron start
            else
                sed -i 's/update_cmd = default/update_cmd = security/g' "$yum7cnf" #<-- comment this out for ALL available upgrades
                sed -i 's/apply_updates = no/apply_updates = yes/g' "$yum7cnf"
                sed -i 's/download_updates = no/download_updates = yes/g' "$yum7cnf"
                # "--now" enables and starts the unit in one step
                # (the option was previously misspelled "--nom").
                systemctl enable --now yum-cron
            fi
            ;;
        dnf-automatic)
            "${pak[@]}" update
            "${pak[@]}" install dnf-automatic
            sed -i 's/upgrade_type = default/upgrade_type = security/g' "$dnfcnf" #<-- comment this out for ALL available upgrades
            sed -i 's/apply_updates = no/apply_updates = yes/g' "$dnfcnf"
            systemctl enable --now dnf-automatic.timer
            ;;
        unattended-upgrades)
            "${pak[@]}" upgrade
            "${pak[@]}" install unattended-upgrades apticron
            # The redirect creates the file; no_show strips the indentation.
            no_show << EOF > "$aptcnf/20auto-upgrades"
                APT::Periodic::Update-Package-Lists "1";
                APT::Periodic::Download-Upgradeable-Packages "1";
                APT::Periodic::AutocleanInterval "7";
                APT::Periodic::Unattended-Upgrade "1";
EOF
            sed -i 's/\/\/Unattended-Upgrade\:\:Mail "root";/Unattended-Upgrade\:\:Mail "root";/g' "$aptcnf/50unattended-upgrades"
            ;;
    esac
}

install_auto_updates
|
||||
Executable
+452
@@ -0,0 +1,452 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: backup-status-exporter.sh
|
||||
# Version: 1.0
|
||||
# Description: Prometheus textfile collector exporter for backup job status
|
||||
# Monitors backup age, size, and success/failure from multiple
|
||||
# sources including timestamp files, log files, and directories
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
# Date: 2026-03-03
|
||||
#
|
||||
# Prerequisites:
|
||||
# - node_exporter with textfile collector enabled
|
||||
# - /var/lib/node_exporter directory exists
|
||||
# - Config file at /etc/backup-status-exporter.conf
|
||||
#
|
||||
# Usage:
|
||||
# # Run with default config
|
||||
# sudo ./backup-status-exporter.sh
|
||||
#
|
||||
# # Dry run (output to stdout)
|
||||
# ./backup-status-exporter.sh --dry-run
|
||||
#
|
||||
# # Debug mode
|
||||
# DEBUG=1 sudo ./backup-status-exporter.sh
|
||||
#
|
||||
# Config Format (pipe-delimited, one job per line):
|
||||
# job_name|type|path|max_age_hours
|
||||
#
|
||||
# Types:
|
||||
# directory - find newest file in directory, report mtime and size
|
||||
# statusfile - read unix timestamp of last success from a file
|
||||
# logfile - grep for success/failure patterns in a log file
|
||||
#
|
||||
# Metrics Exported:
|
||||
# - linux_backup_last_success_timestamp{job} - Unix timestamp of last backup
|
||||
# - linux_backup_age_hours{job} - Hours since last backup
|
||||
# - linux_backup_size_bytes{job} - Size of last backup in bytes
|
||||
# - linux_backup_status{job} - 1=ok, 0=stale/failed
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# A pipeline fails when any of its stages fails.
set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

# Environment overrides are applied first, then every setting is frozen.
: "${TEXTFILE_DIR:=/var/lib/node_exporter}"
: "${CONFIG_FILE:=/etc/backup-status-exporter.conf}"

VERSION="1.0"
SCRIPT_NAME="${0##*/}"
OUTPUT_FILE="${TEXTFILE_DIR}/backup_status.prom"
# Metrics are written to a per-process temp file next to the final file.
TMP_FILE="${OUTPUT_FILE}.$$"
readonly VERSION SCRIPT_NAME TEXTFILE_DIR OUTPUT_FILE CONFIG_FILE TMP_FILE

# Runtime flags
DRY_RUN=false
DEBUG=${DEBUG:-}

# Log success/failure patterns (matched case-insensitively with grep -E)
SUCCESS_PATTERNS="(completed successfully|backup successful|backup finished|success|completed without error)"
FAILURE_PATTERNS="(failed|error|fatal|backup failed|aborted)"
readonly SUCCESS_PATTERNS FAILURE_PATTERNS
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Write a "[DEBUG] ..." line to stderr, but only while DEBUG is non-empty.
# All arguments are joined with spaces into one message.
debug_echo() {
    if [[ -z "$DEBUG" ]]; then
        return 0
    fi

    printf '%s\n' "[DEBUG] $*" >&2
}
|
||||
|
||||
# Report an error on stderr, prefixed with "[ERROR]". Arguments are joined
# with spaces.
log_error() {
    printf '%s\n' "[ERROR] $*" >&2
}
|
||||
|
||||
# Exit handler: delete the temporary metrics file named by TMP_FILE.
# rm -f makes this a no-op when the file was never created.
cleanup() {
    rm -f "$TMP_FILE"
}

# Run cleanup whenever the script exits.
trap cleanup EXIT
|
||||
|
||||
# Print the usage text (options, config format, environment variables and
# examples) to stdout, then terminate the script with status 0.
show_help() {
    cat <<EOF
Usage: ${SCRIPT_NAME} [OPTIONS]

Prometheus textfile collector exporter for backup job status.
Monitors backup age, size, and success/failure from multiple sources.

OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version

CONFIGURATION:
Jobs are configured in /etc/backup-status-exporter.conf (or set CONFIG_FILE).
Each line defines a backup job in pipe-delimited format:

job_name|type|path|max_age_hours

Types:
directory Find the newest file in a directory, report mtime and size
statusfile Read a file containing a unix timestamp of last success
logfile Parse a log file for success/failure patterns

Example config:
daily_db|directory|/backups/db/|26
rsync_home|statusfile|/var/log/rsync-home.status|26
restic_full|logfile|/var/log/restic-backup.log|170

Lines starting with # are comments. Blank lines are ignored.

ENVIRONMENT VARIABLES:
CONFIG_FILE Path to config file (default: /etc/backup-status-exporter.conf)
TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter)
DEBUG Enable debug output when set to any value

EXAMPLES:
sudo ${SCRIPT_NAME}
${SCRIPT_NAME} --dry-run
DEBUG=1 sudo ${SCRIPT_NAME}
CONFIG_FILE=/etc/my-backups.conf sudo ${SCRIPT_NAME}

EOF
    exit 0
}
|
||||
|
||||
# Print "<script name> version <version>" and terminate with status 0.
show_version() {
    printf '%s\n' "${SCRIPT_NAME} version ${VERSION}"
    exit 0
}
|
||||
|
||||
# ============================================================================
|
||||
# JOB LOADING
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Read the job definitions from CONFIG_FILE.
#
# Everything from the first "#" on a line is treated as a comment; leading
# and trailing whitespace is trimmed; blank lines are skipped.
#
# Globals:   CONFIG_FILE (read)
# Outputs:   one cleaned job line per line on stdout
# Exits:     1 when the config file is missing or defines no job
#######################################
load_jobs() {
    if [[ ! -f "$CONFIG_FILE" ]]; then
        log_error "Config file not found: $CONFIG_FILE"
        exit 1
    fi

    local job_count=0
    local line

    # The "|| [[ -n ... ]]" part keeps a final line that has no trailing
    # newline; plain `read` returns non-zero for it and would drop the job.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Strip comments and whitespace
        line="${line%%#*}"
        line="${line#"${line%%[![:space:]]*}"}"
        line="${line%"${line##*[![:space:]]}"}"

        if [[ -z "$line" ]]; then
            continue
        fi

        printf '%s\n' "$line"
        job_count=$((job_count + 1))
    done < "$CONFIG_FILE"

    if [[ "$job_count" -eq 0 ]]; then
        log_error "No jobs found in config file: $CONFIG_FILE"
        exit 1
    fi

    debug_echo "Loaded $job_count backup jobs from $CONFIG_FILE"
}
|
||||
|
||||
# ============================================================================
|
||||
# BACKUP CHECK FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Check a "directory" job: find the most recently modified regular file
# below a directory and judge its age.
#
# Arguments: $1 - job name (used in debug messages only)
#            $2 - directory to search (recursively)
#            $3 - maximum allowed age in whole hours
# Outputs:   "epoch|age_hours|size_bytes|status" on stdout, where status is
#            1 when the newest file is at most $3 hours old and 0 otherwise;
#            "0|0|0|0" when the directory is missing or holds no file
#######################################
check_directory() {
    local job_name="$1"
    local path="$2"
    local max_age_hours="$3"

    if [[ ! -d "$path" ]]; then
        debug_echo "[$job_name] Directory not found: $path"
        echo "0|0|0|0"
        return
    fi

    # One line per file: "<mtime> <size> <path>"; newest first after sorting.
    local newest
    newest=$(find "$path" -type f -printf '%T@ %s %p\n' 2>/dev/null | sort -rn | head -n 1)

    if [[ -z "$newest" ]]; then
        debug_echo "[$job_name] No files found in: $path"
        echo "0|0|0|0"
        return
    fi

    # Split once in the shell; the last variable takes the rest of the
    # line, so a path containing spaces is kept as-is.
    local mtime file_size file_path
    IFS=' ' read -r mtime file_size file_path <<< "$newest"

    # Drop the fractional seconds. Truncating (rather than rounding) can
    # never yield a timestamp that lies in the future.
    local file_epoch="${mtime%%.*}"

    local now
    now=$(date +%s)
    local age_seconds=$((now - file_epoch))
    local age_hours
    age_hours=$(awk -v s="$age_seconds" 'BEGIN { printf "%.1f", s / 3600 }')

    local max_age_seconds=$((max_age_hours * 3600))
    local status=1
    if [[ "$age_seconds" -gt "$max_age_seconds" ]]; then
        status=0
    fi

    debug_echo "[$job_name] Newest file: $file_path (age=${age_hours}h, size=${file_size}B, status=$status)"
    echo "${file_epoch}|${age_hours}|${file_size}|${status}"
}
|
||||
|
||||
#######################################
# Check a "statusfile" job: the first line of the file holds the unix
# timestamp of the last successful run.
#
# Arguments: $1 - job name (used in debug messages only)
#            $2 - path of the status file
#            $3 - maximum allowed age in whole hours
# Outputs:   "timestamp|age_hours|size_bytes|status" on stdout. The size is
#            that of the status file itself. status is 1 when the timestamp
#            is at most $3 hours old, else 0. "0|0|0|0" is printed when the
#            file is missing or its first line is not a plain number.
#######################################
check_statusfile() {
    local job_name="$1"
    local path="$2"
    local max_age_hours="$3"

    if [[ ! -f "$path" ]]; then
        debug_echo "[$job_name] Status file not found: $path"
        echo "0|0|0|0"
        return
    fi

    local timestamp
    timestamp=$(head -n 1 "$path" 2>/dev/null)
    timestamp="${timestamp//[[:space:]]/}"

    # Digits only; the length cap keeps the value inside bash's 64-bit
    # integer range.
    if ! [[ "$timestamp" =~ ^[0-9]{1,18}$ ]]; then
        debug_echo "[$job_name] Invalid timestamp in status file: $path"
        echo "0|0|0|0"
        return
    fi

    # Force base 10: bash arithmetic reads a number with a leading zero as
    # octal, which gives a wrong age or an error for digits 8 and 9.
    timestamp=$((10#$timestamp))

    local now
    now=$(date +%s)
    local age_seconds=$((now - timestamp))
    local age_hours
    age_hours=$(awk -v s="$age_seconds" 'BEGIN { printf "%.1f", s / 3600 }')

    # Status files don't have a meaningful size — report file size of the status file itself
    local file_size
    file_size=$(stat -c '%s' "$path" 2>/dev/null) || file_size=0

    local max_age_seconds=$((max_age_hours * 3600))
    local status=1
    if [[ "$age_seconds" -gt "$max_age_seconds" ]]; then
        status=0
    fi

    debug_echo "[$job_name] Status timestamp: $timestamp (age=${age_hours}h, status=$status)"
    echo "${timestamp}|${age_hours}|${file_size}|${status}"
}
|
||||
|
||||
#######################################
# Check a "logfile" job: decide from the log's content whether the most
# recent outcome was a success, and from its mtime whether it is fresh.
#
# The last line matching SUCCESS_PATTERNS and the last line matching
# FAILURE_PATTERNS are located (case-insensitively). Text matched by a
# success pattern is removed from a line before that line is tested for
# failure, so a phrase like "completed without error" is not counted as a
# failure just because it contains "error".
#
# Globals:   SUCCESS_PATTERNS, FAILURE_PATTERNS (read; extended regexes —
#            both are lower-cased for matching, so they are assumed not to
#            contain case-sensitive escapes)
# Arguments: $1 - job name (used in debug messages only)
#            $2 - path of the log file
#            $3 - maximum allowed age in whole hours
# Outputs:   "mtime|age_hours|size_bytes|status" on stdout. status is 1 only
#            when the last success comes after the last failure AND the log
#            was modified within $3 hours. "0|0|0|0" when the file is missing.
#######################################
check_logfile() {
    local job_name="$1"
    local path="$2"
    local max_age_hours="$3"

    if [[ ! -f "$path" ]]; then
        debug_echo "[$job_name] Log file not found: $path"
        echo "0|0|0|0"
        return
    fi

    # One pass over the log yields both line numbers (0 = never seen).
    # The patterns travel through the environment so awk does not apply
    # escape processing to them.
    local success_line=0
    local failure_line=0
    read -r success_line failure_line < <(
        OK_RE="$SUCCESS_PATTERNS" BAD_RE="$FAILURE_PATTERNS" awk '
            BEGIN {
                ok = tolower(ENVIRON["OK_RE"])
                bad = tolower(ENVIRON["BAD_RE"])
            }
            {
                line = tolower($0)
                if (line ~ ok) {
                    s = NR
                    gsub(ok, "", line)
                }
                if (line ~ bad) {
                    f = NR
                }
            }
            END { print s + 0, f + 0 }
        ' "$path" 2>/dev/null
    )
    success_line=${success_line:-0}
    failure_line=${failure_line:-0}

    # Use the log file's mtime as the timestamp
    local file_epoch
    file_epoch=$(stat -c '%Y' "$path" 2>/dev/null) || file_epoch=0
    local file_size
    file_size=$(stat -c '%s' "$path" 2>/dev/null) || file_size=0

    local now
    now=$(date +%s)
    local age_seconds=$((now - file_epoch))
    local age_hours
    age_hours=$(awk -v s="$age_seconds" 'BEGIN { printf "%.1f", s / 3600 }')

    local max_age_seconds=$((max_age_hours * 3600))

    # Determine status: success if last success line is after last failure line
    # and the log is not stale
    local status=0
    if [[ "$success_line" -gt "$failure_line" ]] && [[ "$age_seconds" -le "$max_age_seconds" ]]; then
        status=1
    fi

    if [[ "$success_line" -eq 0 ]] && [[ "$failure_line" -eq 0 ]]; then
        debug_echo "[$job_name] No success or failure patterns found in: $path"
        status=0
    fi

    debug_echo "[$job_name] Log file: $path (age=${age_hours}h, success_line=$success_line, failure_line=$failure_line, status=$status)"
    echo "${file_epoch}|${age_hours}|${file_size}|${status}"
}
|
||||
|
||||
# ============================================================================
|
||||
# METRICS COLLECTION
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Run every configured backup check and print the result in the Prometheus
# text exposition format.
#
# Each job line has the form "job_name|type|path|max_age_hours". Lines with
# a missing field, a non-numeric max_age_hours or an unknown type are
# reported through log_error and skipped.
#
# Outputs:   four metric families on stdout (timestamp, age, size, status),
#            each with its HELP/TYPE header followed by one sample per job
# Returns:   1 when load_jobs fails (no metrics are printed), else 0
#######################################
collect_metrics() {
    # Capture the job list directly so that a failing load_jobs (missing or
    # empty config) is noticed here and no header-only output is produced.
    local jobs_raw
    jobs_raw=$(load_jobs) || return 1

    local timestamps="" ages="" sizes="" statuses=""
    local job_line job_name job_type job_path max_age_hours _rest
    local result ts age size st label

    while IFS= read -r job_line; do
        if [[ -z "$job_line" ]]; then
            continue
        fi

        # Split in the shell; fields beyond the fourth are ignored.
        IFS='|' read -r job_name job_type job_path max_age_hours _rest <<< "$job_line"

        if [[ -z "$job_name" ]] || [[ -z "$job_type" ]] || [[ -z "$job_path" ]] || [[ -z "$max_age_hours" ]]; then
            log_error "Invalid config line: $job_line (expected: job_name|type|path|max_age_hours)"
            continue
        fi

        # The checks do integer arithmetic on this value.
        if ! [[ "$max_age_hours" =~ ^[0-9]{1,9}$ ]]; then
            log_error "Invalid max_age_hours '$max_age_hours' for job '$job_name' (expected a whole number of hours)"
            continue
        fi
        # Base 10 on purpose: "08" would otherwise be rejected as octal.
        max_age_hours=$((10#$max_age_hours))

        case "$job_type" in
            directory|statusfile|logfile)
                result=$("check_${job_type}" "$job_name" "$job_path" "$max_age_hours")
                ;;
            *)
                log_error "Unknown job type '$job_type' for job '$job_name' (expected: directory, statusfile, logfile)"
                continue
                ;;
        esac

        IFS='|' read -r ts age size st <<< "$result"

        # Backslash and double quote must be escaped inside a label value.
        label=${job_name//\\/\\\\}
        label=${label//\"/\\\"}

        timestamps+="linux_backup_last_success_timestamp{job=\"${label}\"} ${ts}"$'\n'
        ages+="linux_backup_age_hours{job=\"${label}\"} ${age}"$'\n'
        sizes+="linux_backup_size_bytes{job=\"${label}\"} ${size}"$'\n'
        statuses+="linux_backup_status{job=\"${label}\"} ${st}"$'\n'
    done <<< "$jobs_raw"

    # '%s' prints the collected data verbatim; no escape sequences in job
    # names or values are interpreted.
    printf '%s\n' "# HELP linux_backup_last_success_timestamp Unix timestamp of the last successful backup"
    printf '%s\n' "# TYPE linux_backup_last_success_timestamp gauge"
    printf '%s' "$timestamps"
    printf '%s\n' "# HELP linux_backup_age_hours Hours since the last successful backup"
    printf '%s\n' "# TYPE linux_backup_age_hours gauge"
    printf '%s' "$ages"
    printf '%s\n' "# HELP linux_backup_size_bytes Size of the last backup in bytes"
    printf '%s\n' "# TYPE linux_backup_size_bytes gauge"
    printf '%s' "$sizes"
    printf '%s\n' "# HELP linux_backup_status Backup job status (1=ok, 0=stale or failed)"
    printf '%s\n' "# TYPE linux_backup_status gauge"
    printf '%s' "$statuses"
}
|
||||
|
||||
# ============================================================================
|
||||
# OUTPUT
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Collect the metrics and publish them.
#
# In dry-run mode the metrics go to stdout. Otherwise they are written to
# TMP_FILE and then moved to OUTPUT_FILE.
#
# Globals:   DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (read)
# Exits:     1 when collecting fails, the textfile directory is missing, or
#            the file cannot be written; OUTPUT_FILE is left as it was
#######################################
write_metrics() {
    local metrics
    # Do not publish anything when collection failed; an existing metrics
    # file stays in place instead of being replaced by an empty one.
    if ! metrics=$(collect_metrics); then
        log_error "Metric collection failed; not updating $OUTPUT_FILE"
        exit 1
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
        printf '%s\n' "$metrics"
        return
    fi

    if [[ ! -d "$TEXTFILE_DIR" ]]; then
        log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
        exit 1
    fi

    if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
        log_error "Could not write temporary metrics file: $TMP_FILE"
        exit 1
    fi

    if ! mv "$TMP_FILE" "$OUTPUT_FILE"; then
        log_error "Could not move metrics into place: $OUTPUT_FILE"
        exit 1
    fi

    debug_echo "Metrics written to $OUTPUT_FILE"
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
# Entry point of the exporter: interpret the command-line flags, then
# collect and write the metrics.
#
#   --dry-run       print metrics instead of writing the file
#   --debug         turn on debug output
#   --help, -h      show usage and exit
#   --version, -v   show the version and exit
# Any other argument is an error (exit status 1).
main() {
    local opt

    for opt in "$@"; do
        case "$opt" in
            --dry-run)    DRY_RUN=true ;;
            --debug)      DEBUG=1 ;;
            -h|--help)    show_help ;;
            -v|--version) show_version ;;
            *)
                log_error "Unknown option: $opt"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done

    write_metrics
}
|
||||
|
||||
# Script entry point: pass all command-line arguments through to main.
main "$@"
|
||||
Executable
+428
@@ -0,0 +1,428 @@
|
||||
#!/bin/bash
|
||||
|
||||
################################################
|
||||
#### AD Certificate checker and renewal ####
|
||||
#### for Amazon, Ubuntu and RedHat servers ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### License: MIT ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### Version: 3.00-081425 ####
|
||||
################################################
|
||||
|
||||
# A pipeline fails as soon as any of its stages fails.
set -o pipefail

SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Built-in defaults
readonly DEFAULT_PEM_PATH="/etc/pki/ca-trust/source/anchors/ad-cert.pem"
readonly DEFAULT_DAYS_THRESHOLD=30
readonly DEFAULT_DOMAIN="example"
readonly DEFAULT_NODE_DIR="/var/lib/node_exporter"

# Effective settings: a non-empty environment value wins over the default
: "${PEM_PATH:=$DEFAULT_PEM_PATH}"
: "${DAYS_THRESHOLD:=$DEFAULT_DAYS_THRESHOLD}"
: "${DOMAIN:=$DEFAULT_DOMAIN}"
: "${NODE_DIR:=$DEFAULT_NODE_DIR}"
: "${SERVER_TYPE:=}"
: "${DEBUG:=}"

# Mode flags, switched by parse_arguments
MONITOR_ONLY=false
RENEW_ONLY=false
|
||||
|
||||
# ERR-trap handler: report where the script failed, then exit with the
# failing status.
# Arguments: $1 - exit status of the failed command
#            $2 - line number it failed on
handle_error() {
    local status=$1 lineno=$2
    printf '%s\n' "Error: $SCRIPT_NAME failed at line $lineno with exit code $status" >&2
    exit "$status"
}
|
||||
|
||||
# Whenever bash fires the ERR trap, call handle_error with the failing
# command's exit status and the current line number.
trap 'handle_error $? $LINENO' ERR
|
||||
|
||||
# Print a "[DEBUG]"-prefixed message on stderr, but only when DEBUG is
# non-empty. Always returns 0 when the message is suppressed.
debug_echo() {
    if [[ -z "$DEBUG" ]]; then
        return 0
    fi
    printf '%s\n' "[DEBUG] $*" >&2
}
|
||||
|
||||
# Print the usage text on stdout. The caller decides where it goes
# (parse_arguments sends it to stderr for unknown options) and when to exit.
# Globals: SCRIPT_NAME, DEFAULT_PEM_PATH, DEFAULT_DAYS_THRESHOLD,
#          DEFAULT_DOMAIN, DEFAULT_NODE_DIR (read)
show_help() {
    cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

SSL certificate checker and renewal script for Prometheus monitoring.

OPTIONS:
--monitor Only generate Prometheus metrics (no renewal)
--renew Only handle certificate renewal (no monitoring)
--all Run both monitoring and renewal (default)
--help, -h Show this help message

ENVIRONMENT VARIABLES:
PEM_PATH Path to certificate file (default: $DEFAULT_PEM_PATH)
DAYS_THRESHOLD Days before expiry to trigger renewal (default: $DEFAULT_DAYS_THRESHOLD)
DOMAIN Domain name (default: $DEFAULT_DOMAIN)
NODE_DIR Node exporter directory (default: $DEFAULT_NODE_DIR)
SERVER_TYPE Server type (artifactory, bitbucket, cloudaccess, jira)
DEBUG Enable debug output

EXAMPLES:
$SCRIPT_NAME --monitor
SERVER_TYPE=bitbucket $SCRIPT_NAME --renew
DEBUG=1 $SCRIPT_NAME --all
EOF
}
|
||||
|
||||
# Check that a file exists and that openssl can parse it as an X.509
# certificate.
# Arguments: $1 - path to the certificate file
# Returns:   0 when valid, 1 when missing or unparseable
validate_certificate_file() {
    local pem="$1"

    [[ -f "$pem" ]] || {
        debug_echo "Certificate file not found: $pem"
        return 1
    }

    openssl x509 -noout -text -in "$pem" >/dev/null 2>&1 || {
        echo "Error: Invalid certificate file: $pem" >&2
        return 1
    }

    return 0
}
|
||||
|
||||
# Fetch the certificate presented by us.<domain>.net:636 and store it as PEM.
# The download goes to a temporary file first; the destination is only
# overwritten once a non-empty certificate has been received, so a failed
# connection no longer truncates the certificate already on disk.
# Arguments: $1 - domain (host becomes us.<domain>.net)
#            $2 - destination PEM file
# Returns:   0 on success, 1 on any failure (destination left untouched)
download_certificate() {
    local domain="$1"
    local output_file="$2"
    local host="us.${domain}.net"
    local server_url="${host}:636"
    local tmp_file

    debug_echo "Downloading certificate from $server_url"

    tmp_file=$(mktemp) || {
        echo "Error: Failed to create temporary file for certificate download" >&2
        return 1
    }

    # The emptiness check also covers shells running without pipefail,
    # where a failed openssl would otherwise be masked by sed's status.
    if ! timeout 30 openssl s_client -connect "$server_url" -servername "$host" </dev/null 2>/dev/null \
            | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' >"$tmp_file" \
            || [[ ! -s "$tmp_file" ]]; then
        rm -f -- "$tmp_file"
        echo "Error: Failed to download certificate from $server_url" >&2
        return 1
    fi

    # Copy the content instead of renaming so the destination keeps its
    # existing owner and mode.
    if ! cat -- "$tmp_file" >"$output_file"; then
        rm -f -- "$tmp_file"
        echo "Error: Failed to write certificate to $output_file" >&2
        return 1
    fi

    rm -f -- "$tmp_file"
    return 0
}
|
||||
|
||||
# Work out how many whole days a certificate has been valid and how many
# remain, from the dates openssl reports.
# Arguments: $1 - certificate file
#            $2 - name of the variable that receives the days left
#            $3 - name of the variable that receives the days elapsed
calculate_certificate_dates() {
    local cert_file="$1"
    local -n days_left_ref=$2
    local -n days_gone_ref=$3

    local not_before not_after
    local start_epoch end_epoch now_epoch

    # openssl prints "notBefore=<date>" / "notAfter=<date>"
    not_before=$(openssl x509 -noout -startdate -in "$cert_file")
    not_after=$(openssl x509 -noout -enddate -in "$cert_file")

    # Keep only the part after the last "=" and convert to epoch seconds
    start_epoch=$(date --date="${not_before##*=}" +%s)
    end_epoch=$(date --date="${not_after##*=}" +%s)
    now_epoch=$(date +%s)

    days_gone_ref=$(( (now_epoch - start_epoch) / 86400 ))
    days_left_ref=$(( (end_epoch - now_epoch) / 86400 ))

    debug_echo "Certificate valid from $(date -d @"$start_epoch") to $(date -d @"$end_epoch")"
    debug_echo "Days gone: $days_gone_ref, Days left: $days_left_ref"
}
|
||||
|
||||
# Write the certificate-expiry gauge to $NODE_DIR/adcert_check.prom.
# The content is written to a sibling temp file and renamed into place, so
# a reader of the .prom file never sees a half-written result.
# Arguments: $1 - days until expiry (negative once expired)
#            $2 - days since the certificate became valid
# Globals:   NODE_DIR (read)
# Returns:   0 on success, 1 when the file could not be written
generate_prometheus_metrics() {
    local days_left="$1"
    local days_gone="$2"
    local output_file="$NODE_DIR/adcert_check.prom"
    local tmp_file="${output_file}.$$"

    debug_echo "Generating Prometheus metrics to $output_file"

    mkdir -p "$NODE_DIR" || return 1

    if ! {
        echo '# HELP linux_ad_cert_expire AD Certificate expiration days'
        echo '# TYPE linux_ad_cert_expire gauge'
        if [[ $days_left -lt 0 ]]; then
            echo "linux_ad_cert_expire{status=\"expired\",days_gone=\"$days_gone\"} 0"
        else
            echo "linux_ad_cert_expire{status=\"valid\"} $days_left"
        fi
    } > "$tmp_file"; then
        rm -f -- "$tmp_file"
        return 1
    fi

    if ! mv -f -- "$tmp_file" "$output_file"; then
        rm -f -- "$tmp_file"
        return 1
    fi
}
|
||||
|
||||
# Print the keystore password on stdout, trying three sources in order:
#   1. the HTTP endpoint in $1 (JSON field .data.password), if $1 is set
#   2. the Vault CLI (secret/keystore, field "password")
#   3. the literal "changeit"
# Lookup failures are swallowed on purpose so the next source is tried.
# Arguments: $1 - password URL (may be empty)
get_keystore_password() {
    local password_url="$1"
    local secret=""

    [[ -z "$password_url" ]] || {
        debug_echo "Retrieving keystore password from $password_url"
        secret=$(curl -sf -X GET "$password_url" 2>/dev/null | jq -r '.data.password // empty' 2>/dev/null || true)
    }

    [[ -n "$secret" ]] || {
        debug_echo "Falling back to Vault CLI for keystore password"
        secret=$(vault kv get -field=password secret/keystore 2>/dev/null || true)
    }

    [[ -n "$secret" ]] || {
        debug_echo "Using default keystore password"
        secret="changeit"
    }

    echo "$secret"
}
|
||||
|
||||
# Run keytool against a keystore to delete or import the "ad" alias.
# The store password is handed to keytool through the environment
# (-storepass:env) rather than on the command line, so it does not show up
# in the process list.
# Arguments: $1 - directory containing keytool
#            $2 - keystore path
#            $3 - action: "delete" or "import"
#            $4 - certificate file (used by "import")
#            $5 - password URL passed to get_keystore_password
# Returns:   keytool's status for "import"; always 0 for "delete" (a
#            missing alias is not an error); 1 for an unsupported action
execute_keytool_command() {
    local java_bin="$1"
    local keystore="$2"
    local action="$3"
    local cert_file="$4"
    local password_url="$5"

    local storepass
    storepass=$(get_keystore_password "$password_url")

    case "$action" in
        "delete")
            KEYTOOL_STOREPASS="$storepass" "$java_bin/keytool" -delete -alias ad \
                -keystore "$keystore" -storepass:env KEYTOOL_STOREPASS 2>/dev/null || true
            ;;
        "import")
            KEYTOOL_STOREPASS="$storepass" "$java_bin/keytool" -import -noprompt -alias ad \
                -keystore "$keystore" -file "$cert_file" -storepass:env KEYTOOL_STOREPASS
            ;;
        *)
            echo "Error: Unsupported keytool action: $action" >&2
            return 1
            ;;
    esac
}
|
||||
|
||||
# Replace the "ad" certificate in Artifactory's Java keystore and restart
# the artifactory service.
# Globals: DOMAIN, PEM_PATH (read)
# Returns: 1 when no keytool/keystore pair can be found
handle_artifactory_renewal() {
    local java_bin keystore
    local vault_url="http://vault.${DOMAIN}.net/v1/secret/secret/artifactory/keytool"
    local java_root="/opt/jfrog/artifactory/app/third-party/java"

    # Bundled Java first; search the system only when it is not there
    java_bin="$java_root/bin"
    keystore="$java_root/lib/security/cacerts"

    if ! [[ -x "$java_bin/keytool" && -f "$keystore" ]]; then
        debug_echo "Artifactory default paths not found, searching for Java"
        find_java_keystore java_bin keystore || {
            echo "Error: Could not find Java keytool or keystore for Artifactory" >&2
            return 1
        }
    fi

    execute_keytool_command "$java_bin" "$keystore" "delete" "$PEM_PATH" "$vault_url"
    execute_keytool_command "$java_bin" "$keystore" "import" "$PEM_PATH" "$vault_url"
    systemctl restart artifactory
}
|
||||
|
||||
# Replace the "ad" certificate in Bitbucket's Java keystore and restart the
# atlbitbucket service.
# NOTE: when DEBUG is non-empty this handler only logs the commands it
# would run; nothing is executed.
# Globals: DOMAIN, PEM_PATH, DEBUG (read)
# Returns: 1 when no keytool/keystore pair can be found
handle_bitbucket_renewal() {
    local java_bin keystore
    local vault_url="http://vault.${DOMAIN}.net/v1/secret/secret/bitbucket/keytool"
    local jre_root="/mnt/ebs/bitbucket/8.19.3/jre"

    # Bundled JRE first; search the system only when it is not there
    java_bin="$jre_root/bin"
    keystore="$jre_root/lib/security/cacerts"

    if ! [[ -x "$java_bin/keytool" && -f "$keystore" ]]; then
        debug_echo "Bitbucket default paths not found, searching for Java"
        find_java_keystore java_bin keystore || {
            echo "Error: Could not find Java keytool or keystore for Bitbucket" >&2
            return 1
        }
    fi

    if [[ -z "$DEBUG" ]]; then
        execute_keytool_command "$java_bin" "$keystore" "delete" "$PEM_PATH" "$vault_url"
        execute_keytool_command "$java_bin" "$keystore" "import" "$PEM_PATH" "$vault_url"
        systemctl restart atlbitbucket
        return
    fi

    debug_echo "Would execute: $java_bin/keytool -delete -alias ad -keystore $keystore"
    debug_echo "Would execute: curl -X GET $vault_url"
    debug_echo "Would execute: $java_bin/keytool -import -alias ad -keystore $keystore -file $PEM_PATH"
    debug_echo "Would execute: systemctl restart atlbitbucket"
}
|
||||
|
||||
# Cloudaccess renewal: restart the docker container named
# "cloudaccess_server_". No keystore handling happens here.
handle_cloudaccess_renewal() {
    local container="cloudaccess_server_"
    docker restart "$container"
}
|
||||
|
||||
# Replace the "ad" certificate in Jira's Java keystore and restart the
# jira service.
# Globals: DOMAIN, PEM_PATH (read)
# Returns: 1 when no keytool/keystore pair can be found
handle_jira_renewal() {
    local java_bin keystore
    local vault_url="http://vault.${DOMAIN}.net/v1/secret/secret/jira/keytool"
    local jre_root="/mnt/ebs/jira/jre"

    # Bundled JRE first; search the system only when it is not there
    java_bin="$jre_root/bin"
    keystore="$jre_root/lib/security/cacerts"

    if ! [[ -x "$java_bin/keytool" && -f "$keystore" ]]; then
        debug_echo "Jira default paths not found, searching for Java"
        find_java_keystore java_bin keystore || {
            echo "Error: Could not find Java keytool or keystore for Jira" >&2
            return 1
        }
    fi

    execute_keytool_command "$java_bin" "$keystore" "delete" "$PEM_PATH" "$vault_url"
    execute_keytool_command "$java_bin" "$keystore" "import" "$PEM_PATH" "$vault_url"
    systemctl restart jira
}
|
||||
|
||||
# Locate a usable keytool directory and cacerts keystore.
# Search order: $JAVA_HOME, a list of well-known install roots (globs
# allowed), then whatever keytool is on PATH paired with a system keystore.
# The output variables are only assigned once a complete pair is found.
# Arguments: $1 - name of the variable receiving the keytool directory
#            $2 - name of the variable receiving the keystore path
# Returns:   0 when a pair was found, 1 otherwise
find_java_keystore() {
    local -n java_bin_ref=$1
    local -n keystore_ref=$2

    # Every local carries a _fjk_ prefix: namerefs resolve by name, so a
    # local called e.g. "keystore" would shadow the caller's variable.
    local _fjk_pattern _fjk_dir _fjk_store _fjk_keytool
    local -a _fjk_roots=(
        "/opt/jfrog/artifactory/app/third-party/java"
        "/mnt/ebs/bitbucket/*/jre"
        "/mnt/ebs/jira/jre"
        "/usr/lib/jvm/java-*-openjdk"
        "/usr/lib/jvm/default-java"
        "/opt/java"
        "/usr/java/latest"
    )
    local -a _fjk_system_stores=(
        "/etc/ssl/certs/java/cacerts"
        "/usr/lib/jvm/default-java/lib/security/cacerts"
        "/etc/pki/ca-trust/extracted/java/cacerts"
    )

    # 1. JAVA_HOME, when it holds both keytool and a keystore
    if [[ -n "${JAVA_HOME:-}" && -x "$JAVA_HOME/bin/keytool" \
            && -f "$JAVA_HOME/lib/security/cacerts" ]]; then
        java_bin_ref="$JAVA_HOME/bin"
        keystore_ref="$JAVA_HOME/lib/security/cacerts"
        debug_echo "Found Java via JAVA_HOME: $java_bin_ref"
        return 0
    fi

    # 2. Well-known install roots
    for _fjk_pattern in "${_fjk_roots[@]}"; do
        # Deliberately unquoted so the glob inside the pattern expands;
        # an unmatched pattern stays literal and fails the -d test.
        for _fjk_dir in $_fjk_pattern; do
            if [[ -d "$_fjk_dir" && -x "$_fjk_dir/bin/keytool" \
                    && -f "$_fjk_dir/lib/security/cacerts" ]]; then
                java_bin_ref="$_fjk_dir/bin"
                keystore_ref="$_fjk_dir/lib/security/cacerts"
                debug_echo "Found Java at: $_fjk_dir"
                return 0
            fi
        done
    done

    # 3. keytool from PATH plus a system-wide keystore
    if _fjk_keytool=$(command -v keytool 2>/dev/null); then
        for _fjk_store in "${_fjk_system_stores[@]}"; do
            if [[ -f "$_fjk_store" ]]; then
                java_bin_ref="$(dirname "$_fjk_keytool")"
                keystore_ref="$_fjk_store"
                debug_echo "Found system Java at: $java_bin_ref"
                return 0
            fi
        done
    fi

    return 1
}
|
||||
|
||||
# Dispatch the renewal to the handler that matches $SERVER_TYPE.
# Globals: SERVER_TYPE (read)
# Returns: the handler's status, or 1 when SERVER_TYPE is empty or unknown
handle_server_renewal() {
    local valid="Valid values: artifactory, bitbucket, cloudaccess, jira"

    [[ -n "$SERVER_TYPE" ]] || {
        echo "Error: SERVER_TYPE environment variable must be set for renewal" >&2
        echo "$valid" >&2
        return 1
    }

    debug_echo "Handling renewal for server type: $SERVER_TYPE"

    case "$SERVER_TYPE" in
        artifactory|bitbucket|cloudaccess|jira)
            # Handler functions are named handle_<type>_renewal
            "handle_${SERVER_TYPE}_renewal"
            ;;
        *)
            echo "Error: Unknown server type: $SERVER_TYPE" >&2
            echo "$valid" >&2
            return 1
            ;;
    esac
}
|
||||
|
||||
# Translate command-line flags into the MONITOR_ONLY / RENEW_ONLY switches.
# Arguments: any of --monitor, --renew, --all, --help|-h
# Exits 0 after printing help, 1 (with help on stderr) on an unknown option.
parse_arguments() {
    local opt

    for opt in "$@"; do
        case $opt in
            --monitor)
                MONITOR_ONLY=true
                ;;
            --renew)
                RENEW_ONLY=true
                ;;
            --all)
                MONITOR_ONLY=false
                RENEW_ONLY=false
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                echo "Error: Unknown option: $opt" >&2
                show_help >&2
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
# Orchestrate one run: refresh the certificate file, compute its remaining
# lifetime, publish metrics and, when close to expiry, trigger the renewal.
# Globals: PEM_PATH, DOMAIN, DAYS_THRESHOLD, MONITOR_ONLY, RENEW_ONLY (read)
# Exits 0 silently when $PEM_PATH does not exist; exits 1 when the
# download, the validation or the renewal fails.
main() {
    parse_arguments "$@"

    # No certificate on this host: nothing to do
    [[ -f "$PEM_PATH" ]] || {
        debug_echo "Certificate file not found: $PEM_PATH"
        exit 0
    }

    # Fetch the current certificate, then make sure it parses
    download_certificate "$DOMAIN" "$PEM_PATH" || exit 1
    validate_certificate_file "$PEM_PATH" || exit 1

    local days_left days_gone
    calculate_certificate_dates "$PEM_PATH" days_left days_gone

    # Monitoring is skipped only in renew-only mode
    if [[ "$RENEW_ONLY" != true ]]; then
        generate_prometheus_metrics "$days_left" "$days_gone"
        debug_echo "Generated Prometheus metrics"
    fi

    # Renewal is skipped in monitor-only mode and while outside the threshold
    if [[ "$MONITOR_ONLY" != true && $days_left -le $DAYS_THRESHOLD ]]; then
        debug_echo "Certificate expires in $days_left days (threshold: $DAYS_THRESHOLD)"
        handle_server_renewal || exit 1
        debug_echo "Certificate renewal completed"
    fi

    debug_echo "Script completed successfully"
}
|
||||
|
||||
# Run main only when this file is executed; when it is sourced,
# BASH_SOURCE[0] differs from $0 and nothing is started.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"
|
||||
+300
@@ -0,0 +1,300 @@
|
||||
#!/bin/bash
|
||||
|
||||
##############################################
|
||||
#### Create Swap for all Linux Servers ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: pconnor@ara.com ####
|
||||
#### Version 3.50.20250729 ####
|
||||
#### ####
|
||||
#### Created 06/01/2023 ####
|
||||
##############################################
|
||||
|
||||
# Strict mode: stop on command failure, on unset variables and on a failed
# pipeline stage
set -o errexit -o nounset -o pipefail

# Fixed settings
readonly SCRIPT_NAME="$(basename "$0")"
# Location of the swap file this script manages
readonly SWAPFILE_PATH="/.swapfile"
# Value written to vm.swappiness (0-100)
readonly SWAPPINESS_VALUE=80
|
||||
|
||||
# Write a message to stderr, prefixed with the script name in brackets.
log() {
    printf '%s\n' "[$SCRIPT_NAME] $*" >&2
}
|
||||
|
||||
# Log a message tagged "ERROR:" and terminate the script with status 1.
error() {
    log "ERROR: $*"
    exit 1
}
|
||||
|
||||
# Print the help text on stdout and exit with status 0.
usage() {
    cat <<EOF
Usage: $SCRIPT_NAME [-h|--help]

Creates and configures a swap file sized 1:1 with system RAM.
Handles creation, resizing, and removal of existing swap files.

Options:
-h, --help Show this help message and exit

Must be run as root.
EOF
    exit 0
}
|
||||
|
||||
# ERR-trap helper used while the swap file is being built: switch the file
# off as swap (ignoring failure, it may never have been enabled) and
# delete it.
cleanup_on_error() {
    log "Error detected, cleaning up partial swap file"
    swapoff "$SWAPFILE_PATH" 2>/dev/null || true
    rm -f "$SWAPFILE_PATH"
}
|
||||
|
||||
# Print the lowercase distribution ID (ubuntu, centos, ...) on stdout.
# lsb_release is asked first; otherwise the ID field of an os-release file
# is used. When neither yields an ID the function prints "unknown" and
# still returns 0, so a caller running under "set -e" is not aborted
# silently by a missing /etc/os-release.
# Arguments: $1 - os-release file to fall back on (default: /etc/os-release)
detect_os() {
    local os_release="${1:-/etc/os-release}"
    local os_id=""

    if command -v lsb_release >/dev/null 2>&1; then
        # Output looks like "Distributor ID: Ubuntu"; the ID is field 3
        os_id=$(lsb_release -i 2>/dev/null | awk '{print $3}') || os_id=""
    fi

    if [[ -z "$os_id" && -r "$os_release" ]]; then
        # Sourced inside the command substitution so the file's variables
        # do not leak into the caller.
        # shellcheck source=/dev/null
        os_id=$(. "$os_release" 2>/dev/null && printf '%s' "${ID:-}") || os_id=""
    fi

    [[ -n "$os_id" ]] || os_id="unknown"
    printf '%s\n' "${os_id,,}"
}
|
||||
|
||||
# Print total system memory in GB, rounded to the nearest whole number and
# never less than 1. Calls error() when MemTotal cannot be read.
get_memory_gb() {
    local total_kb gib

    # MemTotal in /proc/meminfo is reported in KB
    total_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')

    if [[ -z "$total_kb" || "$total_kb" -eq 0 ]]; then
        error "Unable to determine system memory"
    fi

    # KB -> GB with rounding
    gib=$(awk -v kb="$total_kb" 'BEGIN {printf "%.0f", (kb/1024/1024)}')

    # Small hosts round down to 0; treat them as 1 GB
    if [[ "$gib" -eq 0 ]]; then
        gib=1
    fi

    echo "$gib"
}
|
||||
|
||||
# Print the swap size in MB for a given amount of RAM in GB (1:1 ratio).
# Arguments: $1 - memory in GB
get_swap_needed_mb() {
    local ram_gb="$1"
    local swap_mb=$((ram_gb * 1024))
    echo "$swap_mb"
}
|
||||
|
||||
# Print the size of the managed swap file in whole MB; 0 when the file
# does not exist (or stat cannot read it).
get_current_swap_size() {
    local bytes

    if [[ ! -f "$SWAPFILE_PATH" ]]; then
        echo 0
        return
    fi

    bytes=$(stat -c%s "$SWAPFILE_PATH" 2>/dev/null || echo 0)
    echo $((bytes / 1024 / 1024))
}
|
||||
|
||||
# Succeed when the managed swap file is listed by swapon as active.
# The path is compared as a fixed, whole-line string: used as a regex, the
# "." in "/.swapfile" would also match other names. The swapon output is
# captured first so grep -q closing the pipe early cannot make the check
# fail under pipefail.
# Returns: 0 when active, non-zero otherwise (including swapon failure)
is_swap_active() {
    local active
    active=$(swapon --show=NAME --noheadings 2>/dev/null) || return 1
    grep -Fxq -- "$SWAPFILE_PATH" <<<"$active"
}
|
||||
|
||||
# Make sure the root filesystem can hold the swap file plus a 10% margin;
# calls error() when it cannot.
# Arguments: $1 - swap file size in MB
check_disk_space() {
    local needed_mb="$1"
    local filesystem="/"
    local free_kb free_mb wanted_mb

    log "Checking available disk space for ${needed_mb}MB swap file"

    # Last line of df's single-column output is the available space in KB
    free_kb=$(df --output=avail "$filesystem" | tail -n 1)
    free_mb=$((free_kb / 1024))

    # Requested size plus a tenth of it as safety margin
    wanted_mb=$((needed_mb + (needed_mb / 10)))

    if (( free_mb < wanted_mb )); then
        error "Insufficient disk space. Need ${wanted_mb}MB (${needed_mb}MB + 10% buffer), but only ${free_mb}MB available on $filesystem"
    fi

    log "Disk space check passed: ${free_mb}MB available, ${wanted_mb}MB required"
}
|
||||
|
||||
# Abort through error() unless the effective user is root.
check_permissions() {
    if (( EUID != 0 )); then
        error "This script must be run as root! Login as root, or use sudo."
    fi
}
|
||||
|
||||
# Set vm.swappiness to $SWAPPINESS_VALUE, both live (/proc) and in
# /etc/sysctl.conf. Nothing is touched when sysctl.conf already carries
# the wanted line.
setup_swappiness() {
    local sysctl_conf="/etc/sysctl.conf"
    local live_knob="/proc/sys/vm/swappiness"
    local wanted_line="vm.swappiness = $SWAPPINESS_VALUE"

    log "Configuring swappiness to $SWAPPINESS_VALUE"

    if grep -q "vm.swappiness" "$sysctl_conf"; then
        # A setting exists: done if it is the wanted one, otherwise drop it
        if grep -q "$wanted_line" "$sysctl_conf"; then
            return 0
        fi
        sed -i "/vm.swappiness/d" "$sysctl_conf"
    fi

    echo "$SWAPPINESS_VALUE" > "$live_knob"
    echo "$wanted_line" >> "$sysctl_conf"
}
|
||||
|
||||
# Install root's cron job that drops the page cache every 5 minutes
# ("echo 1") and remove any older, more aggressive "echo 3" variant.
# Both changes go through the crontab command in one rewrite. The previous
# implementation edited the spool file directly with its failure ignored,
# using an OS-guessed path, so the old job could silently survive.
# Arguments: $1 - OS id; accepted for compatibility, no longer needed
setup_cache_clearing() {
    # shellcheck disable=SC2034  # kept so existing callers stay valid
    local os="${1:-}"
    local job="*/5 * * * * /usr/bin/sync; echo 1 > /proc/sys/vm/drop_caches"
    local current updated

    log "Setting up cache clearing cron job"

    # An absent crontab is simply treated as empty
    current=$(crontab -u root -l 2>/dev/null || true)

    # Drop the "echo 3" variant, then add our job unless already present
    updated=$(sed '/\/usr\/bin\/sync.*echo 3/d' <<<"$current")
    if ! grep -q '/usr/bin/sync; echo 1' <<<"$updated"; then
        updated="${updated:+$updated$'\n'}$job"
    fi

    # Only rewrite the crontab when something actually changed
    if [[ "$updated" != "$current" ]]; then
        printf '%s\n' "$updated" | crontab -u root -
    fi
}
|
||||
|
||||
# Take the managed swap file out of service: swapoff, back up /etc/fstab,
# remove the fstab lines mentioning the file, delete the file.
remove_swap() {
    local stamp fstab_backup

    # Timestamp used to name the fstab backup
    stamp=$(date +%y-%m-%d--%H-%M-%S)
    fstab_backup="/etc/fstab.$stamp"

    log "Removing existing swap file: $SWAPFILE_PATH"

    # The file may already be inactive; that is fine
    swapoff "$SWAPFILE_PATH" 2>/dev/null || true

    # Keep a copy of fstab, then delete every line that mentions the file
    cp /etc/fstab "$fstab_backup"
    sed -i "\|${SWAPFILE_PATH}|d" /etc/fstab

    rm -f "$SWAPFILE_PATH"
}
|
||||
|
||||
# Build, format, enable and register a swap file of the given size.
# While the file is being built an ERR trap removes the partial file; the
# trap is cleared again once the fstab entry is in place.
# Arguments: $1 - swap size in MB (must not be 0)
create_swap() {
    local size_mb="$1"
    local fstab_entry="$SWAPFILE_PATH swap swap defaults 0 0"

    if (( size_mb == 0 )); then
        error "Cannot create swap: swap size cannot be 0 MB"
    fi

    log "Creating swap file of size ${size_mb}MB at $SWAPFILE_PATH"

    # From here on a failure must not leave a half-built file behind
    trap cleanup_on_error ERR

    # Zero-fill the file; oflag=direct bypasses the page cache
    dd if=/dev/zero of="$SWAPFILE_PATH" bs=1M count="$size_mb" oflag=direct status=progress

    # Root-only access, then format and activate
    chmod 600 "$SWAPFILE_PATH"
    mkswap "$SWAPFILE_PATH"
    swapon "$SWAPFILE_PATH"

    # Register in fstab unless an entry is already there
    grep -q "$SWAPFILE_PATH" /etc/fstab || echo "$fstab_entry" >> /etc/fstab

    # Swap is complete: stop cleaning up on errors
    trap - ERR

    log "Swap file created and enabled successfully"
}
|
||||
|
||||
# Entry point: size the swap file to match RAM and make sure it is active.
# Arguments: optional -h | --help as first argument
main() {
    case "${1:-}" in
        -h|--help) usage ;;
    esac

    # Root is required for everything below
    check_permissions

    local os mem_gb needed_mb current_size
    os=$(detect_os)
    mem_gb=$(get_memory_gb)
    needed_mb=$(get_swap_needed_mb "$mem_gb")
    current_size=$(get_current_swap_size)

    # System tuning is applied on every run
    setup_swappiness
    setup_cache_clearing "$os"

    # Correct size and already active: nothing left to do
    if (( current_size == needed_mb )) && is_swap_active; then
        log "Swap size is already correct and active"
        log "Swap setup completed successfully"
        return 0
    fi

    if (( needed_mb != current_size )); then
        # Remove a wrongly sized file first so the space check sees the
        # freed space, then build the new file
        if (( current_size != 0 )); then
            remove_swap
        fi
        check_disk_space "$needed_mb"
        create_swap "$needed_mb"
    else
        # Right size but inactive: format, enable and register it
        log "Swap file exists at correct size but is not active, enabling"
        chmod 600 "$SWAPFILE_PATH"
        mkswap "$SWAPFILE_PATH"
        swapon "$SWAPFILE_PATH"
        grep -q "$SWAPFILE_PATH" /etc/fstab \
            || echo "$SWAPFILE_PATH swap swap defaults 0 0" >> /etc/fstab
    fi

    log "Swap setup completed successfully"
}
|
||||
|
||||
# Execute main function with all script arguments
|
||||
# Script entry point: forward the command-line arguments to main unchanged.
main "$@"
|
||||
@@ -0,0 +1,267 @@
|
||||
#!/usr/bin/env bash
|
||||
# directory-size-exporter.sh — Prometheus exporter for directory sizes
|
||||
#
|
||||
# Monitors directory disk usage that node_exporter can't see.
|
||||
# Node exporter only reports mounted filesystem totals — this script
|
||||
# tracks individual directories like /var/log, /home, /opt, or any
|
||||
# path you care about.
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# License: MIT
|
||||
# Version: 1.0.0
|
||||
|
||||
# Strict mode: stop on command failure, unset variables, failed pipelines
set -o errexit -o nounset -o pipefail

# Metric prefix and output file stem
EXPORTER_NAME="directory_size"
DEFAULT_PORT=9101

# Output selection (changed by the command-line options)
OUTPUT_MODE="stdout"
OUTPUT_FILE=""

# Settings that can be preset through the environment
PORT=${DIRECTORY_SIZE_PORT:-$DEFAULT_PORT}
TIMEOUT=${DIRECTORY_SIZE_TIMEOUT:-300}

# Behaviour switches
VERBOSE=false
QUIET=false
DRY_RUN=false

# Directories named on the command line
TARGET_DIRECTORIES=()
|
||||
|
||||
# ── Metrics Collection ──────────────────────────────────────────────
|
||||
|
||||
# Print a timestamped message on stderr when VERBOSE is "true".
# Always returns 0: the former "[[ ... ]] && echo" form returned 1 whenever
# the message was suppressed, which aborts a caller running under "set -e".
log_verbose() {
    if [[ "$VERBOSE" == true ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2
    fi
    return 0
}
|
||||
|
||||
# Print a timestamped message on stderr unless QUIET is set to something
# other than "false".
# Always returns 0: the former "[[ ... ]] && echo" form returned 1 in quiet
# mode, which aborts a caller running under "set -e".
log_info() {
    if [[ "$QUIET" == false ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2
    fi
    return 0
}
|
||||
|
||||
# Print all metrics in Prometheus text format on stdout: for every target
# directory its size in bytes and the usage percentage of the filesystem
# it lives on, followed by the exporter's own runtime, timestamp and
# success flag.
# Globals: TARGET_DIRECTORIES, TIMEOUT, EXPORTER_NAME (read)
# A directory whose du fails or times out is skipped and success becomes 0.
collect_metrics() {
    local start_time end_time runtime
    local directory label du_output size_bytes pct
    local success=1

    start_time=$(date +%s%N)

    echo "# HELP node_directory_size_bytes Disk space used by directory"
    echo "# TYPE node_directory_size_bytes gauge"
    echo "# HELP node_directory_filesystem_usage_percent Filesystem usage percentage for the directory mount point"
    echo "# TYPE node_directory_filesystem_usage_percent gauge"

    for directory in "${TARGET_DIRECTORIES[@]}"; do
        # "|| true": a logger that reports "nothing printed" as failure
        # must not abort the run under errexit.
        log_verbose "Running du for: $directory" || true

        # Label values need backslash, double quote and newline escaped
        label=${directory//\\/\\\\}
        label=${label//\"/\\\"}
        label=${label//$'\n'/\\n}

        # Directory size in bytes
        du_output=$(timeout "$TIMEOUT" du --block-size=1 --summarize "$directory" 2>/dev/null) || {
            log_info "WARNING: du failed for $directory" || true
            success=0
            continue
        }

        # du prints "<bytes><whitespace><path>"; keep the first field
        size_bytes=${du_output%%[[:space:]]*}
        echo "node_directory_size_bytes{directory=\"${label}\"} ${size_bytes}"

        # Usage percentage of the containing filesystem; a df failure just
        # omits this sample instead of ending the script via pipefail.
        pct=$(df --output=pcent "$directory" 2>/dev/null | tail -n 1 | tr -d ' %') || pct=""
        if [[ "$pct" =~ ^[0-9]+$ ]]; then
            echo "node_directory_filesystem_usage_percent{directory=\"${label}\"} ${pct}"
        fi
    done

    # ── Script runtime ──
    end_time=$(date +%s%N)
    runtime=$(awk -v s="$start_time" -v e="$end_time" 'BEGIN {printf "%.3f", (e - s) / 1000000000}')

    echo ""
    echo "# HELP ${EXPORTER_NAME}_duration_seconds Script execution time"
    echo "# TYPE ${EXPORTER_NAME}_duration_seconds gauge"
    echo "${EXPORTER_NAME}_duration_seconds ${runtime}"

    echo "# HELP ${EXPORTER_NAME}_last_run_timestamp Last successful run"
    echo "# TYPE ${EXPORTER_NAME}_last_run_timestamp gauge"
    echo "${EXPORTER_NAME}_last_run_timestamp $(date +%s)"

    echo "# HELP ${EXPORTER_NAME}_success Whether the exporter ran successfully"
    echo "# TYPE ${EXPORTER_NAME}_success gauge"
    echo "${EXPORTER_NAME}_success ${success}"
}
|
||||
|
||||
# ── HTTP Request Handler ────────────────────────────────────────────
|
||||
|
||||
# Serve one HTTP request read from stdin and write the response to stdout.
# "/metrics" returns the collected metrics; any other path returns 404.
# Content-Length is computed in bytes (LC_ALL=C), not characters, and the
# metrics body keeps the final newline that command substitution strips.
handle_request() {
    local path="" header
    local metrics length body

    # Request line is "<method> <path> <version>"; only the path matters.
    # A client that sends nothing falls through to the 404 reply.
    read -r _ path _ || true

    # Consume the headers up to the blank line
    while IFS= read -r header; do
        [[ "$header" == $'\r' || -z "$header" ]] && break
    done

    # Make ${#var} count bytes for the rest of this function
    local LC_ALL=C

    if [[ "$path" == "/metrics" ]]; then
        metrics=$(collect_metrics)
        metrics+=$'\n'
        length=${#metrics}

        printf "HTTP/1.1 200 OK\r\n"
        printf "Content-Type: text/plain; version=0.0.4; charset=utf-8\r\n"
        printf "Content-Length: %d\r\n" "$length"
        printf "Connection: close\r\n"
        printf "\r\n"
        printf "%s" "$metrics"
    else
        body="404 Not Found"
        printf "HTTP/1.1 404 Not Found\r\n"
        printf "Content-Type: text/plain\r\n"
        printf "Content-Length: %d\r\n" "${#body}"
        printf "Connection: close\r\n"
        printf "\r\n"
        printf "%s" "$body"
    fi
}
|
||||
|
||||
# ── Help ─────────────────────────────────────────────────────────────
|
||||
|
||||
# Print the usage text on stdout; the caller exits afterwards.
# Globals: DEFAULT_PORT (read); $0 is shown as the program name
show_help() {
    cat <<EOF
Usage: $0 [OPTIONS] <directory> [directory2 ...]

Monitor directory sizes for Prometheus. Node exporter only reports
mounted filesystem totals — this script tracks individual directories.

Output modes:
(default) Print metrics to stdout
--textfile Write to node_exporter textfile collector
-o FILE Write to a specific file
--http Run as HTTP server (default port: ${DEFAULT_PORT})

Options:
--port PORT HTTP listen port (default: ${DEFAULT_PORT})
--timeout SECS du command timeout (default: 300)
--dry-run Show what would be written without writing
--verbose, -v Enable verbose debug output
--quiet, -q Suppress non-error output
-h, --help Show this help message

Environment variables:
DIRECTORY_SIZE_PORT HTTP listen port (default: ${DEFAULT_PORT})
DIRECTORY_SIZE_TIMEOUT du command timeout in seconds (default: 300)

Examples:
$0 /var/log /home /opt
$0 --textfile /var/log /var/lib/mysql
$0 --http --port 9101 /var/log /home
$0 -o /tmp/dir_sizes.prom /var/log
EOF
}
|
||||
|
||||
# ── Argument Parsing ────────────────────────────────────────────────
|
||||
|
||||
# Abort with a clear message unless an option is followed by a value.
# Arguments: $1 - option name; $2 - number of arguments left, option included
require_value() {
    if (( $2 < 2 )); then
        echo "Error: option $1 requires an argument" >&2
        exit 1
    fi
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        --textfile)
            OUTPUT_MODE="textfile"
            shift
            ;;
        -o)
            require_value "$1" "$#"
            OUTPUT_MODE="file"
            OUTPUT_FILE="$2"
            shift 2
            ;;
        --http)
            OUTPUT_MODE="http"
            shift
            ;;
        --port)
            require_value "$1" "$#"
            PORT="$2"
            shift 2
            ;;
        --timeout)
            require_value "$1" "$#"
            TIMEOUT="$2"
            shift 2
            ;;
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --verbose|-v)
            VERBOSE=true
            shift
            ;;
        --quiet|-q)
            QUIET=true
            shift
            ;;
        --handle-request)
            # Internal: run by socat once per connection. That child is
            # started without directory arguments, so the listening parent
            # passes the list in DIRECTORY_SIZE_TARGETS (one path per line).
            if [[ ${#TARGET_DIRECTORIES[@]} -eq 0 && -n "${DIRECTORY_SIZE_TARGETS:-}" ]]; then
                mapfile -t TARGET_DIRECTORIES <<<"$DIRECTORY_SIZE_TARGETS"
            fi
            handle_request
            exit 0
            ;;
        -h|--help)
            show_help
            exit 0
            ;;
        -*)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
        *)
            TARGET_DIRECTORIES+=("$1")
            shift
            ;;
    esac
done

# Validate directories
if [[ ${#TARGET_DIRECTORIES[@]} -eq 0 ]]; then
    echo "Error: at least one directory argument is required" >&2
    echo "Run with --help for usage" >&2
    exit 1
fi

for dir in "${TARGET_DIRECTORIES[@]}"; do
    if [[ ! -d "$dir" ]]; then
        echo "Error: directory does not exist: $dir" >&2
        exit 1
    fi
    if [[ ! -r "$dir" ]]; then
        echo "Error: directory is not readable: $dir" >&2
        exit 1
    fi
done

# ── Output ──────────────────────────────────────────────────────────

if [[ "$DRY_RUN" == true ]]; then
    log_info "DRY RUN — metrics that would be written:"
    collect_metrics
    exit 0
fi

# Remove a leftover temp file if the script stops before the final rename
temp_file=""
cleanup_temp() {
    [[ -z "$temp_file" ]] || rm -f -- "$temp_file"
}
trap cleanup_temp EXIT

case "$OUTPUT_MODE" in
    stdout)
        collect_metrics
        ;;
    textfile)
        output_dir="/var/lib/node_exporter"
        OUTPUT_FILE="${output_dir}/${EXPORTER_NAME}.prom"
        mkdir -p "$output_dir"
        # Write next to the target, then rename, so the collector never
        # reads a partial file
        temp_file=$(mktemp "${output_dir}/.${EXPORTER_NAME}.XXXXXX")
        collect_metrics > "$temp_file"
        chmod 644 "$temp_file"
        mv -f "$temp_file" "$OUTPUT_FILE"
        ;;
    file)
        temp_file=$(mktemp "${OUTPUT_FILE}.XXXXXX")
        collect_metrics > "$temp_file"
        chmod 644 "$temp_file"
        mv -f "$temp_file" "$OUTPUT_FILE"
        ;;
    http)
        if ! command -v socat &>/dev/null; then
            echo "ERROR: socat is required for --http mode" >&2
            echo "Install it: apt install socat or dnf install socat" >&2
            exit 1
        fi
        echo "${EXPORTER_NAME} listening on port ${PORT}..."
        echo "Monitoring directories: ${TARGET_DIRECTORIES[*]}"
        # The per-connection child is "$0 --handle-request" and sees none
        # of this process's arguments. Without this hand-over it would
        # export no directories at all.
        DIRECTORY_SIZE_TARGETS=$(printf '%s\n' "${TARGET_DIRECTORIES[@]}")
        export DIRECTORY_SIZE_TARGETS
        export DIRECTORY_SIZE_TIMEOUT="$TIMEOUT"
        socat TCP-LISTEN:"$PORT",reuseaddr,fork EXEC:"$0 --handle-request"
        ;;
esac
|
||||
@@ -0,0 +1,354 @@
|
||||
#!/usr/bin/env bash
|
||||
# disk-io-exporter.sh — Prometheus exporter for per-disk I/O performance
|
||||
#
|
||||
# Reads /proc/diskstats and calculates per-disk IOPS, throughput,
|
||||
# latency, utilization, and queue depth. Takes two samples with a
|
||||
# configurable interval to compute rates from the cumulative counters.
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# License: MIT
|
||||
# Date: 2026-03-03
|
||||
# Version: 1.0.0
|
||||
|
||||
set -euo pipefail

# ── Configuration ───────────────────────────────────────────────────
# NODE_DIR, SAMPLE_INTERVAL, DISK_FILTER and DEBUG may be overridden from
# the environment; everything else is fixed.

readonly VERSION="1.0.0"
readonly SCRIPT_NAME="${0##*/}"
readonly NODE_DIR="${NODE_DIR:-/var/lib/node_exporter}"
readonly OUTPUT_FILE="${NODE_DIR}/disk_io.prom"
# Temp file lives beside the output file so the final rename stays on one
# filesystem.
readonly TMP_FILE="${OUTPUT_FILE}.$$"
readonly SAMPLE_INTERVAL="${SAMPLE_INTERVAL:-1}"
readonly DISK_FILTER="${DISK_FILTER:-}"

# Runtime flags
DRY_RUN=false
DEBUG=${DEBUG:-}
|
||||
|
||||
# ── Helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
# Print a "[DEBUG] ..." line on stderr, but only when DEBUG is non-empty.
# Always returns 0 so it is safe under `set -e`.
debug_echo() {
  if [[ -n "$DEBUG" ]]; then
    printf '[DEBUG] %s\n' "$*" >&2
  fi
}
|
||||
|
||||
# Print an "[ERROR] ..." line on stderr.
log_error() {
  printf '[ERROR] %s\n' "$*" >&2
}
|
||||
|
||||
# Remove the temp file, if any. Registered on EXIT so it runs on every
# exit path, including failures under `set -e`.
cleanup() {
  rm -f -- "$TMP_FILE"
}

trap cleanup EXIT
|
||||
|
||||
# Print usage information on stdout and exit 0.
# Reads: SCRIPT_NAME, SAMPLE_INTERVAL.
# Literal "$" characters in the regex examples are escaped because the
# heredoc is unquoted (it expands $SCRIPT_NAME and ${SAMPLE_INTERVAL}).
show_help() {
  cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

Prometheus textfile collector exporter for per-disk I/O performance.
Reads /proc/diskstats, takes two samples ${SAMPLE_INTERVAL}s apart, and
calculates rates per disk.

OPTIONS:
    --dry-run     Output metrics to stdout instead of writing to file
    --debug       Enable debug output
    --help        Show this help message
    --version     Show version

ENVIRONMENT VARIABLES:
    DISK_FILTER       Regex of disk names to include (default: all real disks)
                      Example: DISK_FILTER="^sd[a-z]+\$|^nvme[0-9]+n[0-9]+\$"
    NODE_DIR          Textfile collector directory (default: /var/lib/node_exporter)
    SAMPLE_INTERVAL   Seconds between the two samples (default: 1)
    DEBUG             Enable debug output when set to any value

EXAMPLES:
    $SCRIPT_NAME --dry-run
    DISK_FILTER="^sda\$" $SCRIPT_NAME
    SAMPLE_INTERVAL=2 $SCRIPT_NAME
    DEBUG=1 $SCRIPT_NAME --dry-run

FILTERED DEVICES:
    loop*, ram* devices are excluded by default. Use DISK_FILTER to
    restrict to specific disks (e.g. only sd* or nvme* devices).

EOF
  exit 0
}
|
||||
|
||||
# Print "<script> version <version>" on stdout and exit 0.
show_version() {
  printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
  exit 0
}
|
||||
|
||||
# ── Snapshot /proc/diskstats ────────────────────────────────────────
|
||||
#
|
||||
# Fields from /proc/diskstats (kernel 4.18+):
|
||||
# $1 major
|
||||
# $2 minor
|
||||
# $3 device name
|
||||
# $4 reads completed
|
||||
# $5 reads merged
|
||||
# $6 sectors read
|
||||
# $7 time reading (ms)
|
||||
# $8 writes completed
|
||||
# $9 writes merged
|
||||
# $10 sectors written
|
||||
# $11 time writing (ms)
|
||||
# $12 I/Os in progress (instantaneous)
|
||||
# $13 time doing I/Os (ms)
|
||||
# $14 weighted time doing I/Os (ms)
|
||||
|
||||
# Read the cumulative per-device counters into an associative array.
#
# Arguments:
#   $1 - name of a caller-declared associative array; filled with keys of
#        the form "<dev>_<counter>" (reads, sectors_read, read_ms, writes,
#        sectors_written, write_ms, inflight, io_ms, weighted_ms)
#   $2 - optional stats file to parse (default: /proc/diskstats)
# Globals: DISK_FILTER (read) - optional regex a device name must match
#
# Skipped devices: loop*/ram*, and partitions. Partitions are recognised
# as "<digit>p<digits>" suffixes (nvme0n1p1, mmcblk0p2) or as a numbered
# sd*/hd*/vd*/xvd* name (sda1, vdb2). The previous pattern, any
# "<letters><digits>" name, also discarded whole devices such as mmcblk0
# and md0.
take_snapshot() {
  local -n _snapshot=$1
  local stats_file=${2:-/proc/diskstats}
  local dev reads sectors_read read_ms writes sectors_written write_ms
  local inflight io_ms weighted_ms

  while read -r _ _ dev reads _ sectors_read read_ms writes _ sectors_written write_ms inflight io_ms weighted_ms _; do
    [[ -n "$dev" ]] || continue

    # Skip loop and ram devices
    [[ "$dev" =~ ^(loop|ram)[0-9] ]] && continue

    # Skip partitions: report whole disks only
    [[ "$dev" =~ [0-9]+p[0-9]+$ ]] && continue
    [[ "$dev" =~ ^(sd|hd|vd|xvd)[a-z]+[0-9]+$ ]] && continue

    # Apply user filter if set (unquoted on purpose: it is a regex)
    if [[ -n "${DISK_FILTER:-}" ]] && ! [[ "$dev" =~ $DISK_FILTER ]]; then
      continue
    fi

    _snapshot["${dev}_reads"]="$reads"
    _snapshot["${dev}_sectors_read"]="$sectors_read"
    _snapshot["${dev}_read_ms"]="$read_ms"
    _snapshot["${dev}_writes"]="$writes"
    _snapshot["${dev}_sectors_written"]="$sectors_written"
    _snapshot["${dev}_write_ms"]="$write_ms"
    _snapshot["${dev}_inflight"]="$inflight"
    _snapshot["${dev}_io_ms"]="$io_ms"
    _snapshot["${dev}_weighted_ms"]="$weighted_ms"
  done < "$stats_file"
}
|
||||
|
||||
# ── Collect device list from a snapshot ─────────────────────────────
|
||||
|
||||
# Print the sorted device names contained in a snapshot, one per line.
#
# Arguments: $1 - name of an associative array filled by take_snapshot
# Each device contributes exactly one "<dev>_reads" key, so stripping that
# suffix from the matching keys yields the device list.
get_devices() {
  local -n _snap=$1
  local key
  for key in "${!_snap[@]}"; do
    if [[ "$key" == *_reads ]]; then
      echo "${key%_reads}"
    fi
  done | sort
}
|
||||
|
||||
# ── Metrics Collection ─────────────────────────────────────────────
|
||||
|
||||
# Format one sample from a counter delta.
#   $1 - formula: per_sec | bytes | util | queue
#   $2 - counter delta over the interval
#   $3 - interval in seconds
# Values are passed with -v rather than spliced into the awk program text.
_disk_io_calc() {
  awk -v kind="$1" -v d="$2" -v i="$3" 'BEGIN {
    if (kind == "bytes") {
      v = (d * 512) / i                 # each sector is 512 bytes
    } else if (kind == "util") {
      v = (d / (i * 1000)) * 100        # ms busy per ms elapsed, as percent
      if (v > 100) v = 100
    } else if (kind == "queue") {
      v = d / (i * 1000)
    } else {
      v = d / i
    }
    printf "%.2f", v
  }'
}

# Emit HELP/TYPE headers plus one gauge line per device for a metric that is
# derived from a single counter.
#   $1 - metric suffix (after "linux_disk_io_"), $2 - help text,
#   $3 - snapshot counter name, $4 - formula for _disk_io_calc
# Reads snap1, snap2, devices and interval from the calling collect_metrics
# (bash dynamic scoping).
_disk_io_emit() {
  local metric=$1 help=$2 field=$3 kind=$4
  local dev before after value

  echo "# HELP linux_disk_io_${metric} ${help}"
  echo "# TYPE linux_disk_io_${metric} gauge"
  while read -r dev; do
    before="${snap1[${dev}_${field}]:-0}"
    after="${snap2[${dev}_${field}]:-0}"
    value=$(_disk_io_calc "$kind" "$((after - before))" "$interval")
    echo "linux_disk_io_${metric}{disk=\"${dev}\"} ${value}"
    debug_echo "$dev ${metric}=$value"
  done <<< "$devices"
}

# Sample the disk counters twice, SAMPLE_INTERVAL seconds apart, and print
# per-disk rate metrics plus exporter metadata in Prometheus text format.
#
# Globals: SAMPLE_INTERVAL (read)
# Outputs: metrics on stdout; diagnostics on stderr
# Returns: 0 on success (including "no disks found"), 1 when
#          SAMPLE_INTERVAL is not a positive number
#
# Devices that appear only in the second snapshot are skipped: without a
# baseline their since-boot totals would be reported as a one-interval rate.
collect_metrics() {
  local interval="$SAMPLE_INTERVAL"
  local nl=$'\n'

  # A zero or non-numeric interval would otherwise surface as an awk
  # division error halfway through the output.
  if ! [[ "$interval" =~ ^([0-9]+([.][0-9]*)?|[.][0-9]+)$ ]] ||
    ! awk -v i="$interval" 'BEGIN { exit !(i > 0) }'; then
    log_error "SAMPLE_INTERVAL must be a positive number (got: ${interval})"
    return 1
  fi

  local start_time end_time runtime
  start_time=$(date +%s%N)

  # First snapshot
  local -A snap1=()
  take_snapshot snap1
  debug_echo "First snapshot taken"

  sleep "$interval"

  # Second snapshot
  local -A snap2=()
  take_snapshot snap2
  debug_echo "Second snapshot taken after ${interval}s interval"

  # Keep only devices that have a baseline in the first snapshot.
  local all_devices dev devices=""
  all_devices=$(get_devices snap2)
  while read -r dev; do
    [[ -n "$dev" ]] || continue
    if [[ -z "${snap1[${dev}_reads]+x}" ]]; then
      debug_echo "$dev appeared during the sample interval, skipping"
      continue
    fi
    devices+="${devices:+$nl}${dev}"
  done <<< "$all_devices"

  if [[ -z "$devices" ]]; then
    log_error "No disks found after filtering"
    echo "# No disks found"
    return
  fi

  # ── HELP/TYPE headers and metric values ──

  _disk_io_emit read_iops "Read operations per second" reads per_sec
  _disk_io_emit write_iops "Write operations per second" writes per_sec
  _disk_io_emit read_bytes_per_sec "Bytes read per second" sectors_read bytes
  _disk_io_emit write_bytes_per_sec "Bytes written per second" sectors_written bytes

  # await = time spent on completed I/Os divided by the number completed
  echo "# HELP linux_disk_io_await_ms Average I/O latency in milliseconds"
  echo "# TYPE linux_disk_io_await_ms gauge"
  local read_ops write_ops read_time write_time total_ops total_ms value
  while read -r dev; do
    read_ops=$((${snap2[${dev}_reads]:-0} - ${snap1[${dev}_reads]:-0}))
    write_ops=$((${snap2[${dev}_writes]:-0} - ${snap1[${dev}_writes]:-0}))
    read_time=$((${snap2[${dev}_read_ms]:-0} - ${snap1[${dev}_read_ms]:-0}))
    write_time=$((${snap2[${dev}_write_ms]:-0} - ${snap1[${dev}_write_ms]:-0}))
    total_ops=$((read_ops + write_ops))
    total_ms=$((read_time + write_time))
    if [[ "$total_ops" -gt 0 ]]; then
      value=$(awk -v ms="$total_ms" -v ops="$total_ops" 'BEGIN { printf "%.2f", ms / ops }')
    else
      value="0.00"
    fi
    echo "linux_disk_io_await_ms{disk=\"${dev}\"} ${value}"
    debug_echo "$dev await_ms=$value"
  done <<< "$devices"

  _disk_io_emit util_percent "Disk utilization percentage" io_ms util
  _disk_io_emit queue_depth "Weighted number of I/Os in progress (avgqu-sz)" weighted_ms queue

  # ── Script metadata metrics ──

  end_time=$(date +%s%N)
  # Subtract in shell integer arithmetic: nanosecond epoch values do not
  # fit exactly in awk's double-precision numbers.
  runtime=$(awk -v ns="$((end_time - start_time))" 'BEGIN { printf "%.3f", ns / 1000000000 }')

  echo ""
  echo "# HELP linux_disk_io_exporter_duration_seconds Script execution time"
  echo "# TYPE linux_disk_io_exporter_duration_seconds gauge"
  echo "linux_disk_io_exporter_duration_seconds ${runtime}"

  echo "# HELP linux_disk_io_exporter_last_run_timestamp Last successful run"
  echo "# TYPE linux_disk_io_exporter_last_run_timestamp gauge"
  echo "linux_disk_io_exporter_last_run_timestamp $(date +%s)"

  echo "# HELP linux_disk_io_exporter_success Whether the exporter ran successfully"
  echo "# TYPE linux_disk_io_exporter_success gauge"
  echo "linux_disk_io_exporter_success 1"
}
|
||||
|
||||
# ── Main ────────────────────────────────────────────────────────────
|
||||
|
||||
# Entry point: parse options, then either print the metrics (--dry-run) or
# write them atomically into the textfile collector directory.
#
# Arguments: the script's command-line arguments
# Exits 1 on an unknown option, when /proc/diskstats is missing, or when
# NODE_DIR does not exist.
main() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --debug)
        DEBUG=1
        shift
        ;;
      --help | -h)
        show_help
        ;;
      --version | -v)
        show_version
        ;;
      *)
        log_error "Unknown option: $1"
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done

  if [[ ! -f /proc/diskstats ]]; then
    log_error "/proc/diskstats not found — this script requires a Linux system"
    exit 1
  fi

  if [[ "$DRY_RUN" == true ]]; then
    collect_metrics
    exit 0
  fi

  if [[ ! -d "$NODE_DIR" ]]; then
    log_error "Textfile collector directory does not exist: $NODE_DIR"
    exit 1
  fi

  # Write to a temp file first, then rename, so the collector never reads
  # a partially written file.
  collect_metrics > "$TMP_FILE"
  chmod 644 "$TMP_FILE"
  mv -f "$TMP_FILE" "$OUTPUT_FILE"
  debug_echo "Metrics written to $OUTPUT_FILE"
}
|
||||
|
||||
main "$@"
|
||||
@@ -0,0 +1,383 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: dns-health-check.sh
|
||||
# Version: 1.0
|
||||
# Description: Prometheus textfile collector exporter for DNS resolution health
|
||||
# Queries configurable DNS records and reports resolution status
|
||||
# and latency via node_exporter textfile collector
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
# Date: 2026-03-03
|
||||
#
|
||||
# Prerequisites:
|
||||
# - dig (bind-utils / dnsutils)
|
||||
# - node_exporter with textfile collector enabled
|
||||
# - /var/lib/node_exporter directory exists
|
||||
#
|
||||
# Usage:
|
||||
# # Run with default config
|
||||
# sudo ./dns-health-check.sh
|
||||
#
|
||||
# # Dry run (output to stdout)
|
||||
# ./dns-health-check.sh --dry-run
|
||||
#
|
||||
# # Debug mode
|
||||
# DEBUG=1 sudo ./dns-health-check.sh
|
||||
#
|
||||
# Config Format (pipe-delimited, one record per line):
|
||||
# record_name|record_type|dns_server|expected_value(optional)
|
||||
#
|
||||
# Examples:
|
||||
# example.com|A|8.8.8.8|
|
||||
# mail.example.com|MX|8.8.8.8|
|
||||
# _ldap._tcp.example.com|SRV|10.0.0.1|
|
||||
# example.com|A|8.8.8.8|93.184.216.34
|
||||
#
|
||||
# Metrics Exported:
|
||||
# - linux_dns_query_success{record,type,server} - 1=resolved, 0=failed
|
||||
# - linux_dns_query_time_seconds{record,type,server} - Resolution time
|
||||
# - linux_dns_query_answer_match{record,type,server,expected} - 1=match, 0=mismatch
|
||||
#
|
||||
################################################################################
|
||||
|
||||
set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================
# TEXTFILE_DIR, CONFIG_FILE and DEBUG may be overridden from the environment.

readonly VERSION="1.0"
readonly SCRIPT_NAME="${0##*/}"
readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
readonly OUTPUT_FILE="${TEXTFILE_DIR}/dns_health.prom"
readonly CONFIG_FILE="${CONFIG_FILE:-/etc/dns-health-check.conf}"
# Temp file sits beside the output file so the final rename stays on one
# filesystem.
readonly TMP_FILE="${OUTPUT_FILE}.$$"

# Runtime flags
DRY_RUN=false
DEBUG=${DEBUG:-}

# Default DNS records to check if no config file and no env var
readonly DEFAULT_RECORDS="localhost|A|127.0.0.1|"
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Print a "[DEBUG] ..." line on stderr, but only when DEBUG is non-empty.
debug_echo() {
  if [[ -n "$DEBUG" ]]; then
    printf '[DEBUG] %s\n' "$*" >&2
  fi
}
|
||||
|
||||
# Print an "[ERROR] ..." line on stderr.
log_error() {
  printf '[ERROR] %s\n' "$*" >&2
}
|
||||
|
||||
# Remove the temp file, if any. Registered on EXIT so it runs on every
# exit path.
cleanup() {
  rm -f -- "$TMP_FILE"
}

trap cleanup EXIT
|
||||
|
||||
# Print usage information on stdout and exit 0.
# Reads: SCRIPT_NAME.
show_help() {
  cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

Prometheus textfile collector exporter for DNS resolution health.
Queries DNS records and reports resolution status and latency.

OPTIONS:
    --dry-run     Output metrics to stdout instead of writing to file
    --debug       Enable debug output
    --help        Show this help message
    --version     Show version

CONFIGURATION:
    DNS records are configured in /etc/dns-health-check.conf (or set CONFIG_FILE).
    Each line defines a DNS check in pipe-delimited format:

        record_name|record_type|dns_server|expected_value(optional)

    Example config:
        example.com|A|8.8.8.8|
        mail.example.com|MX|8.8.8.8|
        _ldap._tcp.example.com|SRV|10.0.0.1|
        example.com|A|8.8.8.8|93.184.216.34

    Lines starting with # are comments. Blank lines are ignored.
    If expected_value is set, the script checks whether the DNS answer matches.

    Records can also be supplied via the DNS_RECORDS environment variable
    as a semicolon-separated list using the same pipe-delimited format:

        DNS_RECORDS="example.com|A|8.8.8.8|;google.com|A|8.8.4.4|"

    If neither a config file nor DNS_RECORDS is found, a default check
    queries localhost via 127.0.0.1 as a basic resolution test.

ENVIRONMENT VARIABLES:
    CONFIG_FILE     Path to config file (default: /etc/dns-health-check.conf)
    TEXTFILE_DIR    Textfile collector directory (default: /var/lib/node_exporter)
    DNS_RECORDS     Semicolon-separated DNS records (overrides config file)
    DEBUG           Enable debug output when set to any value

EXAMPLES:
    sudo $SCRIPT_NAME
    $SCRIPT_NAME --dry-run
    DEBUG=1 sudo $SCRIPT_NAME
    DNS_RECORDS="example.com|A|8.8.8.8|93.184.216.34" $SCRIPT_NAME --dry-run

EOF
  exit 0
}
|
||||
|
||||
# Print "<script> version <version>" on stdout and exit 0.
show_version() {
  printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
  exit 0
}
|
||||
|
||||
# ============================================================================
|
||||
# DEPENDENCY CHECK
|
||||
# ============================================================================
|
||||
|
||||
# Abort with exit status 1 unless `dig` can be found.
check_dependencies() {
  if ! command -v dig &>/dev/null; then
    log_error "'dig' is not installed. Install bind-utils (RHEL/Rocky) or dnsutils (Debian/Ubuntu)."
    exit 1
  fi
}
|
||||
|
||||
# ============================================================================
|
||||
# RECORD LOADING
|
||||
# ============================================================================
|
||||
|
||||
# Print the DNS records to check, one pipe-delimited record per line.
#
# Source priority: DNS_RECORDS env var (semicolon-separated) > CONFIG_FILE
# > DEFAULT_RECORDS. Surrounding whitespace is trimmed and empty entries
# are dropped; in the config file everything from "#" onward is a comment.
#
# Globals: DNS_RECORDS, CONFIG_FILE, DEFAULT_RECORDS (read)
# Outputs: records on stdout; a debug summary via debug_echo
load_records() {
  local record_count=0
  local source=""
  local entry line
  local -a entries=()

  if [[ -n "${DNS_RECORDS:-}" ]]; then
    source="DNS_RECORDS environment variable"
    # Split with `read -a` so entries are never glob-expanded and IFS is
    # changed only for this one command. read returns non-zero at EOF with
    # -d '', which is expected here.
    IFS=';' read -r -d '' -a entries <<< "$DNS_RECORDS" || true
    for entry in "${entries[@]}"; do
      entry="${entry#"${entry%%[![:space:]]*}"}"
      entry="${entry%"${entry##*[![:space:]]}"}"
      if [[ -n "$entry" ]]; then
        echo "$entry"
        record_count=$((record_count + 1))
      fi
    done
  elif [[ -f "$CONFIG_FILE" ]]; then
    source="$CONFIG_FILE"
    # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Strip comments and whitespace
      line="${line%%#*}"
      line="${line#"${line%%[![:space:]]*}"}"
      line="${line%"${line##*[![:space:]]}"}"

      if [[ -z "$line" ]]; then
        continue
      fi

      echo "$line"
      record_count=$((record_count + 1))
    done < "$CONFIG_FILE"
  else
    source="defaults"
    echo "$DEFAULT_RECORDS"
    record_count=1
  fi

  debug_echo "Loaded $record_count DNS record(s) from $source"
}
|
||||
|
||||
# ============================================================================
|
||||
# DNS QUERY
|
||||
# ============================================================================
|
||||
|
||||
# Print the current time in nanoseconds since the epoch. Falls back to
# whole seconds when `date` does not produce a purely numeric %N value.
_dns_now_ns() {
  local now
  now=$(date +%s%N 2>/dev/null) || now=""
  if ! [[ "$now" =~ ^[0-9]+$ ]]; then
    now="$(date +%s)000000000"
  fi
  printf '%s' "$now"
}

# Run one DNS query and print "success|seconds|match" on stdout.
#
# Arguments:
#   $1 - record name, $2 - record type, $3 - DNS server,
#   $4 - optional expected value (fixed-string substring of the answer)
# Output fields:
#   success - 1 when dig succeeded with a non-empty answer, else 0
#   seconds - wall-clock query time with six decimals
#   match   - 1/0 when an expected value was given, empty otherwise
query_dns() {
  local record="$1"
  local rtype="$2"
  local server="$3"
  local expected="${4:-}"

  debug_echo "Querying $rtype record for $record via $server"

  local dig_output query_start query_end query_time elapsed_ns
  local success=0
  local answer=""
  local match=""

  query_start=$(_dns_now_ns)

  if dig_output=$(dig +short +time=5 +tries=2 "$record" "$rtype" "@${server}" 2>/dev/null); then
    query_end=$(_dns_now_ns)
    answer="${dig_output}"

    if [[ -n "$answer" ]]; then
      success=1
      debug_echo "  Answer: $(echo "$answer" | tr '\n' ' ')"
    else
      debug_echo "  Empty answer (NXDOMAIN or no records)"
    fi
  else
    query_end=$(_dns_now_ns)
    debug_echo "  Query failed"
  fi

  # Calculate query time in seconds
  elapsed_ns=$((query_end - query_start))
  query_time=$(awk -v ns="$elapsed_ns" 'BEGIN { printf "%.6f", ns / 1000000000 }')

  # Check expected value if provided. "--" keeps an expected value that
  # starts with "-" from being parsed as a grep option.
  if [[ -n "$expected" ]]; then
    if grep -qF -- "$expected" <<< "$answer"; then
      match=1
      debug_echo "  Expected value matched: $expected"
    else
      match=0
      debug_echo "  Expected value NOT matched: $expected (got: $(echo "$answer" | tr '\n' ' '))"
    fi
  fi

  echo "${success}|${query_time}|${match}"
}
|
||||
|
||||
# ============================================================================
|
||||
# METRICS COLLECTION
|
||||
# ============================================================================
|
||||
|
||||
collect_metrics() {
|
||||
local records=()
|
||||
while IFS= read -r record_line; do
|
||||
records+=("$record_line")
|
||||
done < <(load_records)
|
||||
|
||||
local success_metrics=""
|
||||
local time_metrics=""
|
||||
local match_metrics=""
|
||||
local has_match_metric=false
|
||||
|
||||
for record_line in "${records[@]}"; do
|
||||
local record
|
||||
record=$(echo "$record_line" | cut -d'|' -f1)
|
||||
local rtype
|
||||
rtype=$(echo "$record_line" | cut -d'|' -f2)
|
||||
local server
|
||||
server=$(echo "$record_line" | cut -d'|' -f3)
|
||||
local expected
|
||||
expected=$(echo "$record_line" | cut -d'|' -f4)
|
||||
|
||||
if [[ -z "$record" ]] || [[ -z "$rtype" ]] || [[ -z "$server" ]]; then
|
||||
log_error "Invalid config line: $record_line (expected: record_name|record_type|dns_server|expected_value)"
|
||||
continue
|
||||
fi
|
||||
|
||||
local result
|
||||
result=$(query_dns "$record" "$rtype" "$server" "$expected")
|
||||
|
||||
local qsuccess
|
||||
qsuccess=$(echo "$result" | cut -d'|' -f1)
|
||||
local qtime
|
||||
qtime=$(echo "$result" | cut -d'|' -f2)
|
||||
local qmatch
|
||||
qmatch=$(echo "$result" | cut -d'|' -f3)
|
||||
|
||||
local labels="record=\"${record}\",type=\"${rtype}\",server=\"${server}\""
|
||||
|
||||
success_metrics+="linux_dns_query_success{${labels}} ${qsuccess}\n"
|
||||
time_metrics+="linux_dns_query_time_seconds{${labels}} ${qtime}\n"
|
||||
|
||||
if [[ -n "$expected" ]]; then
|
||||
has_match_metric=true
|
||||
local match_labels="${labels},expected=\"${expected}\""
|
||||
match_metrics+="linux_dns_query_answer_match{${match_labels}} ${qmatch}\n"
|
||||
fi
|
||||
done
|
||||
|
||||
local output=""
|
||||
|
||||
output+="# HELP linux_dns_query_success DNS query resolved successfully (1=resolved, 0=failed)\n"
|
||||
output+="# TYPE linux_dns_query_success gauge\n"
|
||||
output+="$success_metrics"
|
||||
|
||||
output+="# HELP linux_dns_query_time_seconds DNS query resolution time in seconds\n"
|
||||
output+="# TYPE linux_dns_query_time_seconds gauge\n"
|
||||
output+="$time_metrics"
|
||||
|
||||
if [[ "$has_match_metric" == "true" ]]; then
|
||||
output+="# HELP linux_dns_query_answer_match DNS answer matches expected value (1=match, 0=mismatch)\n"
|
||||
output+="# TYPE linux_dns_query_answer_match gauge\n"
|
||||
output+="$match_metrics"
|
||||
fi
|
||||
|
||||
printf '%b' "$output"
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# OUTPUT
|
||||
# ============================================================================
|
||||
|
||||
# Collect the metrics and either print them (dry run) or publish them to
# OUTPUT_FILE through a temp file and an atomic rename.
#
# Globals: DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (read)
# Exits 1 when TEXTFILE_DIR is missing or the file cannot be written.
#
# The file is made world-readable (644) before the rename so the collector
# can read it even under a restrictive umask; this matches the sibling
# disk-io exporter. The script does not run under `set -e`, so each step
# is checked explicitly.
write_metrics() {
  local metrics
  metrics=$(collect_metrics)

  if [[ "$DRY_RUN" == "true" ]]; then
    printf '%s\n' "$metrics"
    return
  fi

  if [[ ! -d "$TEXTFILE_DIR" ]]; then
    log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
    exit 1
  fi

  if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
    log_error "Cannot write temporary file: $TMP_FILE"
    exit 1
  fi
  chmod 644 "$TMP_FILE"
  if ! mv -f "$TMP_FILE" "$OUTPUT_FILE"; then
    log_error "Cannot move metrics into place: $OUTPUT_FILE"
    exit 1
  fi
  debug_echo "Metrics written to $OUTPUT_FILE"
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
# Entry point: parse options, verify that dig is available, then collect
# and publish (or print) the metrics.
#
# Arguments: the script's command-line arguments
# Exits 1 on an unknown option.
main() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --debug)
        DEBUG=1
        shift
        ;;
      --help | -h)
        show_help
        ;;
      --version | -v)
        show_version
        ;;
      *)
        log_error "Unknown option: $1"
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done

  check_dependencies
  write_metrics
}
|
||||
|
||||
main "$@"
|
||||
File diff suppressed because it is too large
Load Diff
Executable
+319
@@ -0,0 +1,319 @@
|
||||
#!/bin/bash
|
||||
|
||||
#############################################################
|
||||
#### Expand Drive ####
|
||||
#### Auto-expand partitions and filesystems ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### License: MIT ####
|
||||
#### Version: 2.3 ####
|
||||
#### ####
|
||||
#### Usage: sudo ./expand-drive.sh ####
|
||||
#############################################################
|
||||
|
||||
# Set strict error handling:
# -e: Exit immediately if a command exits with a non-zero status
# -u: Treat unset variables as an error when substituting
# -o pipefail: A pipeline fails if any of its commands fails
set -euo pipefail

# Constants - required system binaries (command names, resolved via PATH)
readonly BLKID_PATH="blkid"                    # Locate/print block device attributes
readonly LSBLK_PATH="lsblk"                    # List block devices
readonly LOG_FILE="/var/log/expand_drive.log"  # Location for script log output

# Configuration - Runtime behavior settings
readonly DRY_RUN="${DRY_RUN:-false}"                              # If true, show what would be done without making changes
readonly REQUIRED_COMMANDS=("growpart" "xfs_growfs" "resize2fs")  # Commands that must be available
readonly SUPPORTED_FILESYSTEMS=("xfs" "ext2" "ext3" "ext4")       # Filesystem types we can expand

# Exit codes - Standardized exit status values
readonly EXIT_SUCCESS=0        # Script completed successfully
readonly EXIT_ERROR=1          # General error occurred
readonly EXIT_ROOT_REQUIRED=2  # Script must be run as root user
readonly EXIT_MISSING_DEPS=3   # Required dependencies are missing
|
||||
|
||||
# Log a timestamped message to the console (stdout) and append it to
# LOG_FILE.
# Arguments: $1 - message text
log_message() {
  printf '%s: %s\n' "$(date)" "$1" | tee -a "$LOG_FILE"
}
|
||||
|
||||
# Log a timestamped error message to stderr and append it to LOG_FILE.
# Arguments: $1 - message text
log_error() {
  printf '%s: ERROR: %s\n' "$(date)" "$1" | tee -a "$LOG_FILE" >&2
}
|
||||
|
||||
# Return 0 when the given command can be resolved by the shell, non-zero
# otherwise. Prints nothing.
# Arguments: $1 - command name
command_exists() {
  command -v "$1" > /dev/null 2>&1
}
|
||||
|
||||
# Signal handler: log the interruption and terminate with EXIT_ERROR.
cleanup() {
  # shellcheck disable=SC2317  # invoked indirectly via trap
  log_message "Script interrupted, cleaning up..."
  # shellcheck disable=SC2317  # invoked indirectly via trap
  exit "$EXIT_ERROR"
}
|
||||
|
||||
# Validate prerequisites before doing any work.
#
# Checks, in order: running as root, the log directory exists (it is
# created if missing), and every command in REQUIRED_COMMANDS is
# available.
# Exits with EXIT_ROOT_REQUIRED, EXIT_ERROR or EXIT_MISSING_DEPS
# respectively when a check fails. The two early failures happen before
# logging is usable, so their messages go directly to stderr.
validate_prerequisites() {
  local log_dir cmd

  # Root is required for partition/filesystem operations
  if [[ "$(id -u)" -ne 0 ]]; then
    echo "Error: This script must be run as root" >&2
    exit "$EXIT_ROOT_REQUIRED"
  fi

  # Ensure the log directory exists
  log_dir=$(dirname "$LOG_FILE")
  if [[ ! -d "$log_dir" ]]; then
    mkdir -p "$log_dir" || {
      echo "Error: Cannot create log directory $log_dir" >&2
      exit "$EXIT_ERROR"
    }
  fi

  # Verify all required system commands are available
  for cmd in "${REQUIRED_COMMANDS[@]}"; do
    if ! command_exists "$cmd"; then
      log_error "Required command '$cmd' not found. Please install it."
      exit "$EXIT_MISSING_DEPS"
    fi
  done
}
|
||||
|
||||
# Return 0 when the filesystem type is listed in SUPPORTED_FILESYSTEMS,
# 1 otherwise.
# Arguments: $1 - filesystem type (e.g. xfs, ext4)
is_supported_filesystem() {
  local fs_type="$1"
  local candidate

  for candidate in "${SUPPORTED_FILESYSTEMS[@]}"; do
    if [[ "$fs_type" == "$candidate" ]]; then
      return 0
    fi
  done
  return 1
}
|
||||
|
||||
# Grow a mounted filesystem to fill its partition.
#
# Arguments:
#   $1 - block device path (e.g. /dev/sda1)
#   $2 - filesystem type (xfs, ext2, ext3, ext4)
#   $3 - mount point of the filesystem
# Globals: DRY_RUN (read)
# Returns: 0 on success or dry run, 1 on an unsupported type or failure
#
# The resize tool's output is captured and included in the error log on
# failure instead of being discarded.
expand_filesystem() {
  local partition="$1"
  local fs_type="$2"
  local mount_point="$3"
  local tool_output

  # Validate filesystem type is one we support
  if ! is_supported_filesystem "$fs_type"; then
    log_error "Unsupported filesystem type $fs_type on $partition"
    return 1
  fi

  case "$fs_type" in
    xfs)
      log_message "Expanding XFS filesystem on $partition"
      if [[ "$DRY_RUN" == "true" ]]; then
        log_message "DRY RUN: Would expand XFS filesystem on $partition"
        return 0
      fi
      # xfs_growfs takes the mount point, not the device
      if tool_output=$(xfs_growfs "$mount_point" 2>&1); then
        log_message "Successfully expanded XFS filesystem on $partition"
        return 0
      fi
      log_error "Failed to expand XFS filesystem on $partition${tool_output:+: $tool_output}"
      return 1
      ;;
    ext2 | ext3 | ext4)
      log_message "Expanding EXT filesystem on $partition"
      if [[ "$DRY_RUN" == "true" ]]; then
        log_message "DRY RUN: Would expand EXT filesystem on $partition"
        return 0
      fi
      # resize2fs takes the device path
      if tool_output=$(resize2fs "$partition" 2>&1); then
        log_message "Successfully expanded EXT filesystem on $partition"
        return 0
      fi
      log_error "Failed to expand EXT filesystem on $partition${tool_output:+: $tool_output}"
      return 1
      ;;
    *)
      # Listed as supported but has no resize command here
      log_error "Unsupported filesystem type $fs_type on $partition"
      return 1
      ;;
  esac
}
|
||||
|
||||
# Grow a partition into the free space that follows it on the disk.
#
# Arguments:
#   $1 - parent disk device (e.g. /dev/sda)
#   $2 - partition device (e.g. /dev/sda1), used for messages only
#   $3 - partition number (e.g. 1)
# Globals: DRY_RUN (read)
# Returns: 0 when the partition was grown (or would be, in dry-run mode);
#          1 when there is nothing to grow or growpart failed
expand_partition() {
  local disk="$1"
  local partition="$2"
  local part_num="$3"
  local tool_output

  # Ask growpart whether the partition can be grown at all
  if ! growpart "$disk" "$part_num" --dry-run 2> /dev/null; then
    log_message "Partition $partition doesn't need expansion or cannot be expanded, skipping..."
    return 1 # Not an error, just nothing to do
  fi

  if [[ "$DRY_RUN" == "true" ]]; then
    log_message "DRY RUN: Would expand partition $partition"
    return 0
  fi

  # Perform the actual expansion; keep the tool output for the error log
  if tool_output=$(growpart "$disk" "$part_num" 2>&1); then
    log_message "Successfully expanded partition $partition"
    return 0
  fi
  log_error "Failed to expand partition $partition${tool_output:+: $tool_output}"
  return 1
}
|
||||
|
||||
# Set up signal trap to handle interruptions gracefully
trap cleanup INT TERM

# Initialize script by validating prerequisites
validate_prerequisites
|
||||
|
||||
# Expand one partition and the filesystem on it.
#
# Arguments:
#   $1 - partition device path (e.g. /dev/sda1)
#   $2 - parent disk device path (e.g. /dev/sda)
# Returns: 0 when the work was done or there was nothing to do;
#          1 when the partition number cannot be determined or the
#          filesystem resize fails
#
# The script runs under `set -euo pipefail`, so every probe that may
# legitimately fail (findmnt on an unmounted device, blkid on an unknown
# filesystem) is guarded; otherwise the script would abort before the
# warning below it could be logged.
process_partition() {
  local partition="$1"
  local disk="$2"
  local mount_point part_num fs_type current_size new_size

  log_message "Processing partition $partition"

  # The filesystem must be mounted to be resized. findmnt exits non-zero
  # when the device is not mounted; with several targets (bind mounts) the
  # first one is used.
  mount_point=$(findmnt -n -o TARGET "$partition" 2> /dev/null) || mount_point=""
  mount_point=${mount_point%%$'\n'*}
  if [[ -z "$mount_point" ]]; then
    log_message "Warning: $partition is not mounted, skipping filesystem resize"
    return 0
  fi

  # Extract the trailing partition number (e.g. "1" from "/dev/sda1")
  if [[ "$partition" =~ ([0-9]+)$ ]]; then
    part_num=${BASH_REMATCH[1]}
  else
    log_error "Could not extract partition number from $partition"
    return 1
  fi

  # First expand the partition to use available disk space
  if ! expand_partition "$disk" "$partition" "$part_num"; then
    return 0 # Not an error if partition doesn't need expansion
  fi

  # Detect the filesystem type using blkid
  fs_type=$("$BLKID_PATH" -s TYPE -o value "$partition") || fs_type=""
  if [[ -z "$fs_type" ]]; then
    log_message "Warning: Could not detect filesystem type for $partition, skipping..."
    return 0
  fi

  # Size before expansion; -P keeps each filesystem on a single line even
  # when the device name is long
  current_size=$(df -hP "$mount_point" | awk 'NR==2 {print $2}')
  log_message "Current filesystem size on $partition: $current_size"

  # Expand the filesystem to use the newly available partition space
  if ! expand_filesystem "$partition" "$fs_type" "$mount_point"; then
    return 1
  fi

  # Size after expansion
  new_size=$(df -hP "$mount_point" | awk 'NR==2 {print $2}')
  log_message "New filesystem size on $partition: $new_size"
}
|
||||
|
||||
# Grow a filesystem that sits directly on a whole disk (no partition table).
# Globals: BLKID_PATH (read)
# Args: $1 - disk device path (e.g., /dev/nvme3n1)
#       $2 - mount point of the filesystem
# Returns: 0 when the filesystem type cannot be detected (skip). The status
#          of expand_filesystem is not checked here.
process_direct_filesystem() {
    local disk="$1"
    local mount_point="$2"
    local fs_type current_size new_size

    log_message "Processing direct filesystem on $disk mounted at $mount_point"

    # Detect the filesystem type. An empty result is handled just below, so a
    # non-zero blkid status is tolerated here instead of aborting a script
    # that runs with errexit.
    fs_type=$($BLKID_PATH -s TYPE -o value "$disk") || true
    if [ -z "$fs_type" ]; then
        log_message "Warning: Could not detect filesystem type for $disk, skipping..."
        return 0
    fi

    # Report the size before the resize (second column of df's data row)
    current_size=$(df -h "$mount_point" | awk 'NR==2 {print $2}')
    log_message "Current filesystem size on $disk: $current_size"

    # Expand the filesystem to use the full disk space
    expand_filesystem "$disk" "$fs_type" "$mount_point"

    # Report the size after the resize
    new_size=$(df -h "$mount_point" | awk 'NR==2 {print $2}')
    log_message "New filesystem size on $disk: $new_size"
}
|
||||
|
||||
# Process every partition on one disk, or the disk itself when it carries a
# mounted filesystem without a partition table.
# Globals: LSBLK_PATH (read)
# Args: $1 - disk device path (e.g., /dev/sda)
# Returns: 1 if lsblk fails; 0 when the disk has no partitions; otherwise the
#          status of the last process_partition call
process_disk() {
    local disk="$1"
    local lsblk_output partitions partition mount_point

    log_message "Checking partitions on $disk..."

    # List the disk and its children as "NAME TYPE" rows
    lsblk_output=$($LSBLK_PATH -pln -o NAME,TYPE "$disk" 2>&1) || {
        log_error "lsblk command failed for $disk: $lsblk_output"
        return 1
    }

    # Keep only rows whose TYPE column is exactly "part". Matching the column
    # (rather than grepping the whole row) avoids picking up devices that
    # merely have "part" in their name. awk exits 0 when nothing matches.
    partitions=$(printf '%s\n' "$lsblk_output" | awk '$2 == "part" { print $1 }')

    if [ -z "$partitions" ]; then
        # Check if the disk itself has a filesystem (no partition table).
        # Only the first mount target is used; "|| true" keeps a failed
        # lookup on the "skipping" path under errexit/pipefail.
        mount_point=$(findmnt -n -o TARGET "$disk" 2>/dev/null | head -n 1) || true
        if [ -n "$mount_point" ]; then
            log_message "No partitions found on $disk, but disk has direct filesystem. Processing disk directly..."
            process_direct_filesystem "$disk" "$mount_point"
        else
            log_message "No partitions found on $disk, skipping..."
        fi
        return 0
    fi

    # Process each partition found on this disk (device paths contain no
    # whitespace, so word splitting of the list is intended)
    for partition in $partitions; do
        process_partition "$partition" "$disk"
    done
}
|
||||
|
||||
# Main execution function - walks every disk reported by lsblk and hands each
# one to process_disk.
# Globals: LSBLK_PATH, EXIT_ERROR, EXIT_SUCCESS (read)
# Exits: EXIT_ERROR when no disk device is found, EXIT_SUCCESS otherwise
main() {
    local devices disk

    log_message "Starting drive expansion process..."

    # Collect devices whose TYPE column is exactly "disk". Matching the column
    # avoids false hits on other device types that have "disk" in their name,
    # and awk exits 0 on "no match" so the empty-list check below is reached.
    devices=$($LSBLK_PATH -pln -o NAME,TYPE | awk '$2 == "disk" { print $1 }') || true

    # Verify we found at least one disk device
    if [ -z "$devices" ]; then
        log_error "No disk devices found"
        exit "$EXIT_ERROR"
    fi

    # Process each disk device found (device paths contain no whitespace, so
    # word splitting of the list is intended)
    for disk in $devices; do
        # Verify device is actually a block device before processing
        if [ ! -b "$disk" ]; then
            log_error "Device $disk is not a block device, skipping..."
            continue
        fi
        process_disk "$disk"
    done

    log_message "Drive expansion completed"
    exit "$EXIT_SUCCESS"
}
|
||||
|
||||
# Execute the main function to start the script, forwarding the command-line
# arguments so main can use them if it ever needs to
main "$@"
|
||||
Executable
+914
@@ -0,0 +1,914 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: fail2ban-exporter.sh
|
||||
# Version: 2.0
|
||||
# Description: Prometheus exporter for fail2ban providing comprehensive metrics
|
||||
# for monitoring jail status, ban/unban activity, and threat analysis
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
#
|
||||
# Prerequisites:
|
||||
# - fail2ban-client command available
|
||||
# - fail2ban service running
|
||||
# - journalctl (systemd) for historical data
|
||||
# - netcat (nc) for HTTP mode
|
||||
# - /var/log/fail2ban.log for timestamp parsing
|
||||
#
|
||||
# Usage:
|
||||
# # Output to stdout
|
||||
# ./fail2ban-exporter.sh
|
||||
#
|
||||
# # HTTP server mode
|
||||
# ./fail2ban-exporter.sh --http -p 9191
|
||||
#
|
||||
# # Textfile collector mode
|
||||
# ./fail2ban-exporter.sh --textfile
|
||||
#
|
||||
# Metrics Exported:
|
||||
# Core Metrics (v1.0):
|
||||
# - fail2ban_up{} - Exporter status (1=up, 0=down)
|
||||
# - fail2ban_server_info{version,exporter_version} - Server version info
|
||||
# - fail2ban_jail_count{} - Total number of jails (gauge)
|
||||
# - fail2ban_jail_enabled{jail} - Jail enabled status (gauge)
|
||||
# - fail2ban_jail_failed_current{jail} - Currently failed attempts (gauge)
|
||||
# - fail2ban_jail_banned_current{jail} - Currently banned IPs (gauge)
|
||||
# - fail2ban_jail_failed_total{jail} - Total failed attempts (counter)
|
||||
# - fail2ban_jail_banned_total{jail} - Total banned IPs (counter)
|
||||
# - fail2ban_jail_ban_rate{jail} - Ban ratio: banned/failed (gauge)
|
||||
#
|
||||
# Enhanced Metrics (v2.0):
|
||||
# - fail2ban_jail_last_ban_timestamp{jail} - Unix timestamp of last ban (gauge)
|
||||
# - fail2ban_jail_last_unban_timestamp{jail} - Unix timestamp of last unban (gauge)
|
||||
# - fail2ban_jail_bans_per_period{jail,period} - Bans in 1h/24h (gauge)
|
||||
# - fail2ban_jail_unbans_per_period{jail,period} - Unbans in 1h/24h (gauge)
|
||||
# - fail2ban_jail_unique_banned_ips{jail,period} - Unique IPs banned (gauge)
|
||||
# - fail2ban_jail_info{jail,port,protocol,filter} - Jail configuration (gauge)
|
||||
# - fail2ban_jail_top_attacker_count{jail,ip} - Top 5 attacking IPs (gauge)
|
||||
# - fail2ban_jail_ban_rate_per_hour{jail} - Average bans/hour over 24h (gauge)
|
||||
# - fail2ban_jail_repeat_offenders{jail,threshold} - Repeat offender count (7d)
|
||||
# - fail2ban_jail_seconds_since_last_ban{jail} - Seconds since last ban
|
||||
# - fail2ban_jail_seconds_since_last_unban{jail} - Seconds since last unban
|
||||
# - fail2ban_log_size_bytes - Size of fail2ban.log file
|
||||
# - fail2ban_log_age_seconds - Time since last log modification
|
||||
# - fail2ban_log_rotation_timestamp - Last log rotation time
|
||||
# - fail2ban_exporter_duration_seconds - Script execution time
|
||||
# - fail2ban_exporter_last_run_timestamp - Last successful run time
|
||||
#
|
||||
# Configuration:
|
||||
# Default HTTP port: 9191
|
||||
# Textfile directory: /var/lib/node_exporter
|
||||
# Log source: /var/log/fail2ban.log
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# ============================================================================
|
||||
# CONFIGURATION VARIABLES
|
||||
# ============================================================================
|
||||
|
||||
# Directory used for --textfile output (node_exporter textfile collector)
TEXTFILE_DIR='/var/lib/node_exporter'

# Metrics destination file; empty until --textfile or -o/--output sets it
OUTPUT_FILE=''

# Switched to true by --http
HTTP_MODE='false'

# Port for HTTP mode; -p/--port overrides it
HTTP_PORT='9191'

# fail2ban log file that is parsed for ban/unban history
FAIL2BAN_LOG='/var/log/fail2ban.log'
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Print the command-line help text to stdout, then terminate the script with
# exit status 0.
# Globals: HTTP_PORT (read) - shown as the port used by --http
show_usage() {
    local prog="$0"

    printf '%s\n' \
        "Usage: $prog [OPTIONS]" \
        '' \
        'Export fail2ban statistics as Prometheus metrics (Enhanced v2.0).' \
        '' \
        'MODES:' \
        '--textfile Write to node_exporter textfile collector' \
        "--http Run HTTP server on port $HTTP_PORT" \
        '' \
        'OPTIONS:' \
        '-p, --port HTTP port (default: 9191)' \
        '-o, --output Output file path' \
        '' \
        'EXAMPLES:' \
        "$prog --textfile # Write to textfile collector" \
        "$prog --http --port 9191 # Run HTTP server" \
        "$prog -o /tmp/fail2ban.prom # Write to custom file" \
        '' \
        'NEW METRICS v2.0:' \
        '- Jail health: last ban/unban timestamps, ban rates' \
        '- Top attackers: most banned IPs per jail' \
        '- Ban duration: average, min, max per jail' \
        '- Protocol/port breakdown' \
        '- Jail uptime and status' \
        ''

    exit 0
}
|
||||
|
||||
# Parse command-line options into the exporter's global settings.
# Globals: TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Args: the script's command-line arguments ("$@")
# Exits: 1 on an unknown option, a missing option value or an invalid port.
#        -h/--help hands over to show_usage, which exits the script itself.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/fail2ban.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port)
                # Without this check "shift 2" fails when the value is
                # missing, nothing is consumed and the loop never ends.
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires an argument" >&2
                    exit 1
                fi
                # Accept only a decimal TCP port number (1-65535); 10# keeps
                # leading zeros from being read as octal.
                if ! [[ "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
                    echo "Invalid port: $2" >&2
                    exit 1
                fi
                HTTP_PORT="$2"
                shift 2
                ;;
            -o|--output)
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires an argument" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
# Verify that the fail2ban client exists and that the server answers a ping
# Returns: 0 if both checks pass, 1 otherwise (reason is written to stderr)
check_fail2ban() {
    # The client binary has to be resolvable before anything else can work
    command -v fail2ban-client >/dev/null 2>&1 || {
        echo "ERROR: fail2ban-client not found" >&2
        return 1
    }

    # A successful "ping" means the server process is reachable
    fail2ban-client ping >/dev/null 2>&1 || {
        echo "ERROR: fail2ban server not responding" >&2
        return 1
    }

    return 0
}
|
||||
|
||||
# List the jails reported by "fail2ban-client status"
# Returns: Jail names on one line, separated by single spaces
get_jails() {
    local status_text
    status_text=$(fail2ban-client status 2>/dev/null)

    # Keep what follows "Jail list:", drop tabs, break the comma-separated
    # names apart and let xargs re-join them with single spaces
    sed -n 's/.*Jail list://p' <<<"$status_text" \
        | tr -d '\t' \
        | tr ',' '\n' \
        | xargs
}
|
||||
|
||||
# Read the four counters shown by "fail2ban-client status <jail>"
# Args: $1 - jail name
# Returns: Pipe-delimited string: currently_failed|currently_banned|total_failed|total_banned
#          (a counter that is not found is reported as 0)
get_jail_stats() {
    local jail="$1"
    local report cur_failed cur_banned tot_failed tot_banned

    report=$(fail2ban-client status "$jail" 2>/dev/null)

    # Each counter is the last field of the line carrying its label
    cur_failed=$(awk '/Currently failed:/ {print $NF}' <<<"$report")
    cur_banned=$(awk '/Currently banned:/ {print $NF}' <<<"$report")
    tot_failed=$(awk '/Total failed:/ {print $NF}' <<<"$report")
    tot_banned=$(awk '/Total banned:/ {print $NF}' <<<"$report")

    printf '%s|%s|%s|%s\n' \
        "${cur_failed:-0}" "${cur_banned:-0}" "${tot_failed:-0}" "${tot_banned:-0}"
}
|
||||
|
||||
# Get list of currently banned IPs for a jail
# Args: $1 - jail name
# Returns: List of IPs, one per line, without surrounding whitespace.
#          Exit status is non-zero when the list is empty (grep's status).
get_banned_ips() {
    local jail="$1"

    # fail2ban-client separates the "Banned IP list:" label from the first
    # address with a tab; turning every run of whitespace (tabs included)
    # into a newline keeps that tab from sticking to the first IP.
    fail2ban-client status "$jail" 2>/dev/null \
        | sed -n 's/.*Banned IP list://p' \
        | tr -s '[:space:]' '\n' \
        | grep -v '^$'
}
|
||||
|
||||
# Get timestamp of last ban event for a jail
# Globals: FAIL2BAN_LOG (read)
# Args: $1 - jail name
# Returns: Unix timestamp (seconds since epoch) or 0 if not found
get_last_ban_timestamp() {
    local jail="$1"
    local stamp epoch=""

    # Match the jail tag as a fixed string: a jail name such as "a.b" must
    # not be read as a regular expression (where it would also match "[axb]").
    # The first two fields of the newest matching line are its date and time.
    stamp=$(grep -F -- "[$jail]" "$FAIL2BAN_LOG" 2>/dev/null \
        | grep -F "Ban " \
        | tail -n 1 \
        | awk '{print $1, $2}')

    # Convert the date/time text to epoch seconds; a parse failure yields 0
    if [ -n "$stamp" ]; then
        epoch=$(date -d "$stamp" +%s 2>/dev/null) || epoch=""
    fi

    echo "${epoch:-0}"
}
|
||||
|
||||
# Get timestamp of last unban event for a jail
# Globals: FAIL2BAN_LOG (read)
# Args: $1 - jail name
# Returns: Unix timestamp (seconds since epoch) or 0 if not found
get_last_unban_timestamp() {
    local jail="$1"
    local stamp epoch=""

    # Match the jail tag as a fixed string: a jail name such as "a.b" must
    # not be read as a regular expression (where it would also match "[axb]").
    # The first two fields of the newest matching line are its date and time.
    stamp=$(grep -F -- "[$jail]" "$FAIL2BAN_LOG" 2>/dev/null \
        | grep -F "Unban " \
        | tail -n 1 \
        | awk '{print $1, $2}')

    # Convert the date/time text to epoch seconds; a parse failure yields 0
    if [ -n "$stamp" ]; then
        epoch=$(date -d "$stamp" +%s 2>/dev/null) || epoch=""
    fi

    echo "${epoch:-0}"
}
|
||||
|
||||
# Count ban events within a time period
# Globals: FAIL2BAN_LOG (read)
# Args: $1 - jail name, $2 - time period (e.g., "1 hour ago")
# Returns: Number of ban events
get_ban_rate() {
    local jail="$1"
    local period="$2"
    local count cutoff_timestamp

    # Convert period to Unix timestamp (0 if date cannot parse it)
    cutoff_timestamp=$(date -d "$period" +%s 2>/dev/null || echo 0)

    # Try journalctl first. The jail tag is matched as a fixed string so that
    # characters such as "." in a jail name are not treated as regex syntax.
    count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
        | grep -cF -- "[$jail] Ban ")

    # If the journal yields nothing, fall back to the log file
    if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
        # The marker is passed with -v and matched with index(), so the jail
        # name is never spliced into the awk program text.
        count=$(awk -v marker="[$jail] Ban " -v cutoff="$cutoff_timestamp" '
            BEGIN { safe = "^[0-9A-Za-z:.,/+-]+$" }
            index($0, marker) {
                # Only hand plain date/time text to the shell command below
                if ($1 !~ safe || $2 !~ safe) next
                # Reset ts so a failed date call cannot reuse the old value
                ts = ""
                cmd = "date -d \"" $1 " " $2 "\" +%s 2>/dev/null"
                if ((cmd | getline ts) > 0 && ts >= cutoff) count++
                close(cmd)
            }
            END { print count + 0 }
        ' "$FAIL2BAN_LOG" 2>/dev/null)
    fi

    echo "${count:-0}"
}
|
||||
|
||||
# Count unban events within a time period
# Globals: FAIL2BAN_LOG (read)
# Args: $1 - jail name, $2 - time period (e.g., "1 hour ago")
# Returns: Number of unban events
get_unban_rate() {
    local jail="$1"
    local period="$2"
    local count cutoff_timestamp

    # Convert period to Unix timestamp (0 if date cannot parse it)
    cutoff_timestamp=$(date -d "$period" +%s 2>/dev/null || echo 0)

    # Try journalctl first. The jail tag is matched as a fixed string so that
    # characters such as "." in a jail name are not treated as regex syntax.
    count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
        | grep -cF -- "[$jail] Unban ")

    # If the journal yields nothing, fall back to the log file
    if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
        # The marker is passed with -v and matched with index(), so the jail
        # name is never spliced into the awk program text.
        count=$(awk -v marker="[$jail] Unban " -v cutoff="$cutoff_timestamp" '
            BEGIN { safe = "^[0-9A-Za-z:.,/+-]+$" }
            index($0, marker) {
                # Only hand plain date/time text to the shell command below
                if ($1 !~ safe || $2 !~ safe) next
                # Reset ts so a failed date call cannot reuse the old value
                ts = ""
                cmd = "date -d \"" $1 " " $2 "\" +%s 2>/dev/null"
                if ((cmd | getline ts) > 0 && ts >= cutoff) count++
                close(cmd)
            }
            END { print count + 0 }
        ' "$FAIL2BAN_LOG" 2>/dev/null)
    fi

    echo "${count:-0}"
}
|
||||
|
||||
# Get top attacking IPs by ban count, taken from the whole fail2ban log file
# Globals: FAIL2BAN_LOG (read)
# Args: $1 - jail name, $2 - limit (default: 5)
# Returns: Lines with "count IP" format, sorted by count descending
#          (IPv4 addresses only)
get_top_banned_ips() {
    local jail="$1"
    local limit="${2:-5}"

    # -F matches the jail tag literally, so a jail name containing regex
    # characters (e.g. ".") cannot pull in lines of a different jail
    grep -F -- "[$jail] Ban " "$FAIL2BAN_LOG" 2>/dev/null \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
        | sort \
        | uniq -c \
        | sort -rn \
        | head -n "$limit"
}
|
||||
|
||||
# Count unique IPs banned in a time period
# Globals: FAIL2BAN_LOG (read)
# Args: $1 - jail name, $2 - time period (e.g., "24 hours ago")
# Returns: Number of unique IPs (IPv4 addresses only)
get_unique_banned_ips() {
    local jail="$1"
    local period="$2"
    local count cutoff_timestamp

    # Convert period to Unix timestamp (0 if date cannot parse it)
    cutoff_timestamp=$(date -d "$period" +%s 2>/dev/null || echo 0)

    # Try journalctl first. The jail tag is matched as a fixed string so that
    # characters such as "." in a jail name are not treated as regex syntax.
    count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
        | grep -F -- "[$jail] Ban " \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
        | sort -u \
        | wc -l 2>/dev/null)

    # Fall back to log file if the journal yields nothing
    if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
        # The marker is passed with -v and matched with index(), so the jail
        # name is never spliced into the awk program text.
        count=$(awk -v marker="[$jail] Ban " -v cutoff="$cutoff_timestamp" '
            BEGIN { safe = "^[0-9A-Za-z:.,/+-]+$" }
            index($0, marker) && match($0, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/) {
                ip = substr($0, RSTART, RLENGTH)
                # Only hand plain date/time text to the shell command below
                if ($1 !~ safe || $2 !~ safe) next
                # Reset ts so a failed date call cannot reuse the old value
                ts = ""
                cmd = "date -d \"" $1 " " $2 "\" +%s 2>/dev/null"
                if ((cmd | getline ts) > 0 && ts >= cutoff) ips[ip] = 1
                close(cmd)
            }
            END {
                n = 0
                for (ip in ips) n++
                print n
            }
        ' "$FAIL2BAN_LOG" 2>/dev/null)
    fi

    echo "${count:-0}"
}
|
||||
|
||||
# Report ban-duration statistics for a jail
# NOTE: placeholder - the figures are fixed constants, nothing is parsed yet
# Args: $1 - jail name (currently ignored)
# Returns: Pipe-delimited string: avg|min|max in seconds
get_ban_duration_stats() {
    local avg_seconds=3600 # 1h
    local min_seconds=1800 # 30min
    local max_seconds=7200 # 2h

    printf '%s|%s|%s\n' "$avg_seconds" "$min_seconds" "$max_seconds"
}
|
||||
|
||||
# Look up the "port" setting of a jail in the fail2ban configuration
# Globals: FAIL2BAN_CONF_DIR (read, optional) - configuration directory,
#          defaults to /etc/fail2ban
# Args: $1 - jail name
# Returns: The configured port value, or "unknown" if none is found
get_jail_port() {
    local jail="$1"
    local conf_dir="${FAIL2BAN_CONF_DIR:-/etc/fail2ban}"
    local port=""

    # A per-jail drop-in file takes precedence; only the first "port =" line
    # is used so the result is always a single value
    if [ -f "$conf_dir/jail.d/$jail.conf" ]; then
        port=$(awk '
            /^port[ \t]*=/ {
                sub(/^[^=]*=[ \t]*/, "")
                sub(/[ \t]+$/, "")
                print
                exit
            }
        ' "$conf_dir/jail.d/$jail.conf" 2>/dev/null)
    fi

    # Otherwise read the [jail] section of jail.local. A flag tracks whether
    # the current line lies inside that section; an awk range pattern cannot
    # be used because the "[jail]" header line itself matches the "^[" end
    # pattern, which would close the range on the very line that opened it.
    if [ -z "$port" ] && [ -f "$conf_dir/jail.local" ]; then
        port=$(awk -v header="[$jail]" '
            /^\[/ { in_jail = ($1 == header); next }
            in_jail && /^port[ \t]*=/ {
                sub(/^[^=]*=[ \t]*/, "")
                sub(/[ \t]+$/, "")
                print
                exit
            }
        ' "$conf_dir/jail.local" 2>/dev/null)
    fi

    echo "${port:-unknown}"
}
|
||||
|
||||
# Guess the transport protocol of a jail from its name
# Args: $1 - jail name
# Returns: Protocol (tcp/udp); tcp when the name gives no hint
get_jail_protocol() {
    local jail="$1"
    local proto

    # Name-based heuristic; the first matching arm wins, so a name matching
    # both a TCP pattern and *dns* is reported as tcp
    case "$jail" in
        *ssh* | *http* | *nginx* | *apache* | *smtp* | *mail*)
            proto="tcp"
            ;;
        *dns*)
            proto="udp"
            ;;
        *)
            proto="tcp"
            ;;
    esac

    echo "$proto"
}
|
||||
|
||||
# Look up the "logpath" setting of a jail in the fail2ban configuration
# Globals: FAIL2BAN_CONF_DIR (read, optional) - configuration directory,
#          defaults to /etc/fail2ban
# Args: $1 - jail name
# Returns: The configured log path, or /var/log/auth.log if none is found
get_jail_logpath() {
    local jail="$1"
    local conf_dir="${FAIL2BAN_CONF_DIR:-/etc/fail2ban}"
    local logpath=""

    # A per-jail drop-in file takes precedence; only the first "logpath ="
    # line is used so the result is always a single value
    if [ -f "$conf_dir/jail.d/$jail.conf" ]; then
        logpath=$(awk '
            /^logpath[ \t]*=/ {
                sub(/^[^=]*=[ \t]*/, "")
                sub(/[ \t]+$/, "")
                print
                exit
            }
        ' "$conf_dir/jail.d/$jail.conf" 2>/dev/null)
    fi

    # Otherwise read the [jail] section of jail.local. A flag tracks whether
    # the current line lies inside that section; an awk range pattern cannot
    # be used because the "[jail]" header line itself matches the "^[" end
    # pattern, which would close the range on the very line that opened it.
    if [ -z "$logpath" ] && [ -f "$conf_dir/jail.local" ]; then
        logpath=$(awk -v header="[$jail]" '
            /^\[/ { in_jail = ($1 == header); next }
            in_jail && /^logpath[ \t]*=/ {
                sub(/^[^=]*=[ \t]*/, "")
                sub(/[ \t]+$/, "")
                print
                exit
            }
        ' "$conf_dir/jail.local" 2>/dev/null)
    fi

    echo "${logpath:-/var/log/auth.log}"
}
|
||||
|
||||
# Look up the filter name used by a jail
# Globals: FAIL2BAN_CONF_DIR (read, optional) - configuration directory,
#          defaults to /etc/fail2ban
# Args: $1 - jail name
# Returns: The "filter" value from jail.d/<jail>.local, or the jail name when
#          the file or the setting is missing
get_jail_filter() {
    local jail="$1"
    local conf_dir="${FAIL2BAN_CONF_DIR:-/etc/fail2ban}"
    local filter=""

    # The value is captured first and the default applied afterwards: with a
    # "grep | awk || echo" chain the fallback never runs, because the
    # pipeline's status is awk's, which is 0 even when nothing matched.
    if [ -f "$conf_dir/jail.d/$jail.local" ]; then
        filter=$(awk '
            /^filter[ \t]*=/ {
                sub(/^[^=]*=[ \t]*/, "")
                sub(/[ \t]+$/, "")
                print
                exit
            }
        ' "$conf_dir/jail.d/$jail.local" 2>/dev/null)
    fi

    echo "${filter:-$jail}" # Default to jail name
}
|
||||
|
||||
# Report whether fail2ban answers a status query for a jail
# Args: $1 - jail name
# Returns: "1" if "fail2ban-client status <jail>" succeeds, "0" otherwise
get_jail_enabled() {
    local jail="$1"
    local state="0"

    # A successful status query is taken as "jail is enabled"
    fail2ban-client status "$jail" >/dev/null 2>&1 && state="1"

    echo "$state"
}
|
||||
|
||||
# Count IPs that were banned at least <threshold> times in the last 7 days
# Globals: FAIL2BAN_LOG (read)
# Args: $1 - jail name, $2 - minimum number of bans (default: 2)
# Returns: Number of repeat-offender IPs (IPv4 addresses only)
get_repeat_offender_count() {
    local jail="$1"
    local threshold="${2:-2}" # Default: 2+ bans = repeat offender
    local count cutoff_timestamp

    # 7 days ago timestamp (0 if date cannot compute it)
    cutoff_timestamp=$(date -d "7 days ago" +%s 2>/dev/null || echo 0)

    # Try journalctl first. The jail tag is matched as a fixed string so that
    # characters such as "." in a jail name are not treated as regex syntax.
    count=$(journalctl -u fail2ban --since "7 days ago" 2>/dev/null \
        | grep -F -- "[$jail] Ban " \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
        | sort \
        | uniq -c \
        | awk -v t="$threshold" '$1 >= t {count++} END {print count+0}')

    # Fall back to log file if the journal yields nothing
    if [ "${count:-0}" -eq 0 ] && [ -f "$FAIL2BAN_LOG" ]; then
        # The marker is passed with -v and matched with index(), so the jail
        # name is never spliced into the awk program text.
        count=$(awk -v marker="[$jail] Ban " -v cutoff="$cutoff_timestamp" -v threshold="$threshold" '
            BEGIN { safe = "^[0-9A-Za-z:.,/+-]+$" }
            index($0, marker) && match($0, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/) {
                ip = substr($0, RSTART, RLENGTH)
                # Only hand plain date/time text to the shell command below
                if ($1 !~ safe || $2 !~ safe) next
                # Reset ts so a failed date call cannot reuse the old value
                ts = ""
                cmd = "date -d \"" $1 " " $2 "\" +%s 2>/dev/null"
                if ((cmd | getline ts) > 0 && ts >= cutoff) ip_count[ip]++
                close(cmd)
            }
            END {
                repeat_count = 0
                for (ip in ip_count) {
                    if (ip_count[ip] >= threshold) repeat_count++
                }
                print repeat_count
            }
        ' "$FAIL2BAN_LOG" 2>/dev/null)
    fi

    echo "${count:-0}"
}
|
||||
|
||||
# Report the size of the fail2ban log file
# Globals: FAIL2BAN_LOG (read)
# Returns: Size in bytes, or 0 if the file is missing or stat fails
get_log_size() {
    local size_bytes

    if [ -f "$FAIL2BAN_LOG" ] && size_bytes=$(stat -c %s "$FAIL2BAN_LOG" 2>/dev/null); then
        echo "$size_bytes"
    else
        echo "0"
    fi
}
|
||||
|
||||
# Report how long ago the fail2ban log file was last modified
# Globals: FAIL2BAN_LOG (read)
# Returns: Age in seconds, or 0 if the file does not exist
get_log_age() {
    local now_epoch modified_epoch

    if [ ! -f "$FAIL2BAN_LOG" ]; then
        echo "0"
        return
    fi

    now_epoch=$(date +%s)
    # If stat fails the modification time is taken as 0 (the epoch)
    modified_epoch=$(stat -c %Y "$FAIL2BAN_LOG" 2>/dev/null || echo 0)

    echo $((now_epoch - modified_epoch))
}
|
||||
|
||||
# Report when the fail2ban log was last rotated
# Globals: FAIL2BAN_LOG (read)
# Returns: Modification time (Unix timestamp) of the newest rotated log
#          ("<log>.1" or "<log>-*.gz"), or 0 if there is none
get_log_rotation_timestamp() {
    local newest_rotated

    # ls -t lists newest first, so the first line is the latest rotation
    newest_rotated=$(ls -t "${FAIL2BAN_LOG}".1 "${FAIL2BAN_LOG}"-*.gz 2>/dev/null | head -1)

    if [ -z "$newest_rotated" ]; then
        echo "0"
        return
    fi

    stat -c %Y "$newest_rotated" 2>/dev/null || echo "0"
}
|
||||
|
||||
# ============================================================================
|
||||
# METRIC GENERATION
|
||||
# ============================================================================
|
||||
|
||||
# Generate all Prometheus metrics
|
||||
# Returns: Prometheus text format metrics on stdout
|
||||
generate_metrics() {
|
||||
local script_start=$(date +%s)
|
||||
|
||||
# Check fail2ban status first
|
||||
if ! check_fail2ban; then
|
||||
cat <<EOF
|
||||
# HELP fail2ban_up Fail2ban exporter status
|
||||
# TYPE fail2ban_up gauge
|
||||
fail2ban_up 0
|
||||
EOF
|
||||
return
|
||||
fi
|
||||
|
||||
local jails
|
||||
jails=$(get_jails)
|
||||
local jail_count
|
||||
jail_count=$(echo "$jails" | wc -w)
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_up Fail2ban exporter status
|
||||
# TYPE fail2ban_up gauge
|
||||
fail2ban_up 1
|
||||
|
||||
# HELP fail2ban_server_info Fail2ban server information
|
||||
# TYPE fail2ban_server_info gauge
|
||||
fail2ban_server_info{version="$(fail2ban-client version 2>/dev/null | head -1 | awk '{print $NF}')",exporter_version="2.0"} 1
|
||||
|
||||
# HELP fail2ban_jail_count Total number of jails
|
||||
# TYPE fail2ban_jail_count gauge
|
||||
fail2ban_jail_count $jail_count
|
||||
|
||||
# HELP fail2ban_jail_enabled Jail enabled status
|
||||
# TYPE fail2ban_jail_enabled gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local enabled
|
||||
enabled=$(get_jail_enabled "$jail")
|
||||
echo "fail2ban_jail_enabled{jail=\"$jail\"} $enabled"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_failed_current Currently failed login attempts per jail
|
||||
# TYPE fail2ban_jail_failed_current gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local stats
|
||||
stats=$(get_jail_stats "$jail")
|
||||
local currently_failed
|
||||
currently_failed=$(echo "$stats" | cut -d'|' -f1)
|
||||
echo "fail2ban_jail_failed_current{jail=\"$jail\"} ${currently_failed:-0}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_banned_current Currently banned IPs per jail
|
||||
# TYPE fail2ban_jail_banned_current gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local stats
|
||||
stats=$(get_jail_stats "$jail")
|
||||
local currently_banned
|
||||
currently_banned=$(echo "$stats" | cut -d'|' -f2)
|
||||
echo "fail2ban_jail_banned_current{jail=\"$jail\"} ${currently_banned:-0}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_failed_total Total failed login attempts per jail
|
||||
# TYPE fail2ban_jail_failed_total counter
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local stats
|
||||
stats=$(get_jail_stats "$jail")
|
||||
local total_failed
|
||||
total_failed=$(echo "$stats" | cut -d'|' -f3)
|
||||
echo "fail2ban_jail_failed_total{jail=\"$jail\"} ${total_failed:-0}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_banned_total Total banned IPs per jail (all time)
|
||||
# TYPE fail2ban_jail_banned_total counter
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local stats
|
||||
stats=$(get_jail_stats "$jail")
|
||||
local total_banned
|
||||
total_banned=$(echo "$stats" | cut -d'|' -f4)
|
||||
echo "fail2ban_jail_banned_total{jail=\"$jail\"} ${total_banned:-0}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_ban_rate Ban rate (total_banned / total_failed) per jail
|
||||
# TYPE fail2ban_jail_ban_rate gauge
|
||||
EOF
|
||||
|
||||
# Calculate ban rate (ratio of banned to failed attempts)
|
||||
for jail in $jails; do
|
||||
local stats
|
||||
stats=$(get_jail_stats "$jail")
|
||||
local total_failed total_banned ban_rate
|
||||
total_failed=$(echo "$stats" | cut -d'|' -f3)
|
||||
total_banned=$(echo "$stats" | cut -d'|' -f4)
|
||||
|
||||
# Avoid division by zero
|
||||
if [ "${total_failed:-0}" -gt 0 ] 2>/dev/null; then
|
||||
# Use awk for floating point arithmetic
|
||||
ban_rate=$(awk "BEGIN {printf \"%.4f\", ${total_banned:-0} / ${total_failed}}" 2>/dev/null || echo "0")
|
||||
else
|
||||
ban_rate="0"
|
||||
fi
|
||||
|
||||
echo "fail2ban_jail_ban_rate{jail=\"$jail\"} $ban_rate"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# ========================================================================
|
||||
# ENHANCED METRICS (v2.0) - Jail Health & Activity Tracking
|
||||
# ========================================================================
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_last_ban_timestamp Timestamp of last ban per jail
|
||||
# TYPE fail2ban_jail_last_ban_timestamp gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local last_ban
|
||||
last_ban=$(get_last_ban_timestamp "$jail")
|
||||
echo "fail2ban_jail_last_ban_timestamp{jail=\"$jail\"} ${last_ban}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_last_unban_timestamp Timestamp of last unban per jail
|
||||
# TYPE fail2ban_jail_last_unban_timestamp gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local last_unban
|
||||
last_unban=$(get_last_unban_timestamp "$jail")
|
||||
echo "fail2ban_jail_last_unban_timestamp{jail=\"$jail\"} ${last_unban}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# NEW METRICS - Ban/Unban Rates
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_bans_per_period Bans in time period per jail
|
||||
# TYPE fail2ban_jail_bans_per_period gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local bans_1h bans_24h
|
||||
bans_1h=$(get_ban_rate "$jail" "1 hour ago")
|
||||
bans_24h=$(get_ban_rate "$jail" "24 hours ago")
|
||||
echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"1h\"} ${bans_1h}"
|
||||
echo "fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"24h\"} ${bans_24h}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_unbans_per_period Unbans in time period per jail
|
||||
# TYPE fail2ban_jail_unbans_per_period gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local unbans_1h unbans_24h
|
||||
unbans_1h=$(get_unban_rate "$jail" "1 hour ago")
|
||||
unbans_24h=$(get_unban_rate "$jail" "24 hours ago")
|
||||
echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"1h\"} ${unbans_1h}"
|
||||
echo "fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"24h\"} ${unbans_24h}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# NEW METRICS - Unique IPs
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_unique_banned_ips Unique IPs banned in period per jail
|
||||
# TYPE fail2ban_jail_unique_banned_ips gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local unique_1h unique_24h
|
||||
unique_1h=$(get_unique_banned_ips "$jail" "1 hour ago")
|
||||
unique_24h=$(get_unique_banned_ips "$jail" "24 hours ago")
|
||||
echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"1h\"} ${unique_1h}"
|
||||
echo "fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"24h\"} ${unique_24h}"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# NEW METRICS - Jail Configuration
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_info Jail configuration information
|
||||
# TYPE fail2ban_jail_info gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local port protocol filter
|
||||
port=$(get_jail_port "$jail")
|
||||
protocol=$(get_jail_protocol "$jail")
|
||||
filter=$(get_jail_filter "$jail")
|
||||
echo "fail2ban_jail_info{jail=\"$jail\",port=\"$port\",protocol=\"$protocol\",filter=\"$filter\"} 1"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# NEW METRICS - Top Attackers (as labels with counts)
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_top_attacker_count Top attacking IPs per jail (24h)
|
||||
# TYPE fail2ban_jail_top_attacker_count gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
while read -r count ip; do
|
||||
[ -z "$ip" ] && continue
|
||||
echo "fail2ban_jail_top_attacker_count{jail=\"$jail\",ip=\"$ip\"} $count"
|
||||
done < <(get_top_banned_ips "$jail" 5)
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# NEW METRICS - Ban Effectiveness (bans per hour rate)
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_ban_rate_per_hour Bans per hour over last 24h per jail
|
||||
# TYPE fail2ban_jail_ban_rate_per_hour gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local bans_24h ban_rate
|
||||
bans_24h=$(get_ban_rate "$jail" "24 hours ago")
|
||||
|
||||
# Strip whitespace and ensure integer
|
||||
bans_24h=$(echo "$bans_24h" | tr -d '\n' | tr -d ' ')
|
||||
bans_24h=${bans_24h:-0}
|
||||
|
||||
# Calculate average: total bans in 24h divided by 24 hours
|
||||
if [ "$bans_24h" -gt 0 ] 2>/dev/null; then
|
||||
ban_rate=$(awk "BEGIN {printf \"%.2f\", $bans_24h / 24}" 2>/dev/null || echo "0")
|
||||
else
|
||||
ban_rate="0.00"
|
||||
fi
|
||||
|
||||
echo "fail2ban_jail_ban_rate_per_hour{jail=\"$jail\"} $ban_rate"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# NEW METRICS - Repeat Offenders
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_repeat_offenders IPs banned multiple times (7 day window)
|
||||
# TYPE fail2ban_jail_repeat_offenders gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local repeat_2 repeat_5 repeat_10
|
||||
repeat_2=$(get_repeat_offender_count "$jail" 2)
|
||||
repeat_5=$(get_repeat_offender_count "$jail" 5)
|
||||
repeat_10=$(get_repeat_offender_count "$jail" 10)
|
||||
echo "fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"2+\"} $repeat_2"
|
||||
echo "fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"5+\"} $repeat_5"
|
||||
echo "fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"10+\"} $repeat_10"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# Log file health metrics
|
||||
local log_size log_age log_rotation
|
||||
log_size=$(get_log_size)
|
||||
log_age=$(get_log_age)
|
||||
log_rotation=$(get_log_rotation_timestamp)
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_log_size_bytes Size of fail2ban log file
|
||||
# TYPE fail2ban_log_size_bytes gauge
|
||||
fail2ban_log_size_bytes $log_size
|
||||
|
||||
# HELP fail2ban_log_age_seconds Time since last log file modification
|
||||
# TYPE fail2ban_log_age_seconds gauge
|
||||
fail2ban_log_age_seconds $log_age
|
||||
|
||||
# HELP fail2ban_log_rotation_timestamp Unix timestamp of last log rotation
|
||||
# TYPE fail2ban_log_rotation_timestamp gauge
|
||||
fail2ban_log_rotation_timestamp $log_rotation
|
||||
EOF
|
||||
|
||||
echo ""
|
||||
|
||||
# Time since last ban/unban (easier to alert on than timestamps)
|
||||
local current_time
|
||||
current_time=$(date +%s)
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_seconds_since_last_ban Seconds since last ban per jail
|
||||
# TYPE fail2ban_jail_seconds_since_last_ban gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local last_ban seconds_since
|
||||
last_ban=$(get_last_ban_timestamp "$jail")
|
||||
if [ "$last_ban" -gt 0 ]; then
|
||||
seconds_since=$((current_time - last_ban))
|
||||
else
|
||||
seconds_since=0
|
||||
fi
|
||||
echo "fail2ban_jail_seconds_since_last_ban{jail=\"$jail\"} $seconds_since"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_jail_seconds_since_last_unban Seconds since last unban per jail
|
||||
# TYPE fail2ban_jail_seconds_since_last_unban gauge
|
||||
EOF
|
||||
|
||||
for jail in $jails; do
|
||||
local last_unban seconds_since
|
||||
last_unban=$(get_last_unban_timestamp "$jail")
|
||||
if [ "$last_unban" -gt 0 ]; then
|
||||
seconds_since=$((current_time - last_unban))
|
||||
else
|
||||
seconds_since=0
|
||||
fi
|
||||
echo "fail2ban_jail_seconds_since_last_unban{jail=\"$jail\"} $seconds_since"
|
||||
done
|
||||
|
||||
echo ""
|
||||
|
||||
# Exporter runtime
|
||||
local script_end script_duration
|
||||
script_end=$(date +%s)
|
||||
script_duration=$((script_end - script_start))
|
||||
|
||||
cat <<EOF
|
||||
# HELP fail2ban_exporter_duration_seconds Time to generate all metrics
|
||||
# TYPE fail2ban_exporter_duration_seconds gauge
|
||||
fail2ban_exporter_duration_seconds $script_duration
|
||||
|
||||
# HELP fail2ban_exporter_last_run_timestamp Unix timestamp of last successful run
|
||||
# TYPE fail2ban_exporter_last_run_timestamp gauge
|
||||
fail2ban_exporter_last_run_timestamp $script_end
|
||||
EOF
|
||||
|
||||
echo ""
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# HTTP SERVER MODE
|
||||
# ============================================================================
|
||||
|
||||
# Run a minimal, one-connection-at-a-time HTTP server on top of netcat.
# Serves the metrics on /metrics and an HTML landing page for anything else.
#
# Globals:  HTTP_PORT (read)
# Outputs:  startup/error messages on stderr; HTTP responses go to the client
# Returns:  never returns normally (loops until killed); exits 1 if nc is missing
#
# netcat is driven as a coprocess so that the request line is read from the
# client connection itself. Piping the handler *into* nc would make `read`
# consume the script's own stdin, so the requested path could never be seen
# and the response would be generated before any client had connected.
run_http_server() {
    echo "Starting fail2ban exporter on port $HTTP_PORT..." >&2

    if ! command -v nc >/dev/null 2>&1; then
        echo "ERROR: netcat (nc) required for HTTP mode" >&2
        exit 1
    fi

    # One loop iteration serves exactly one client connection.
    while true; do
        # The connection is handled in a subshell so that a client hanging up
        # early (SIGPIPE while writing) only ends this iteration, not the server.
        (
            # -q 1: wait 1 second after EOF on stdin before closing
            coproc F2B_NC { nc -l -p "$HTTP_PORT" -q 1 2>/dev/null; }
            nc_pid=$F2B_NC_PID
            from_nc=${F2B_NC[0]}   # client -> us (request)
            to_nc=${F2B_NC[1]}     # us -> client (response)

            # Blocks until a client connects and sends its request line
            request=""
            IFS= read -r request <&"$from_nc"

            {
                # Check if request is for /metrics endpoint
                if [[ "$request" =~ ^GET\ /metrics ]]; then
                    echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
                    generate_metrics
                else # Serve HTML landing page for other requests
                    echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
                    cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Fail2ban Exporter v2.0</title></head>
<body>
<h1>Fail2ban Prometheus Exporter (Enhanced v2.0)</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>New Metrics</h2>
<ul>
<li>Last ban/unban timestamps per jail</li>
<li>Ban/unban rates (1h, 24h)</li>
<li>Unique banned IPs per period</li>
<li>Top attackers per jail</li>
<li>Jail configuration info (port, protocol, filter)</li>
<li>Ban rate per hour</li>
</ul>
</body>
</html>
EOF
                fi
            } >&"$to_nc"

            # Closing our write end gives nc EOF on stdin so it can finish
            # the connection and exit; then reap it before the next listen.
            exec {to_nc}>&-
            wait "$nc_pid" 2>/dev/null
        ) || true
    done
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN EXECUTION
|
||||
# ============================================================================
|
||||
|
||||
# Main entry point - parses arguments and routes to the selected output mode.
#
# Globals:   HTTP_MODE, OUTPUT_FILE (read; presumably set by parse_args)
# Arguments: all script arguments, forwarded to parse_args
# Outputs:   metrics on stdout (default mode) or into OUTPUT_FILE (textfile
#            mode); status and error messages on stderr
# Returns:   exits 1 when the metrics file cannot be produced or installed
main() {
    parse_args "$@"

    if [ "$HTTP_MODE" = true ]; then
        # Run HTTP server (blocks until killed)
        run_http_server
    elif [ -n "$OUTPUT_FILE" ]; then
        # Textfile collector mode: write atomically using temp file
        local output_dir temp_file file_lines
        output_dir="$(dirname "$OUTPUT_FILE")"
        if ! mkdir -p "$output_dir"; then
            echo "ERROR: Cannot create output directory $output_dir" >&2
            exit 1
        fi

        # Create temp file in SAME directory for atomic rename (same filesystem)
        if ! temp_file=$(mktemp "${output_dir}/.fail2ban_metrics.XXXXXX"); then
            echo "ERROR: Cannot create temp file in $output_dir" >&2
            exit 1
        fi

        # Generate metrics to temp file
        if ! generate_metrics > "$temp_file" 2>/dev/null; then
            rm -f "$temp_file"
            echo "ERROR: Failed to generate metrics" >&2
            exit 1
        fi

        # Validate: the output must have at least 10 lines. A shorter file
        # is treated as a failed run and the previous file is left in place.
        file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
        file_lines=${file_lines//[[:space:]]/}   # some wc builds pad the count

        if [ "$file_lines" -lt 10 ]; then
            rm -f "$temp_file"
            echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
            exit 1
        fi

        # Set permissions, then rename atomically - no gap where the file is
        # missing. On failure do not leave the temp file behind and do not
        # claim success.
        if ! chmod 644 "$temp_file" || ! mv -f "$temp_file" "$OUTPUT_FILE"; then
            rm -f "$temp_file"
            echo "ERROR: Failed to install metrics file $OUTPUT_FILE" >&2
            exit 1
        fi

        echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
    else
        # Default: output to stdout
        generate_metrics
    fi
}
|
||||
|
||||
# Script entry point: run main, forwarding every command-line argument unchanged
|
||||
main "$@"
|
||||
Executable
+787
@@ -0,0 +1,787 @@
|
||||
#!/bin/bash
|
||||
|
||||
################################################
|
||||
#### GitLab Metrics Collector ####
|
||||
#### for Prometheus node_exporter textfile ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### Version: 1.00-030426 ####
|
||||
################################################
|
||||
|
||||
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# Script name as shown in usage and error messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Built-in defaults, used when the matching environment variable is
# unset or empty.
readonly DEFAULT_NODE_DIR="/var/lib/node_exporter" \
         DEFAULT_COLLECTION_INTERVAL=60 \
         DEFAULT_MAX_PROJECTS=100 \
         DEFAULT_CURL_TIMEOUT=30 \
         DEFAULT_METRICS_URL="http://localhost/-/metrics" \
         DEFAULT_SIDEKIQ_URL="http://localhost:8082/metrics"

# Effective configuration: the environment wins, otherwise the default applies.
: "${GITLAB_URL:=}"
: "${GITLAB_TOKEN:=}"
: "${GITLAB_METRICS_URL:=$DEFAULT_METRICS_URL}"
: "${GITLAB_SIDEKIQ_URL:=$DEFAULT_SIDEKIQ_URL}"
: "${NODE_DIR:=$DEFAULT_NODE_DIR}"
: "${COLLECTION_INTERVAL:=$DEFAULT_COLLECTION_INTERVAL}"
: "${MAX_PROJECTS:=$DEFAULT_MAX_PROJECTS}"
: "${DEBUG:=}"

# Runtime flags (initial values)
RUN_MODE="once"
LOCAL_MODE=false

# Running count of collection errors
ERRORS_TOTAL=0
|
||||
|
||||
# Report a failed command on stderr and terminate the script.
# Globals:   SCRIPT_NAME (read)
# Arguments: $1 - exit status of the failed command
#            $2 - line number where it failed
# Returns:   does not return; exits with the status given in $1
handle_error() {
    local -r failed_status=$1 failed_line=$2

    printf '%s\n' "Error: $SCRIPT_NAME failed at line $failed_line with exit code $failed_status" >&2
    exit "$failed_status"
}

# Route failures of simple commands to handle_error.
trap 'handle_error "$?" "$LINENO"' ERR
|
||||
|
||||
# Print a "[DEBUG]"-prefixed message on stderr when DEBUG is non-empty.
# Globals:   DEBUG (read)
# Arguments: the message words, joined with single spaces
debug_echo() {
    [[ -n "$DEBUG" ]] || return 0
    printf '%s\n' "[DEBUG] $*" >&2
}
|
||||
|
||||
# Print the usage text on stdout.
# Globals: SCRIPT_NAME and the DEFAULT_* constants (read, expanded into the text)
show_help() {
    cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

GitLab metrics collector for Prometheus node_exporter textfile directory.

OPTIONS:
--once Run collection once and exit (default)
--daemon Run continuously at COLLECTION_INTERVAL
--local Scrape /-/metrics endpoint for server-side metrics (Puma, Sidekiq, Redis, DB)
--help, -h Show this help message

ENVIRONMENT VARIABLES:
GITLAB_URL GitLab base URL (required, e.g. https://gitlab.example.com)
GITLAB_TOKEN GitLab private access token (required for API metrics)
GITLAB_METRICS_URL Local metrics endpoint URL (default: $DEFAULT_METRICS_URL)
GITLAB_SIDEKIQ_URL Sidekiq exporter endpoint URL (default: $DEFAULT_SIDEKIQ_URL)
NODE_DIR Node exporter textfile directory (default: $DEFAULT_NODE_DIR)
COLLECTION_INTERVAL Seconds between collections in daemon mode (default: $DEFAULT_COLLECTION_INTERVAL)
MAX_PROJECTS Maximum number of projects to collect per-project metrics for (default: $DEFAULT_MAX_PROJECTS)
DEBUG Enable debug output

EXAMPLES:
GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --once
GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --daemon
$SCRIPT_NAME --local --once
GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --local --daemon
DEBUG=1 GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME
EOF
}
|
||||
|
||||
# Replace every character outside [a-zA-Z0-9_] with an underscore.
# Arguments: $1 - raw text
# Outputs:   the sanitized text on stdout, followed by a newline
sanitize_label() {
    local raw="$1"
    local cleaned="${raw//[^a-zA-Z0-9_]/_}"

    printf '%s\n' "$cleaned"
}
|
||||
|
||||
# Perform an authenticated GET request against the GitLab instance.
# Globals:   GITLAB_URL, GITLAB_TOKEN, DEFAULT_CURL_TIMEOUT (read)
# Arguments: $1 - path appended to GITLAB_URL
#            $2 - "true" to also dump the response headers to stdout
#                 ahead of the body (default: false)
# Outputs:   the response (optionally preceded by headers) on stdout
# Returns:   curl's exit status
gitlab_api() {
    local endpoint="$1"
    local include_headers="${2:-false}"
    local url="${GITLAB_URL}${endpoint}"
    local -a curl_args=(
        -sf
        --max-time "$DEFAULT_CURL_TIMEOUT"
        --header "PRIVATE-TOKEN: $GITLAB_TOKEN"
    )

    debug_echo "API call: $url"

    # -D - writes the response headers to stdout
    [[ "$include_headers" == "true" ]] && curl_args+=(-D -)

    curl "${curl_args[@]}" "$url" 2>/dev/null
}
|
||||
|
||||
# Fetch every page of a list endpoint and merge the pages into one JSON array.
# Arguments: $1 - API path (may already carry a query string)
#            $2 - page size (default: 100)
#            $3 - maximum number of pages to fetch (default: 50)
# Outputs:   the merged JSON array on stdout ("[]" when nothing was fetched)
# Returns:   0 when the first page was fetched and parsed (later pages are
#            best effort and partial results are kept); 1 when the first
#            page already failed, so callers can tell "no data" from
#            "request failed"
gitlab_api_paginated() {
    local endpoint="$1"
    local per_page="${2:-100}"
    local max_pages="${3:-50}"

    local page=1
    local all_results="[]"
    local status=0

    # Join the paging parameters with "&" when the endpoint already has a query
    local separator="?"
    if [[ "$endpoint" == *"?"* ]]; then
        separator="&"
    fi

    local response headers body count next_page
    while [[ $page -le $max_pages ]]; do
        if ! response=$(gitlab_api "${endpoint}${separator}per_page=${per_page}&page=${page}" "true" 2>/dev/null); then
            if [[ $page -eq 1 ]]; then
                status=1
            fi
            break
        fi

        # The response is "headers, blank line, body" (gitlab_api dumps headers)
        headers=$(echo "$response" | sed '/^\r\{0,1\}$/q')
        body=$(echo "$response" | sed '1,/^\r\{0,1\}$/d')

        if [[ -z "$body" ]] || ! echo "$body" | jq -e '.' >/dev/null 2>&1; then
            if [[ $page -eq 1 ]]; then
                status=1
            fi
            break
        fi

        count=$(echo "$body" | jq 'length' 2>/dev/null) || break
        if [[ "$count" -eq 0 ]]; then
            break
        fi

        all_results=$(echo "$all_results" "$body" | jq -s '.[0] + .[1]' 2>/dev/null) || break

        # An empty or missing X-Next-Page header marks the last page
        next_page=$(echo "$headers" | grep -i '^x-next-page:' | tr -d '[:space:]' | cut -d: -f2)
        if [[ -z "$next_page" ]]; then
            break
        fi

        page=$((page + 1))
    done

    echo "$all_results"
    return "$status"
}
|
||||
|
||||
# Emit instance reachability and version metrics in Prometheus text format.
# Globals: ERRORS_TOTAL (incremented when the version request fails)
# Outputs: metric lines on stdout, followed by one empty line
collect_instance_health() {
    local -a lines=()

    debug_echo "Collecting instance health metrics"

    # Reachability: 1 when the health endpoint answers successfully, else 0
    local reachable=0
    gitlab_api "/-/health" >/dev/null 2>&1 && reachable=1
    lines+=(
        "# HELP gitlab_instance_up Whether the GitLab instance is reachable"
        "# TYPE gitlab_instance_up gauge"
        "gitlab_instance_up $reachable"
    )

    # Version information, exposed as labels on a constant-1 gauge
    local payload
    if payload=$(gitlab_api "/api/v4/version" 2>/dev/null); then
        local ver rev
        ver=$(jq -r '.version // "unknown"' <<<"$payload" 2>/dev/null)
        rev=$(jq -r '.revision // "unknown"' <<<"$payload" 2>/dev/null)
        lines+=(
            "# HELP gitlab_instance_version_info GitLab version information"
            "# TYPE gitlab_instance_version_info gauge"
            "gitlab_instance_version_info{version=\"$ver\",revision=\"$rev\"} 1"
        )
        debug_echo "GitLab version: $ver ($rev)"
    else
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect version info"
    fi

    # Terminate each entry with a literal "\n" and let echo -e expand them
    local joined
    printf -v joined '%s\\n' "${lines[@]}"
    echo -e "$joined"
}
|
||||
|
||||
# Emit per-project counters and storage statistics in Prometheus text format.
# Globals: ERRORS_TOTAL (incremented when the project list cannot be fetched)
# Outputs: for every metric its HELP/TYPE header followed by one sample per
#          project, then one empty line
#
# The project loop reads from a process substitution, not from a pipeline:
# a piped `while` runs in a subshell, which would discard every sample
# collected inside the loop and leave only the headers in the output.
collect_project_statistics() {
    debug_echo "Collecting project statistics"

    local projects
    if ! projects=$(gitlab_api_paginated "/api/v4/projects?statistics=true" 100 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect project statistics"
        return
    fi

    local project_count
    project_count=$(jq 'length' <<<"$projects" 2>/dev/null) || project_count=0
    debug_echo "Found $project_count projects"

    # metric name | jq path inside a project object | HELP text
    local -a specs=(
        "gitlab_project_stars_count|.star_count|Number of stars for a project"
        "gitlab_project_forks_count|.forks_count|Number of forks for a project"
        "gitlab_project_open_issues_count|.open_issues_count|Number of open issues for a project"
        "gitlab_project_commit_count|.statistics.commit_count|Number of commits in default branch"
        "gitlab_project_storage_size_bytes|.statistics.storage_size|Total storage size in bytes"
        "gitlab_project_repository_size_bytes|.statistics.repository_size|Repository size in bytes"
        "gitlab_project_lfs_objects_size_bytes|.statistics.lfs_objects_size|LFS objects size in bytes"
        "gitlab_project_job_artifacts_size_bytes|.statistics.job_artifacts_size|Job artifacts size in bytes"
        "gitlab_project_packages_size_bytes|.statistics.packages_size|Packages size in bytes"
        "gitlab_project_wiki_size_bytes|.statistics.wiki_size|Wiki size in bytes"
        "gitlab_project_snippets_size_bytes|.statistics.snippets_size|Snippets size in bytes"
        "gitlab_project_uploads_size_bytes|.statistics.uploads_size|Uploads size in bytes"
    )

    # One jq program that prints every value (missing -> 0), one per line,
    # so each project costs a single jq call for all of its numbers.
    local value_filter="" spec metric path help
    for spec in "${specs[@]}"; do
        IFS='|' read -r metric path help <<<"$spec"
        value_filter+="${value_filter:+, }(${path} // 0)"
    done

    local -a samples=() values=()
    local project name namespace labels i
    # fd 3 keeps the project stream away from the stdin of commands in the body
    while IFS= read -r project <&3; do
        name=$(sanitize_label "$(jq -r '.name // "unknown"' <<<"$project" 2>/dev/null)")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' <<<"$project" 2>/dev/null)")
        labels="project=\"$name\",namespace=\"$namespace\""

        mapfile -t values < <(jq -r "$value_filter" <<<"$project" 2>/dev/null)
        if [[ ${#values[@]} -ne ${#specs[@]} ]]; then
            # Never emit a sample without a value
            debug_echo "Skipping project $name: could not read statistics"
            continue
        fi

        for i in "${!specs[@]}"; do
            metric=${specs[i]%%|*}
            samples[i]+="${metric}{$labels} ${values[i]}\n"
        done
    done 3< <(jq -c '.[]' <<<"$projects" 2>/dev/null)

    # Keep all samples of a metric together, right below its header
    local metrics=""
    for i in "${!specs[@]}"; do
        IFS='|' read -r metric path help <<<"${specs[i]}"
        metrics+="# HELP $metric $help\n"
        metrics+="# TYPE $metric gauge\n"
        metrics+="${samples[i]:-}"
    done

    echo -e "$metrics"
}
|
||||
|
||||
# Emit per-project pipeline status counts and the latest pipeline duration.
# Globals: MAX_PROJECTS (read); ERRORS_TOTAL (incremented on request failures)
# Outputs: HELP/TYPE headers and samples on stdout, followed by one empty line
#
# Looks at the 20 most recent pipelines of at most MAX_PROJECTS projects.
# The loops read from process substitutions rather than pipelines, so the
# samples and error counts gathered inside them survive until the function
# prints its result (a piped `while` runs in a subshell and loses both).
collect_pipeline_metrics() {
    debug_echo "Collecting pipeline metrics"

    local projects
    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to fetch projects for pipeline metrics"
        return
    fi

    local status_samples="" duration_samples=""
    local project project_id name namespace pipelines status count duration
    # fd 3 keeps the project stream away from the stdin of commands in the body
    while IFS= read -r project <&3; do
        project_id=$(jq -r '.id' <<<"$project" 2>/dev/null)
        name=$(sanitize_label "$(jq -r '.name // "unknown"' <<<"$project" 2>/dev/null)")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' <<<"$project" 2>/dev/null)")

        if ! pipelines=$(gitlab_api "/api/v4/projects/${project_id}/pipelines?per_page=20" 2>/dev/null); then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to fetch pipelines for project $project_id"
            continue
        fi

        # Count pipelines per status; uniq -c prints "<count> <status>"
        while read -r count status; do
            [[ -n "$status" ]] || continue
            status_samples+="gitlab_pipeline_status{project=\"$name\",namespace=\"$namespace\",status=\"$status\"} $count\n"
        done < <(jq -r '.[].status // empty' <<<"$pipelines" 2>/dev/null | sort | uniq -c)

        # Latest pipeline duration (first element of the list)
        duration=$(jq -r '.[0].duration // empty' <<<"$pipelines" 2>/dev/null)
        if [[ -n "$duration" && "$duration" != "null" ]]; then
            duration_samples+="gitlab_pipeline_duration_seconds{project=\"$name\",namespace=\"$namespace\"} $duration\n"
        fi
    done 3< <(jq -c '.[]' <<<"$projects" 2>/dev/null)

    # Keep all samples of a metric together, right below its header
    local metrics=""
    metrics+="# HELP gitlab_pipeline_status Count of pipelines by status\n"
    metrics+="# TYPE gitlab_pipeline_status gauge\n"
    metrics+="$status_samples"
    metrics+="# HELP gitlab_pipeline_duration_seconds Duration of the latest pipeline\n"
    metrics+="# TYPE gitlab_pipeline_duration_seconds gauge\n"
    metrics+="$duration_samples"

    echo -e "$metrics"
}
|
||||
|
||||
# Emit per-runner active/online flags and instance-wide runner totals.
# Globals: ERRORS_TOTAL (incremented when the runner list cannot be fetched)
# Outputs: HELP/TYPE headers and samples on stdout, followed by one empty line
#
# The runner loop reads from a process substitution rather than a pipeline,
# so the per-runner samples collected inside it are still present when the
# function prints its result (a piped `while` runs in a subshell).
collect_runner_metrics() {
    debug_echo "Collecting runner metrics"

    local runners
    if ! runners=$(gitlab_api_paginated "/api/v4/runners" 100 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect runner metrics"
        return
    fi

    local total online_total
    total=$(jq 'length' <<<"$runners" 2>/dev/null) || total=0
    online_total=$(jq '[.[] | select(.status == "online")] | length' <<<"$runners" 2>/dev/null) || online_total=0

    local active_samples="" online_samples=""
    local runner runner_name runner_type active status active_val online_val labels
    # fd 3 keeps the runner stream away from the stdin of commands in the body
    while IFS= read -r runner <&3; do
        runner_name=$(sanitize_label "$(jq -r '.description // "unknown"' <<<"$runner" 2>/dev/null)")
        runner_type=$(jq -r '.runner_type // "unknown"' <<<"$runner" 2>/dev/null)
        active=$(jq -r '.active // false' <<<"$runner" 2>/dev/null)
        status=$(jq -r '.status // "unknown"' <<<"$runner" 2>/dev/null)

        active_val=0
        if [[ "$active" == "true" ]]; then
            active_val=1
        fi

        online_val=0
        if [[ "$status" == "online" ]]; then
            online_val=1
        fi

        labels="runner_name=\"$runner_name\",runner_type=\"$runner_type\""
        active_samples+="gitlab_runner_active{$labels} $active_val\n"
        online_samples+="gitlab_runner_online{$labels} $online_val\n"
    done 3< <(jq -c '.[]' <<<"$runners" 2>/dev/null)

    # Keep all samples of a metric together, right below its header
    local metrics=""
    metrics+="# HELP gitlab_runner_active Whether a runner is active\n"
    metrics+="# TYPE gitlab_runner_active gauge\n"
    metrics+="$active_samples"
    metrics+="# HELP gitlab_runner_online Whether a runner is online\n"
    metrics+="# TYPE gitlab_runner_online gauge\n"
    metrics+="$online_samples"
    metrics+="# HELP gitlab_runners_total Total number of runners\n"
    metrics+="# TYPE gitlab_runners_total gauge\n"
    metrics+="gitlab_runners_total $total\n"
    metrics+="# HELP gitlab_runners_online_total Total number of online runners\n"
    metrics+="# TYPE gitlab_runners_online_total gauge\n"
    metrics+="gitlab_runners_online_total $online_total\n"

    echo -e "$metrics"
}
|
||||
|
||||
# Emit the total user count, taken from the X-Total response header of a
# one-item user listing.
# Globals: ERRORS_TOTAL (incremented when the request fails or the header
#          is missing)
# Outputs: metric lines on stdout followed by one empty line; nothing at all
#          when the request itself fails
collect_user_metrics() {
    local output=""

    debug_echo "Collecting user metrics"

    local raw
    raw=$(gitlab_api "/api/v4/users?per_page=1" "true" 2>/dev/null) || {
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect user metrics"
        return
    }

    # Header line -> strip all whitespace (including CR) -> value after ':'
    local user_count
    user_count=$(grep -i '^x-total:' <<<"$raw" | tr -d '[:space:]' | cut -d: -f2)

    if [[ -z "$user_count" ]]; then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to parse user count from X-Total header"
    else
        output+="# HELP gitlab_users_total Total number of GitLab users\n"
        output+="# TYPE gitlab_users_total gauge\n"
        output+="gitlab_users_total $user_count\n"
        debug_echo "Total users: $user_count"
    fi

    echo -e "$output"
}
|
||||
|
||||
# Emit the number of groups returned by the groups listing.
# Globals: ERRORS_TOTAL (incremented when the group list cannot be fetched)
# Outputs: metric lines on stdout followed by one empty line; nothing at all
#          when the listing fails
collect_group_metrics() {
    debug_echo "Collecting group metrics"

    local group_list
    group_list=$(gitlab_api_paginated "/api/v4/groups" 100 2>/dev/null) || {
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect group metrics"
        return
    }

    # Fall back to 0 when the list cannot be measured
    local group_count
    if ! group_count=$(jq 'length' <<<"$group_list" 2>/dev/null); then
        group_count=0
    fi

    local output="# HELP gitlab_groups_total Total number of GitLab groups\n"
    output+="# TYPE gitlab_groups_total gauge\n"
    output+="gitlab_groups_total $group_count\n"
    debug_echo "Total groups: $group_count"

    echo -e "$output"
}
|
||||
|
||||
# Emit per-project job counts grouped by job status.
# Globals: MAX_PROJECTS (read); ERRORS_TOTAL (incremented on request failures)
# Outputs: HELP/TYPE header and samples on stdout, followed by one empty line
#
# Looks at the 20 most recent jobs of at most MAX_PROJECTS projects. The
# loops read from process substitutions rather than pipelines, so the samples
# and error counts gathered inside them survive until the function prints its
# result (a piped `while` runs in a subshell and loses both).
collect_job_metrics() {
    local metrics=""

    debug_echo "Collecting job metrics"

    local projects
    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to fetch projects for job metrics"
        return
    fi

    metrics+="# HELP gitlab_jobs_by_status Count of jobs by status\n"
    metrics+="# TYPE gitlab_jobs_by_status gauge\n"

    local project project_id name namespace jobs status count
    # fd 3 keeps the project stream away from the stdin of commands in the body
    while IFS= read -r project <&3; do
        project_id=$(jq -r '.id' <<<"$project" 2>/dev/null)
        name=$(sanitize_label "$(jq -r '.name // "unknown"' <<<"$project" 2>/dev/null)")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' <<<"$project" 2>/dev/null)")

        if ! jobs=$(gitlab_api "/api/v4/projects/${project_id}/jobs?per_page=20" 2>/dev/null); then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to fetch jobs for project $project_id"
            continue
        fi

        # Count jobs per status; uniq -c prints "<count> <status>"
        while read -r count status; do
            [[ -n "$status" ]] || continue
            metrics+="gitlab_jobs_by_status{project=\"$name\",namespace=\"$namespace\",status=\"$status\"} $count\n"
        done < <(jq -r '.[].status // empty' <<<"$jobs" 2>/dev/null | sort | uniq -c)
    done 3< <(jq -c '.[]' <<<"$projects" 2>/dev/null)

    echo -e "$metrics"
}
|
||||
|
||||
# Emit the number of open merge requests per project.
# Globals: MAX_PROJECTS (read); ERRORS_TOTAL (incremented on request failures)
# Outputs: HELP/TYPE header and samples on stdout, followed by one empty line
#
# Counts the entries of a single page of up to 100 open merge requests for
# at most MAX_PROJECTS projects. The project loop reads from a process
# substitution rather than a pipeline, so the samples and error counts
# gathered inside it survive until the function prints its result (a piped
# `while` runs in a subshell and loses both).
collect_merge_request_metrics() {
    local metrics=""

    debug_echo "Collecting merge request metrics"

    local projects
    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to fetch projects for merge request metrics"
        return
    fi

    metrics+="# HELP gitlab_open_merge_requests Number of open merge requests\n"
    metrics+="# TYPE gitlab_open_merge_requests gauge\n"

    local project project_id name namespace mrs count
    # fd 3 keeps the project stream away from the stdin of commands in the body
    while IFS= read -r project <&3; do
        project_id=$(jq -r '.id' <<<"$project" 2>/dev/null)
        name=$(sanitize_label "$(jq -r '.name // "unknown"' <<<"$project" 2>/dev/null)")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' <<<"$project" 2>/dev/null)")

        if ! mrs=$(gitlab_api "/api/v4/projects/${project_id}/merge_requests?state=opened&per_page=100" 2>/dev/null); then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to fetch merge requests for project $project_id"
            continue
        fi

        count=$(jq 'length' <<<"$mrs" 2>/dev/null) || count=0
        metrics+="gitlab_open_merge_requests{project=\"$name\",namespace=\"$namespace\"} $count\n"
    done 3< <(jq -c '.[]' <<<"$projects" 2>/dev/null)

    echo -e "$metrics"
}
|
||||
|
||||
# Emit the number of environments per project.
# Globals: MAX_PROJECTS (read); ERRORS_TOTAL (incremented on request failures)
# Outputs: HELP/TYPE header and samples on stdout, followed by one empty line
#
# Counts the entries of a single page of up to 100 environments for at most
# MAX_PROJECTS projects. The project loop reads from a process substitution
# rather than a pipeline, so the samples and error counts gathered inside it
# survive until the function prints its result (a piped `while` runs in a
# subshell and loses both).
collect_environment_metrics() {
    local metrics=""

    debug_echo "Collecting environment metrics"

    local projects
    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to fetch projects for environment metrics"
        return
    fi

    metrics+="# HELP gitlab_environments_total Number of environments per project\n"
    metrics+="# TYPE gitlab_environments_total gauge\n"

    local project project_id name namespace envs count
    # fd 3 keeps the project stream away from the stdin of commands in the body
    while IFS= read -r project <&3; do
        project_id=$(jq -r '.id' <<<"$project" 2>/dev/null)
        name=$(sanitize_label "$(jq -r '.name // "unknown"' <<<"$project" 2>/dev/null)")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' <<<"$project" 2>/dev/null)")

        if ! envs=$(gitlab_api "/api/v4/projects/${project_id}/environments?per_page=100" 2>/dev/null); then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to fetch environments for project $project_id"
            continue
        fi

        count=$(jq 'length' <<<"$envs" 2>/dev/null) || count=0
        metrics+="gitlab_environments_total{project=\"$name\",namespace=\"$namespace\"} $count\n"
    done 3< <(jq -c '.[]' <<<"$projects" 2>/dev/null)

    echo -e "$metrics"
}
|
||||
|
||||
collect_local_metrics() {
|
||||
local metrics=""
|
||||
|
||||
debug_echo "Scraping local metrics from $GITLAB_METRICS_URL"
|
||||
|
||||
local raw_metrics
|
||||
if ! raw_metrics=$(curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" "$GITLAB_METRICS_URL" 2>/dev/null); then
|
||||
ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
|
||||
echo "Error: Failed to scrape $GITLAB_METRICS_URL" >&2
|
||||
echo "Ensure this host's IP is in gitlab_rails['monitoring_whitelist']" >&2
|
||||
return
|
||||
fi
|
||||
|
||||
# GitLab version info
|
||||
local version_patterns="^gitlab_version_info[{ ]"
|
||||
local version_help="^# (HELP|TYPE) gitlab_version_info"
|
||||
metrics+=$(echo "$raw_metrics" | grep -E "$version_help|$version_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# Puma metrics
|
||||
local puma_patterns="^puma_workers[{ ]|^puma_running_workers[{ ]|^puma_running[{ ]|^puma_queued_connections[{ ]|^puma_active_connections[{ ]|^puma_pool_capacity[{ ]|^puma_max_threads[{ ]|^puma_idle_threads[{ ]"
|
||||
local puma_help="^# (HELP|TYPE) puma_"
|
||||
metrics+=$(echo "$raw_metrics" | grep -E "$puma_help|$puma_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# Sidekiq metrics (served by separate Sidekiq exporter, default localhost:8082)
|
||||
local sidekiq_raw
|
||||
if sidekiq_raw=$(curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" "$GITLAB_SIDEKIQ_URL" 2>/dev/null); then
|
||||
debug_echo "Scraped Sidekiq metrics from $GITLAB_SIDEKIQ_URL"
|
||||
|
||||
# Core Sidekiq job metrics
|
||||
local sidekiq_patterns="^sidekiq_running_jobs[{ ]|^sidekiq_concurrency[{ ]|^sidekiq_mem_total_bytes[{ ]|^sidekiq_jobs_failed_total[{ ]|^sidekiq_jobs_dead_total[{ ]|^sidekiq_enqueued_jobs_total[{ ]|^sidekiq_jobs_completion_seconds[_{ ]|^sidekiq_jobs_queue_duration_seconds[_{ ]|^sidekiq_jobs_cpu_seconds[_{ ]|^sidekiq_jobs_db_seconds[_{ ]|^sidekiq_jobs_gitaly_seconds[_{ ]|^sidekiq_redis_requests_total[{ ]|^sidekiq_redis_requests_duration_seconds[_{ ]"
|
||||
local sidekiq_help="^# (HELP|TYPE) sidekiq_(running_jobs|concurrency|mem_total_bytes|jobs_failed_total|jobs_dead_total|enqueued_jobs_total|jobs_completion_seconds|jobs_queue_duration_seconds|jobs_cpu_seconds|jobs_db_seconds|jobs_gitaly_seconds|redis_requests_total|redis_requests_duration_seconds)"
|
||||
metrics+=$(echo "$sidekiq_raw" | grep -E "$sidekiq_help|$sidekiq_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# CI/CD pipeline internals
|
||||
local ci_patterns="^pipelines_created_total[{ ]|^deployments[{ ]|^gitlab_ci_pipeline_creation_duration_seconds[_{ ]|^gitlab_ci_pipeline_failure_reasons[{ ]|^gitlab_ci_active_jobs[_{ ]"
|
||||
local ci_help="^# (HELP|TYPE) (pipelines_created_total|deployments|gitlab_ci_pipeline_creation_duration_seconds|gitlab_ci_pipeline_failure_reasons|gitlab_ci_active_jobs)"
|
||||
metrics+=$(echo "$sidekiq_raw" | grep -E "$ci_help|$ci_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# Email delivery metrics
|
||||
local email_patterns="^gitlab_emails_delivered_total[{ ]|^gitlab_emails_delivery_attempts_total[{ ]"
|
||||
local email_help="^# (HELP|TYPE) gitlab_emails_(delivered_total|delivery_attempts_total)"
|
||||
metrics+=$(echo "$sidekiq_raw" | grep -E "$email_help|$email_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# External HTTP (webhooks, integrations)
|
||||
local ext_http_patterns="^gitlab_external_http_total[{ ]|^gitlab_external_http_duration_seconds[_{ ]"
|
||||
local ext_http_help="^# (HELP|TYPE) gitlab_external_http_(total|duration_seconds)"
|
||||
metrics+=$(echo "$sidekiq_raw" | grep -E "$ext_http_help|$ext_http_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# Sidekiq SLI apdex/errors
|
||||
local sli_patterns="^gitlab_sli_sidekiq_execution_apdex_success_total[{ ]|^gitlab_sli_sidekiq_execution_apdex_total[{ ]|^gitlab_sli_sidekiq_execution_error_total[{ ]|^gitlab_sli_sidekiq_execution_total[{ ]"
|
||||
local sli_help="^# (HELP|TYPE) gitlab_sli_sidekiq_execution"
|
||||
metrics+=$(echo "$sidekiq_raw" | grep -E "$sli_help|$sli_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# DB transaction duration, primary SQL, threads, cache, workers
|
||||
local extra_patterns="^gitlab_database_transaction_seconds[_{ ]|^gitlab_sql_primary_duration_seconds[_{ ]|^gitlab_ruby_threads_running_threads[{ ]|^gitlab_ruby_threads_max_expected_threads[{ ]|^limited_capacity_worker_running_jobs[{ ]|^limited_capacity_worker_max_running_jobs[{ ]|^limited_capacity_worker_remaining_work_count[{ ]|^redis_hit_miss_operations_total[{ ]"
|
||||
local extra_help="^# (HELP|TYPE) (gitlab_database_transaction_seconds|gitlab_sql_primary_duration_seconds|gitlab_ruby_threads_running_threads|gitlab_ruby_threads_max_expected_threads|limited_capacity_worker_running_jobs|limited_capacity_worker_max_running_jobs|limited_capacity_worker_remaining_work_count|redis_hit_miss_operations_total)"
|
||||
metrics+=$(echo "$sidekiq_raw" | grep -E "$extra_help|$extra_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
else
|
||||
debug_echo "Warning: Could not scrape Sidekiq exporter at $GITLAB_SIDEKIQ_URL (is sidekiq_exporter enabled?)"
|
||||
fi
|
||||
|
||||
# Redis metrics
|
||||
local redis_patterns="^gitlab_redis_client_requests_total[{ ]|^gitlab_redis_client_exceptions_total[{ ]|^gitlab_redis_client_requests_duration_seconds[_{ ]|^gitlab_redis_client_requests_duration_seconds_sum[{ ]|^gitlab_redis_client_requests_duration_seconds_count[{ ]"
|
||||
local redis_help="^# (HELP|TYPE) gitlab_redis_client_(requests_total|exceptions_total|requests_duration_seconds)"
|
||||
metrics+=$(echo "$raw_metrics" | grep -E "$redis_help|$redis_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# Database connection pool metrics
|
||||
local db_patterns="^gitlab_database_connection_pool_"
|
||||
local db_help="^# (HELP|TYPE) gitlab_database_connection_pool_"
|
||||
metrics+=$(echo "$raw_metrics" | grep -E "$db_help|$db_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# Process metrics (CPU, memory, file descriptors)
|
||||
local process_patterns="^ruby_process_resident_memory_bytes[{ ]|^ruby_process_cpu_seconds_total[{ ]|^process_open_fds[{ ]|^process_max_fds[{ ]|^ruby_gc_stat_heap_live_slots[{ ]|^ruby_gc_stat_heap_free_slots[{ ]"
|
||||
local process_help="^# (HELP|TYPE) (ruby_process_resident_memory_bytes|ruby_process_cpu_seconds_total|process_open_fds|process_max_fds|ruby_gc_stat_heap_live_slots|ruby_gc_stat_heap_free_slots)"
|
||||
metrics+=$(echo "$raw_metrics" | grep -E "$process_help|$process_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# GitLab transaction/request metrics
|
||||
local txn_patterns="^gitlab_transaction_duration_seconds[{ _]|^gitlab_sql_duration_seconds[{ _]|^gitlab_cache_operation_duration_seconds[{ _]"
|
||||
local txn_help="^# (HELP|TYPE) (gitlab_transaction_duration_seconds|gitlab_sql_duration_seconds|gitlab_cache_operation_duration_seconds)"
|
||||
metrics+=$(echo "$raw_metrics" | grep -E "$txn_help|$txn_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
# User session and ActionCable metrics
|
||||
local session_patterns="^user_session_logins_total[{ ]|^action_cable_active_connections[{ ]|^action_cable_pool_current_size[{ ]"
|
||||
local session_help="^# (HELP|TYPE) (user_session_logins_total|action_cable_active_connections|action_cable_pool_current_size)"
|
||||
metrics+=$(echo "$raw_metrics" | grep -E "$session_help|$session_patterns" 2>/dev/null)
|
||||
metrics+=$'\n'
|
||||
|
||||
local metric_count
|
||||
metric_count=$(echo "$metrics" | grep -cv '^#\|^$' 2>/dev/null) || metric_count=0
|
||||
debug_echo "Extracted $metric_count local metrics"
|
||||
|
||||
printf '%s\n' "$metrics"
|
||||
}
|
||||
|
||||
#######################################
# Run one full collection cycle and publish the result for the
# node_exporter textfile collector.
# Globals:
#   GITLAB_URL, GITLAB_TOKEN (read) - API collectors run only when both are set
#   LOCAL_MODE (read)               - "true" enables collect_local_metrics
#   NODE_DIR (read)                 - directory receiving gitlab_metrics.prom
#   ERRORS_TOTAL (written)          - reset to 0 at the start of every run
# Outputs:
#   Writes ${NODE_DIR}/gitlab_metrics.prom via a temp file + rename.
# Returns:
#   0 on success, 1 if the output file could not be created or written.
#######################################
run_collection() {
  local start_time end_time duration
  local all_metrics=""
  local collector tmp_file
  local -r target_name="gitlab_metrics.prom"

  start_time=$(date +%s)
  ERRORS_TOTAL=0

  debug_echo "Starting metrics collection"

  # API-based metrics (require GITLAB_URL and GITLAB_TOKEN)
  if [[ -n "$GITLAB_URL" && -n "$GITLAB_TOKEN" ]]; then
    # NOTE(review): each collector runs in a command substitution (subshell),
    # so any ERRORS_TOTAL increments made inside them are not visible here.
    for collector in \
      collect_instance_health \
      collect_project_statistics \
      collect_pipeline_metrics \
      collect_runner_metrics \
      collect_user_metrics \
      collect_group_metrics \
      collect_job_metrics \
      collect_merge_request_metrics \
      collect_environment_metrics; do
      all_metrics+="$("$collector")"$'\n'
    done
  fi

  # Local server-side metrics (scraped from /-/metrics)
  if [[ "$LOCAL_MODE" == true ]]; then
    all_metrics+="$(collect_local_metrics)"$'\n'
  fi

  end_time=$(date +%s)
  duration=$((end_time - start_time))

  # Real newlines are appended here so the buffer can be written verbatim.
  # Interpreting escapes at write time (echo -e) would also rewrite the "\n"
  # and "\\" sequences that are legal inside scraped label values.
  all_metrics+="# HELP gitlab_collector_duration_seconds Time taken to collect all metrics"$'\n'
  all_metrics+="# TYPE gitlab_collector_duration_seconds gauge"$'\n'
  all_metrics+="gitlab_collector_duration_seconds $duration"$'\n'
  all_metrics+="# HELP gitlab_collector_last_run_timestamp Unix timestamp of last collection run"$'\n'
  all_metrics+="# TYPE gitlab_collector_last_run_timestamp gauge"$'\n'
  all_metrics+="gitlab_collector_last_run_timestamp $end_time"$'\n'
  all_metrics+="# HELP gitlab_collector_errors_total Number of errors during collection"$'\n'
  all_metrics+="# TYPE gitlab_collector_errors_total gauge"$'\n'
  all_metrics+="gitlab_collector_errors_total $ERRORS_TOTAL"$'\n'

  # Write atomically: fill a temp file in the same directory, then rename it
  # over the target so a reader never sees a half-written file.
  if ! mkdir -p "$NODE_DIR"; then
    echo "Error: cannot create output directory $NODE_DIR" >&2
    return 1
  fi
  if ! tmp_file=$(mktemp "${NODE_DIR}/${target_name}.XXXXXX"); then
    echo "Error: cannot create temporary file in $NODE_DIR" >&2
    return 1
  fi
  # NOTE(review): mktemp creates the file with mode 0600 and mv keeps it;
  # confirm the process reading NODE_DIR can read files owned by this user.
  if ! printf '%s' "$all_metrics" > "$tmp_file"; then
    echo "Error: cannot write metrics to $tmp_file" >&2
    rm -f -- "$tmp_file"
    return 1
  fi
  if ! mv -- "$tmp_file" "${NODE_DIR}/${target_name}"; then
    echo "Error: cannot move metrics into ${NODE_DIR}/${target_name}" >&2
    rm -f -- "$tmp_file"
    return 1
  fi

  debug_echo "Collection complete in ${duration}s with $ERRORS_TOTAL errors"
}
|
||||
|
||||
#######################################
# Parse command-line flags.
# Globals:
#   RUN_MODE (written)   - "once" or "daemon"
#   LOCAL_MODE (written) - set to true by --local
# Arguments:
#   The script's command-line arguments. No flag takes a value.
# Outputs:
#   --help/-h prints show_help to stdout and exits 0.
#   An unknown option prints an error plus show_help to stderr and exits 1.
#######################################
parse_arguments() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --once)
        RUN_MODE="once"
        ;;
      --daemon)
        RUN_MODE="daemon"
        ;;
      --local)
        LOCAL_MODE=true
        ;;
      --help | -h)
        show_help
        exit 0
        ;;
      *)
        echo "Error: Unknown option: $arg" >&2
        show_help >&2
        exit 1
        ;;
    esac
  done
}
|
||||
|
||||
#######################################
# Check configuration and required tools before collecting.
# Globals:
#   GITLAB_URL, GITLAB_TOKEN (read) - both required unless LOCAL_MODE is true
#   LOCAL_MODE (read)
#   GITLAB_URL, GITLAB_METRICS_URL, GITLAB_SIDEKIQ_URL (written) - one
#     trailing "/" is stripped from each
# Outputs:
#   Error messages on stderr.
# Returns:
#   Does not return on failure: exits the script with status 1.
#######################################
validate_requirements() {
  # API credentials only required when not running local-only
  if [[ "$LOCAL_MODE" != true ]]; then
    if [[ -z "$GITLAB_URL" ]]; then
      echo "Error: GITLAB_URL is required (or use --local for server-side only)" >&2
      exit 1
    fi
    if [[ -z "$GITLAB_TOKEN" ]]; then
      echo "Error: GITLAB_TOKEN is required (or use --local for server-side only)" >&2
      exit 1
    fi
  fi

  # Strip trailing slash from URLs
  GITLAB_URL="${GITLAB_URL%/}"
  GITLAB_METRICS_URL="${GITLAB_METRICS_URL%/}"
  GITLAB_SIDEKIQ_URL="${GITLAB_SIDEKIQ_URL%/}"

  if ! command -v curl >/dev/null 2>&1; then
    echo "Error: curl is required but not installed" >&2
    exit 1
  fi

  # jq is only demanded when a token is configured.
  if [[ -n "$GITLAB_TOKEN" ]] && ! command -v jq >/dev/null 2>&1; then
    echo "Error: jq is required but not installed" >&2
    exit 1
  fi
}
|
||||
|
||||
#######################################
# Entry point: parse flags, validate the environment, then collect once or
# loop forever depending on RUN_MODE.
# Globals:
#   RUN_MODE, COLLECTION_INTERVAL (read); the remaining globals are only
#   echoed through debug_echo.
# Arguments:
#   The script's command-line arguments, forwarded to parse_arguments.
#######################################
main() {
  parse_arguments "$@"
  validate_requirements

  debug_echo "GitLab URL: $GITLAB_URL"
  debug_echo "Metrics URL: $GITLAB_METRICS_URL"
  debug_echo "Sidekiq URL: $GITLAB_SIDEKIQ_URL"
  debug_echo "Node exporter dir: $NODE_DIR"
  debug_echo "Run mode: $RUN_MODE"
  debug_echo "Local mode: $LOCAL_MODE"
  debug_echo "Max projects: $MAX_PROJECTS"

  if [[ "$RUN_MODE" == "daemon" ]]; then
    debug_echo "Running in daemon mode with ${COLLECTION_INTERVAL}s interval"
    # Never leaves this loop; the process runs until it is terminated.
    while true; do
      run_collection
      sleep "$COLLECTION_INTERVAL"
    done
  else
    run_collection
  fi

  debug_echo "Script completed successfully"
}
|
||||
|
||||
# Execute main function if script is run directly
|
||||
# Run main only when the file is executed; sourcing it just defines functions.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,531 @@
|
||||
#!/bin/bash
|
||||
|
||||
####################################################################
|
||||
#### Code-Server Install Script ####
|
||||
#### For RHEL/Rocky/Alma, Oracle Linux, Debian & Ubuntu ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### License: MIT ####
|
||||
#### Version: 1.3 ####
|
||||
#### ####
|
||||
#### Usage: sudo ./install-code-server.sh ####
|
||||
####################################################################
|
||||
|
||||
#############################
|
||||
#### User Configurations ####
|
||||
#############################
|
||||
CODEDIR=/code                       # Home directory for your Code
EMAIL=admin@mydomain.com            # your domain email address
HTTPTYPE=APACHE                     # Choose Apache, Caddy or Nginx All UPPER Case
PASSWD=pAsSwOrD                     # Your Password for Code-server used for Apache, Nginx and Caddy
UNAME=MyUser                        # Username Used for Caddy
SERVDIR=/usr/local/code-server      # where you want the code-server installed
SERVERNAME=code.mydomain.cloud      # server fqdn name
USRDIR=/var/lib/code-server

########################
#### System Configs ####
########################

# Print the first word of PRETTY_NAME, lower-cased (e.g. "ubuntu", "red",
# "rocky"). AlmaLinux reports "AlmaLinux", which is mapped to "alma" because
# that is the token the rest of this script compares against.
# $1 - os-release file to read (default /etc/os-release)
_detect_os() {
  local release_file=${1:-/etc/os-release}
  local name
  name=$(awk -F= '$1 == "PRETTY_NAME" {
      v = $2; gsub(/"/, "", v); split(v, w, " "); print tolower(w[1]); exit
    }' "$release_file")
  if [[ "$name" == almalinux ]]; then
    name=alma
  fi
  printf '%s\n' "$name"
}

# Print the major part of VERSION_ID (e.g. "9" for 9.3, "22" for 22.04).
# $1 - os-release file to read (default /etc/os-release)
_detect_osver() {
  local release_file=${1:-/etc/os-release}
  awk -F= '$1 == "VERSION_ID" {
      v = $2; gsub(/"/, "", v); split(v, p, "."); print p[1]; exit
    }' "$release_file"
}

# Hash of PASSWD for Caddy basicauth. Empty when caddy is not installed yet
# at this point (its errors are discarded on purpose). printf is used so
# backslashes in the password are passed through unchanged.
CADPASS="$(printf '%s\n%s\n' "$PASSWD" "$PASSWD" | caddy hash-password 2>/dev/null | tail -n 1)"
OS=$(_detect_os)
OSVER=$(_detect_osver)
|
||||
|
||||
# Read all of stdin (typically a here-document) into the variable named by $1.
# Line breaks inside the text are kept; leading and trailing newlines are
# trimmed. read always hits EOF before finding its NUL delimiter and reports
# failure, so that status is discarded to let callers treat define as success.
define() {
  IFS=$'\n' read -r -d '' "$1" || true
}
|
||||
|
||||
###########################################################
|
||||
#### Detect Package Manager from OS and OSVER Variables ####
|
||||
###########################################################
|
||||
# Choose the package manager command from OS / OSVER. PAKMGR holds the
# command plus its "-y" flag and is expanded unquoted on purpose.
case "${OS}" in
  ubuntu)
    PAKMGR="apt-get -y"
    ;;
  centos | red | oracle | rocky | alma)
    case "${OSVER}" in
      7) PAKMGR="yum -y" ;;
      8 | 9) PAKMGR="dnf -y" ;;
    esac
    ;;
esac

# Without this guard an unrecognised OS would leave PAKMGR empty and every
# later "${PAKMGR} install ..." would run "install ..." as a command.
if [[ -z "${PAKMGR:-}" ]]; then
  echo "Error: unsupported OS '${OS}' version '${OSVER}'; cannot select a package manager" >&2
  exit 1
fi

################################
#### Check if OS is Updated ####
################################
# shellcheck disable=SC2086
if [[ "${OS}" == ubuntu ]]; then
  # Refresh the package index first so the upgrade sees current versions.
  ${PAKMGR} update
  ${PAKMGR} upgrade
  ${PAKMGR} install libc6 libstdc++6
else
  ${PAKMGR} update
fi
|
||||
|
||||
###############################################
|
||||
#### Get the latest version of Code Server ####
|
||||
###############################################
|
||||
#######################################
# Resolve the newest code-server release by following the GitHub
# "releases/latest" redirect and taking the tag from the final URL.
# Globals:
#   version (written) - release number without the leading "v"; set to the
#                       empty string when resolution fails
# Outputs:
#   The version on stdout; an error message on stderr on failure.
# Returns:
#   0 on success, 1 if the request failed or the URL held no version tag.
#######################################
get_latest_version() {
  local -r releases_url="https://github.com/coder/code-server/releases"
  local effective_url

  version=""
  if ! effective_url=$(curl -fsSLI -o /dev/null -w '%{url_effective}' "${releases_url}/latest"); then
    echo "Error: could not query ${releases_url}/latest" >&2
    return 1
  fi

  effective_url=${effective_url#"${releases_url}/tag/"}
  effective_url=${effective_url#v}

  # If the redirect did not happen the prefixes above do not match and the
  # whole URL is left over; refuse anything that is not a version number.
  if [[ ! "$effective_url" =~ ^[0-9]+(\.[0-9]+)+([-.][0-9A-Za-z]+)*$ ]]; then
    echo "Error: could not determine the latest code-server version" >&2
    return 1
  fi

  version=$effective_url
  echo "$version"
}
|
||||
|
||||
#########################################
|
||||
#### Download and Install Codeserver ####
|
||||
#########################################
|
||||
#######################################
# Download the code-server release named by $version, install it under
# SERVDIR and register it as a systemd service bound to 127.0.0.1:8080.
# Globals:
#   version, PAKMGR, SERVDIR, CODEDIR, USRDIR, PASSWD, HTTPTYPE (read)
# Outputs:
#   Writes /lib/systemd/system/code-server.service and links
#   /usr/bin/code-server to the installed binary.
# Returns:
#   1 if the version is unknown or the download/extract fails.
#######################################
# shellcheck disable=SC2086  # PAKMGR is "<cmd> -y" and must word-split
install_codeserver() {
  local -r unit_file="/lib/systemd/system/code-server.service"
  local -r config_yaml="/root/.config/code-server/config.yaml"
  local release_name tarball after_unit auth_mode

  if [[ -z "${version:-}" ]]; then
    echo "Error: code-server version is not set (run get_latest_version first)" >&2
    return 1
  fi
  release_name="code-server-${version}-linux-amd64"
  tarball="${release_name}.tar.gz"

  # check if command wget exists
  if ! command -v wget >/dev/null 2>&1; then
    ${PAKMGR} install wget
  fi

  cd ~/ || exit
  if ! wget -O "$tarball" "https://github.com/coder/code-server/releases/download/v${version}/${tarball}"; then
    echo "Error: download of ${tarball} failed" >&2
    return 1
  fi
  if ! tar xvf "$tarball"; then
    echo "Error: could not extract ${tarball}" >&2
    return 1
  fi

  mkdir -p "${SERVDIR}"
  cp -r ~/"${release_name}"/* "${SERVDIR}"
  # -f so a re-run replaces the link instead of failing on the existing one.
  ln -sf "${SERVDIR}/bin/code-server" /usr/bin/code-server

  # Code directory and user-data directory
  mkdir -p "${CODEDIR}" "${USRDIR}"

  if [[ "$HTTPTYPE" == CADDY ]]; then
    # Caddy performs basic auth in front of code-server, so the built-in
    # password prompt is switched off. The flag is set on the command line
    # because config.yaml does not exist until code-server has run once.
    after_unit=caddy.service
    auth_mode=none
  else
    # install_http rewrites this After= line when Apache is selected.
    after_unit=nginx.service
    auth_mode=password
  fi

  # The unit is written flush-left, directly from the here-document.
  cat > "$unit_file" <<EOF
[Unit]
Description=code-server
After=${after_unit}

[Service]
Type=simple
Environment=PASSWORD=$PASSWD
ExecStart=/usr/bin/code-server --bind-addr 127.0.0.1:8080 --user-data-dir ${USRDIR} --auth ${auth_mode}
Restart=always

[Install]
WantedBy=multi-user.target
EOF

  # On a re-run the generated config may already exist; keep it consistent.
  if [[ "$HTTPTYPE" == CADDY && -f "$config_yaml" ]]; then
    sed -i 's/auth: password/auth: none/' "$config_yaml"
  fi

  systemctl daemon-reload
  systemctl start code-server
  systemctl enable code-server
}
|
||||
|
||||
########################################
|
||||
#### Install Apache, Nginx or Caddy ####
|
||||
########################################
|
||||
# True when OS is one of the RHEL-family tokens used by this script.
_http_is_rhel() {
  case "${OS}" in
    centos | red | oracle | rocky | alma) return 0 ;;
    *) return 1 ;;
  esac
}

# Install Apache if needed and publish a port-80 vhost proxying to code-server.
# shellcheck disable=SC2086  # PAKMGR is "<cmd> -y" and must word-split
_install_http_apache() {
  local -r unit_file="/lib/systemd/system/code-server.service"
  local vhost_file web_service

  if _http_is_rhel; then
    web_service=httpd
    vhost_file="/etc/httpd/conf.d/code-server.conf"
  elif [[ "${OS}" == ubuntu ]]; then
    web_service=apache2
    vhost_file="/etc/apache2/sites-available/code-server.conf"
  else
    echo "Error: Apache setup is not supported on OS '${OS}'" >&2
    return 1
  fi

  if ! command -v "$web_service" >/dev/null 2>&1; then
    ${PAKMGR} install "$web_service"
    systemctl enable --now "$web_service"
  fi

  # Order code-server after the web server unit that exists on this distro.
  sed -i "s/After=nginx.service/After=${web_service}.service/g" "$unit_file"

  # Unquoted here-document so SERVERNAME is expanded; the rewrite
  # back-reference \$1 is escaped so it reaches Apache literally.
  cat > "$vhost_file" <<EOF
<VirtualHost *:80>
    ServerName ${SERVERNAME}
    #ProxyPreserveHost On
    RewriteEngine On
    RewriteCond %{HTTP:Upgrade} =websocket [NC]
    RewriteRule /(.*) ws://127.0.0.1:8080/\$1 [P,L]
    RewriteCond %{HTTP:Upgrade} !=websocket [NC]
    RewriteRule /(.*) http://127.0.0.1:8080/\$1 [P,L]
    ProxyRequests off
    #RequestHeader set X-Forwarded-Proto https
    #RequestHeader set X-Forwarded-Port 443
    ProxyPass / http://127.0.0.1:8080/ nocanon
    ProxyPassReverse / http://127.0.0.1:8080/
</VirtualHost>
EOF

  if [[ "${OS}" == ubuntu ]]; then
    # A file in sites-available is inert until enabled, and the proxy and
    # rewrite modules the vhost relies on are not loaded by default.
    a2enmod proxy proxy_http proxy_wstunnel rewrite
    a2ensite code-server
  fi

  systemctl daemon-reload
  systemctl restart code-server
  systemctl restart "$web_service"
}

# Install nginx from the nginx.org repository and publish the proxy server block.
# shellcheck disable=SC2086  # PAKMGR is "<cmd> -y" and must word-split
_install_http_nginx() {
  local conf_dir

  if _http_is_rhel; then
    # Quoted here-document: $releasever and $basearch are yum/dnf variables
    # and must be written literally.
    cat > /etc/yum.repos.d/nginx.repo <<'EOF'
[nginx-stable]
name=nginx stable repo
baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
gpgcheck=1
enabled=1
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
EOF
    conf_dir=/etc/nginx/conf.d
  elif [[ "${OS}" == ubuntu ]]; then
    ${PAKMGR} install curl gnupg2 ca-certificates lsb-release
    echo "deb http://nginx.org/packages/ubuntu $(lsb_release -cs) nginx" \
      | tee /etc/apt/sources.list.d/nginx.list
    printf 'Package: *\nPin: origin nginx.org\nPin: release o=nginx\nPin-Priority: 900\n\n' \
      | tee /etc/apt/preferences.d/99nginx
    curl -o /tmp/nginx_signing.key https://nginx.org/keys/nginx_signing.key
    # Show the key fingerprint for the operator; purely informational.
    if [[ "$OSVER" == 16 ]]; then
      gpg --with-fingerprint /tmp/nginx_signing.key
    else
      gpg --dry-run --quiet --import --import-options show-only /tmp/nginx_signing.key
    fi
    mv /tmp/nginx_signing.key /etc/apt/trusted.gpg.d/nginx_signing.asc
    ${PAKMGR} update
    if [[ "$OSVER" == 16 ]]; then
      conf_dir=/etc/nginx/sites-available
    else
      conf_dir=/etc/nginx/conf.d
    fi
  else
    echo "Error: Nginx setup is not supported on OS '${OS}'" >&2
    return 1
  fi

  ${PAKMGR} install nginx

  # \$host and \$http_upgrade are nginx variables, escaped from the shell.
  cat > "${conf_dir}/code-server.conf" <<EOF
server {
    listen 80;
    listen [::]:80;
    server_name $SERVERNAME;
    location / {
        proxy_pass http://localhost:8080/;
        proxy_set_header Host \$host;
        proxy_set_header Upgrade \$http_upgrade;
        proxy_set_header Connection upgrade;
        proxy_set_header Accept-Encoding gzip;
    }
}
EOF

  # Retire the stock default site using the layout that is actually in use.
  if [[ "$conf_dir" == /etc/nginx/sites-available ]]; then
    rm -f /etc/nginx/sites-enabled/default
    ln -sf "${conf_dir}/code-server.conf" /etc/nginx/sites-enabled/code-server.conf
  elif [[ -f "${conf_dir}/default.conf" ]]; then
    mv "${conf_dir}/default.conf" "${conf_dir}/default.conf.orig"
  fi

  systemctl enable nginx
  # restart, not start, so an already-running nginx picks up the new config
  systemctl restart nginx
}

# Install Caddy and write a Caddyfile with basic auth in front of code-server.
# shellcheck disable=SC2086  # PAKMGR is "<cmd> -y" and must word-split
_install_http_caddy() {
  local -r caddy_dir=/etc/caddy
  local password_hash

  if [[ "${OS}" == ubuntu ]]; then
    ${PAKMGR} install debian-keyring debian-archive-keyring apt-transport-https
    # NOTE(review): apt-key is deprecated on current Debian/Ubuntu releases;
    # kept because the repository list fetched below may depend on it.
    curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/cfg/gpg/gpg.155B6D79CA56EA34.key' \
      | apt-key add -
    # Overwrite rather than append so re-runs do not duplicate the source.
    curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/cfg/setup/config.deb.txt?distro=debian&version=any-version' \
      | tee /etc/apt/sources.list.d/caddy-stable.list
    ${PAKMGR} update
    ${PAKMGR} install caddy
  elif _http_is_rhel; then
    if [[ "${OSVER}" == 7 ]]; then
      ${PAKMGR} install yum-plugin-copr
    elif [[ "${OSVER}" == 8 || "${OSVER}" == 9 ]]; then
      ${PAKMGR} install 'dnf-command(copr)'
    fi
    ${PAKMGR} copr enable @caddy/caddy
    ${PAKMGR} install caddy
  else
    echo "Error: Caddy setup is not supported on OS '${OS}'" >&2
    return 1
  fi

  # Hash the password now that the caddy binary exists; a hash computed
  # before installation would have been empty.
  password_hash=$(printf '%s\n%s\n' "$PASSWD" "$PASSWD" | caddy hash-password 2>/dev/null | tail -n 1)
  if [[ -z "$password_hash" ]]; then
    password_hash=${CADPASS:-}
  fi
  if [[ -z "$password_hash" ]]; then
    echo "Error: could not hash the password with 'caddy hash-password'" >&2
    return 1
  fi

  if [[ -f "${caddy_dir}/Caddyfile" ]]; then
    mv "${caddy_dir}/Caddyfile" "${caddy_dir}/Caddyfile.orig"
  fi

  # Only the shell variables are expanded; the hash's own "$" characters are
  # not re-interpreted after expansion.
  cat > "${caddy_dir}/Caddyfile" <<EOF
{                                                                   #### Remove these 3 lines
    acme_ca https://acme-staging-v02.api.letsencrypt.org/directory  #### to make server live
}                                                                   #### and grab cert from letsencrypt

$SERVERNAME {
    basicauth /* {
        $UNAME $password_hash
    }
    reverse_proxy 127.0.0.1:8080
}

EOF

  systemctl enable caddy
  systemctl restart caddy
}

#######################################
# Install and configure the reverse proxy chosen by HTTPTYPE in front of
# code-server (which listens on 127.0.0.1:8080).
# Globals:
#   HTTPTYPE, OS, OSVER, PAKMGR, SERVERNAME, UNAME, PASSWD, CADPASS (read)
# Returns:
#   1 for an unknown HTTPTYPE or an unsupported OS.
#######################################
install_http() {
  case "$HTTPTYPE" in
    APACHE) _install_http_apache ;;
    NGINX) _install_http_nginx ;;
    CADDY) _install_http_caddy ;;
    *)
      echo "Error: unsupported HTTPTYPE '${HTTPTYPE}' (expected APACHE, NGINX or CADDY)" >&2
      return 1
      ;;
  esac
}
|
||||
|
||||
##########################################
|
||||
#### Install Certbot and request Cert ####
|
||||
##########################################
|
||||
#######################################
# Install certbot from snap, request a certificate for SERVERNAME through
# the nginx or apache plugin, and schedule renewals.
# Globals:
#   HTTPTYPE, OS, PAKMGR, SERVERNAME, EMAIL (read)
# Outputs:
#   Writes /etc/cron.d/certbot-renew.
# Returns:
#   0; does nothing for any HTTPTYPE other than NGINX or APACHE (Caddy is
#   configured elsewhere in this script with its own ACME settings).
#######################################
# shellcheck disable=SC2086  # PAKMGR is "<cmd> -y" and must word-split
install_certbot() {
  local plugin web_service
  local rhel_family=false

  case "${OS}" in
    centos | red | oracle | rocky | alma) rhel_family=true ;;
  esac

  case "$HTTPTYPE" in
    NGINX)
      plugin=nginx
      web_service=nginx
      ;;
    APACHE)
      plugin=apache
      if [[ "$rhel_family" == true ]]; then
        web_service=httpd
      else
        web_service=apache2
      fi
      ;;
    *)
      return 0
      ;;
  esac

  # Remove distro certbot packages so they cannot shadow the snap build.
  # The snap bundles the nginx and apache plugins, so the separate
  # python*-certbot-* packages are not installed.
  if [[ "${OS}" == ubuntu ]]; then
    ${PAKMGR} remove letsencrypt
    ${PAKMGR} remove certbot
  elif [[ "$rhel_family" == true ]]; then
    ${PAKMGR} remove certbot
    ${PAKMGR} install epel-release
    ${PAKMGR} install snapd
  fi

  systemctl enable --now snapd.socket
  # Classic snaps need /snap; create the link only where it is missing so a
  # re-run (or Ubuntu, where /snap is a real directory) is left alone.
  if [[ ! -e /snap ]]; then
    ln -s /var/lib/snapd/snap /snap
  fi
  # Best effort: give a freshly started snapd time to finish seeding.
  snap wait system seed.loaded 2>/dev/null || true
  snap install core
  snap refresh core
  snap install --classic certbot
  ln -sf /snap/bin/certbot /usr/bin/certbot

  #certbot certonly --redirect --agree-tos --nginx -d $SERVERNAME -m "$EMAIL" --dry-run
  if ! certbot --non-interactive --redirect --agree-tos "--${plugin}" -d "$SERVERNAME" -m "$EMAIL"; then
    echo "Warning: certbot could not obtain a certificate for $SERVERNAME" >&2
  fi
  systemctl restart "$web_service"

  # /etc/cron.d uses the system crontab format, which has the user column
  # this entry carries. In a per-user crontab "root" would be read as the
  # command to run.
  printf '%s\n' "0 */12 * * * root certbot -q renew --${plugin}" > /etc/cron.d/certbot-renew

  # Best-effort SELinux module built from logged nginx denials.
  if [[ "${OS}" != "ubuntu" && "${OS}" != "debian" ]]; then
    grep nginx /var/log/audit/audit.log | audit2allow -M nginx 2>/dev/null || true
    semodule -i nginx.pp 2>/dev/null || true
  fi
}
|
||||
|
||||
# Install CSF prerequisites, download and install CSF in /usr/src, then run
# its self-test.
# shellcheck disable=SC2086  # PAKMGR is "<cmd> -y" and must word-split
_csf_install() {
  case "${OS}" in
    centos | red | oracle | rocky | alma)
      ${PAKMGR} install ipset perl-libwww-perl.noarch perl-LWP-Protocol-https.noarch perl-GDGraph perl-Sys-Syslog perl-Math-BigInt
      ;;
    ubuntu)
      ${PAKMGR} install ipset libwww-perl liblwp-protocol-https-perl libgd-graph-perl
      ;;
  esac

  cd /usr/src || exit
  # -O overwrites an older download; plain wget would save csf.tgz.1 and the
  # stale archive would be extracted instead.
  if ! wget -O csf.tgz https://download.configserver.com/csf.tgz; then
    echo "Error: could not download csf.tgz" >&2
    return 1
  fi
  tar -xzf csf.tgz || return 1
  cd csf || exit
  ./install.sh

  echo ''
  echo '###########################################'
  echo '#### Testing if CSF firewall will work ####'
  echo '###########################################'
  echo ''
  perl /usr/local/csf/bin/csftest.pl
}

# Set one csf.conf option to an exact value: $1 key, $2 value, $3 file.
# Only the line for exactly that key is rewritten, whatever its current value.
_csf_set() {
  local key=$1 value=$2 conf=$3
  if grep -qE "^${key}[[:space:]]*=" "$conf"; then
    sed -i -E "s|^${key}[[:space:]]*=.*|${key} = \"${value}\"|" "$conf"
  else
    echo "Warning: ${key} not found in ${conf}; left unset" >&2
  fi
}

# Apply this script's CSF policy to the csf.conf given as $1.
_csf_configure() {
  local conf=$1
  local pair
  local -a settings=(
    ##### Initial Settings #####
    'TESTING=0'
    'RESTRICT_SYSLOG=3'
    'RESTRICT_UI=1'
    'AUTO_UPDATES=1'
    ##### IPv4 Port Settings #####
    'TCP_IN=22,80,443,5666,10000'
    'TCP_OUT=22,25,53,80,443,5666,10000'
    'UDP_IN=80,443'
    'UDP_OUT=53,113,123'
    'ICMP_IN_RATE=1/s'
    ##### IPv6 Port Settings #####
    'IPV6=1'
    'TCP6_IN=22,80,443,5666'
    'TCP6_OUT=22,80,443,5666'
    'UDP6_IN=80,443'
    'UDP6_OUT=53,113,123'
    ##### General Settings #####
    'SYSLOG_CHECK=300'
    'IGNORE_ALLOW=0'
    'LF_CSF=1'
    'LF_IPSET=1'
    'PACKET_FILTER=1'
    ##### SMTP Settings #####
    'SMTP_BLOCK=1'
    ##### Port Flood Settings #####
    'SYNFLOOD=1'
    'CONNLIMIT=22;5,25;3,80;10'
    'PORTFLOOD=22;tcp;5;300,25;tcp;5;300,80;tcp;20;5'
    'UDPFLOOD=1'
    ##### Logging Settings #####
    'SYSLOG=1'
    'DROP_LOGGING=1'
    'DROP_ONLYRES=0'
    'UDPFLOOD_LOGGING=1'
    ##### Temp to Perm/Netblock Settings #####
    'LF_PERMBLOCK=1'
    'LF_NETBLOCK=1'
    ##### Login Failure Blocking and Alerts #####
    'LF_SSHD=3'
    'LF_FTPD=5'
    'LF_SMTPAUTH=5'
    'LF_EXIMSYNTAX=10'
    'LF_POP3D=5'
    'LF_IMAPD=5'
    'LF_HTACCESS=5'
    'LF_MODSEC=3'
    'LF_CXS=1'
    'LF_SYMLINK=5'
    'LF_WEBMIN=3'
    'LF_SSH_EMAIL_ALERT=1'
    'LF_SU_EMAIL_ALERT=1'
    'LF_SUDO_EMAIL_ALERT=1'
    'LF_WEBMIN_EMAIL_ALERT=1'
    'LF_CONSOLE_EMAIL_ALERT=1'
    'LF_BLOCKINONLY=0'
    ##### Directory Watching & Integrity #####
    'LF_DIRWATCH=300'
    'LF_INTEGRITY=3600'
    ##### Distributed Attacks #####
    'LF_DISTATTACK=1'
    'LF_DISTFTP=5'
    'LF_DISTSMTP=5'
    ##### Connection Tracking #####
    'CT_LIMIT=300'
    ##### Process Tracking #####
    'PT_LIMIT=60'
    'PT_SKIP_HTTP=0'
    'PT_DELETED=1'
    'PT_USERTIME=0'
    'PT_FORKBOMB=250'
    ##### Port Scan Tracking #####
    'PS_INTERVAL=300'
    'PS_EMAIL_ALERT=1'
    ##### User ID Tracking #####
    'UID_INTERVAL=600'
    ##### Account Tracking #####
    'AT_ALERT=1'
  )

  if [[ ! -f "$conf" ]]; then
    echo "Error: CSF configuration ${conf} not found" >&2
    return 1
  fi

  # Each entry is KEY=VALUE; no value contains "=", so split on the first one.
  for pair in "${settings[@]}"; do
    _csf_set "${pair%%=*}" "${pair#*=}" "$conf"
  done
}

#######################################
# Install the ConfigServer firewall (CSF), apply this script's policy to its
# configuration and enable the csf and lfd services.
# Globals:
#   OS, PAKMGR (read)
#   CSF_CONF (read, optional) - configuration file to edit; defaults to
#                               /etc/csf/csf.conf
# Returns:
#   1 if installation fails or the configuration file is missing.
#######################################
function install_firewall() {
  local csf_conf=${CSF_CONF:-/etc/csf/csf.conf}

  _csf_install || return 1
  _csf_configure "$csf_conf" || return 1

  systemctl enable --now csf
  systemctl enable --now lfd
}
|
||||
|
||||
#######################################
# Add the Webmin package repository and install Webmin.
# Globals:
#   OS, OSVER, PAKMGR (read)
# Outputs:
#   Writes /etc/yum.repos.d/webmin.repo (RHEL family) or
#   /etc/apt/sources.list.d/webmin.list (Ubuntu) and imports the signing key.
# Returns:
#   1 if the signing key cannot be downloaded.
#######################################
# shellcheck disable=SC2086  # PAKMGR is "<cmd> -y" and must word-split
function install_webmin() {
  local -r key_url="https://download.webmin.com/jcameron-key.asc"
  local key_file

  case "${OS}" in
    centos | red | oracle | rocky | alma | ubuntu) ;;
    *) return 0 ;;
  esac

  # Download the key to a private temp file instead of whatever directory
  # the script happens to be in.
  key_file=$(mktemp) || return 1
  if ! wget -O "$key_file" "$key_url"; then
    echo "Error: could not download the Webmin signing key" >&2
    rm -f -- "$key_file"
    return 1
  fi

  if [[ "${OS}" == ubuntu ]]; then
    echo ''
    echo '############################'
    echo '#### Adding Webmin Repo ####'
    echo '############################'
    echo ''
    # A dedicated list file keeps re-runs from appending duplicate lines to
    # /etc/apt/sources.list. Skip it if an earlier run already added the
    # repository there.
    if ! grep -qs 'download.webmin.com' /etc/apt/sources.list; then
      echo 'deb https://download.webmin.com/download/repository sarge contrib' \
        > /etc/apt/sources.list.d/webmin.list
    fi
    # NOTE(review): apt-key is deprecated on current Ubuntu releases.
    apt-key add "$key_file"
    ${PAKMGR} install apt-transport-https
    ${PAKMGR} update
    ${PAKMGR} install webmin
  else
    # Written flush-left, directly from the here-document.
    cat > /etc/yum.repos.d/webmin.repo <<'EOF'
[Webmin]
name=Webmin Distribution Neutral
#baseurl=https://download.webmin.com/download/yum
mirrorlist=https://download.webmin.com/download/yum/mirrorlist
enabled=1
EOF
    rpm --import "$key_file"

    if [[ "${OSVER}" == 7 ]]; then
      ${PAKMGR} install perl-Encode-Detect perl-Net-SSLeay perl-Data-Dumper tcp_wrappers-devel perl-IO-Tty webmin unzip
    elif [[ "${OSVER}" == 8 || "${OSVER}" == 9 ]]; then
      # NOTE(review): tcp_wrappers may be unavailable on EL8+; confirm this
      # package list against the target release.
      ${PAKMGR} install perl-Encode-Detect perl-Net-SSLeay perl-Data-Dumper tcp_wrappers tcp_wrappers-libs unzip
      # perl-IO-Tty comes from the builder repository, named "powertools" on
      # EL8 and "crb" on EL9.
      if [[ "${OSVER}" == 9 ]]; then
        dnf config-manager --set-enabled crb
      else
        dnf config-manager --set-enabled powertools
      fi
      ${PAKMGR} install perl-IO-Tty webmin
    fi
  fi

  rm -f -- "$key_file"
}
|
||||
# Run the installation steps in order. Nothing useful can be installed
# without a release number, so stop if the version lookup reports failure.
get_latest_version || exit 1
install_codeserver
install_http
install_certbot
install_firewall
install_webmin
|
||||
@@ -0,0 +1,189 @@
|
||||
#!/bin/bash

#############################################################
#### ntfy Push Notification Server Setup                 ####
#### Install and configure ntfy as a systemd service     ####
####                                                     ####
#### Author: Phil Connor                                 ####
#### Contact: contact@mylinux.work                       ####
#### License: MIT                                        ####
#### Version: 1.0                                        ####
####                                                     ####
#### Usage: sudo ./install-ntfy-server.sh                ####
#############################################################

set -euo pipefail

# --- Configuration (edit these before running) ---
NTFY_VERSION="2.8.0"
DOMAIN="ntfy.example.com"

NTFY_USER="ntfy"
NTFY_DIR="/var/lib/ntfy"
CONFIG_DIR="/etc/ntfy"

# Ensure script is run as root
if [[ $EUID -ne 0 ]]; then
    echo "ERROR: This script must be run as root (use sudo)."
    exit 1
fi

echo "=== Installing ntfy v${NTFY_VERSION} ==="

# Create ntfy user
if ! id "$NTFY_USER" &>/dev/null; then
    echo "Creating ntfy user..."
    useradd --system --no-create-home --shell /usr/sbin/nologin "$NTFY_USER"
fi

# Create directories
echo "Creating directories..."
mkdir -p "$NTFY_DIR" "$CONFIG_DIR"
chown "$NTFY_USER:$NTFY_USER" "$NTFY_DIR"

# Download and install ntfy.
# A private mktemp directory is used instead of fixed /tmp names: this runs
# as root, and predictable paths in a world-writable directory can be
# pre-created by other users.
echo "Downloading ntfy..."
WORK_DIR=$(mktemp -d)
trap 'rm -rf -- "$WORK_DIR"' EXIT
mkdir "$WORK_DIR/extract"
wget -q -O "$WORK_DIR/ntfy.tar.gz" "https://github.com/binwiederhier/ntfy/releases/download/v${NTFY_VERSION}/ntfy_${NTFY_VERSION}_linux_amd64.tar.gz"
tar -xzf "$WORK_DIR/ntfy.tar.gz" -C "$WORK_DIR/extract"
NTFY_BIN=$(find "$WORK_DIR/extract" -name "ntfy" -type f -print -quit)
if [ -n "$NTFY_BIN" ]; then
    install -m 0755 "$NTFY_BIN" /usr/local/bin/ntfy
fi

# Verify installation
echo "Verifying installation..."
if [ -x /usr/local/bin/ntfy ]; then
    echo "✓ ntfy binary installed at /usr/local/bin/ntfy"
else
    echo "✗ ntfy binary not found"
    exit 1
fi

# Create configuration
echo "Installing configuration..."

cat > "$CONFIG_DIR/server.yml" << EOF
# ntfy server configuration
# Location: ${CONFIG_DIR}/server.yml

# Base URL for the server (used in notification links)
base-url: "http://${DOMAIN}"

# Listen address - use internal port, proxy externally
listen-http: "127.0.0.1:8090"

# Authentication - deny by default, require tokens
auth-default-access: "deny-all"
auth-file: "${NTFY_DIR}/user.db"

# Cache for offline message delivery
cache-file: "${NTFY_DIR}/cache.db"
cache-duration: "24h"

# Behind nginx/caddy reverse proxy
behind-proxy: true

# Attachment settings
attachment-cache-dir: "${NTFY_DIR}/attachments"
attachment-total-size-limit: "1G"
attachment-file-size-limit: "10M"
attachment-expiry-duration: "24h"

# Logging
log-level: "info"
log-format: "json"

# Rate limiting per visitor
visitor-subscription-limit: 30
visitor-request-limit-burst: 60
visitor-request-limit-replenish: "5s"
EOF

# The unit is generated from the configuration variables above so that
# changing NTFY_USER / NTFY_DIR / CONFIG_DIR keeps the service consistent.
cat > /etc/systemd/system/ntfy.service << EOF
# ntfy systemd service
# Location: /etc/systemd/system/ntfy.service

[Unit]
Description=ntfy push notification server
Documentation=https://ntfy.sh/docs/
After=network.target

[Service]
Type=simple
User=${NTFY_USER}
Group=${NTFY_USER}

ExecStart=/usr/local/bin/ntfy serve --config ${CONFIG_DIR}/server.yml
Restart=always
RestartSec=5

# Security hardening
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=strict
ProtectHome=yes
ReadWritePaths=${NTFY_DIR}

# Resource limits
LimitNOFILE=65535
MemoryMax=512M

[Install]
WantedBy=multi-user.target
EOF

# Enable and (re)start service. restart rather than start, so that a rerun
# of this installer picks up a new binary or configuration.
echo "Enabling ntfy service..."
systemctl daemon-reload
systemctl enable ntfy
systemctl restart ntfy

# Wait for service to start
sleep 2

# Check status
if systemctl is-active --quiet ntfy; then
    echo "✓ ntfy service is running"
else
    echo "✗ ntfy service failed to start"
    # status exits non-zero for a failed unit; do not let set -e abort
    # before the explicit exit below.
    systemctl status ntfy --no-pager || true
    exit 1
fi

echo ""
echo "=== Setting up authentication ==="
echo ""

# Create admin user (skip if exists).
# The listing is captured first (stdout and stderr) so that pipefail plus an
# early-exiting grep cannot misreport the result. Both "admin ..." and
# "user admin ..." line formats are accepted.
echo "Creating admin user..."
NTFY_USER_LIST=$(ntfy user list 2>&1 || true)
if grep -qE '^(user )?admin ' <<< "$NTFY_USER_LIST"; then
    echo "✓ admin user already exists"
else
    ntfy user add --role=admin admin
fi
echo ""

# Set access permissions for alert topics
echo "Setting access permissions for alert topics..."
ntfy access admin 'alerts-*' rw
echo "✓ admin has rw access to alerts-*"

echo ""
echo "=== Next Steps ==="
echo ""
echo "1. Create user accounts for desktop clients:"
echo " ntfy user add --role=user <username>"
echo " ntfy token add <username>"
echo ""
echo "2. Grant topic access:"
echo " ntfy access <username> alerts-myapp ro # Read-only to app alerts"
echo " ntfy access <username> alerts-critical ro # Read-only to critical alerts"
echo ""
echo "3. Set up a reverse proxy (nginx/caddy) for ${DOMAIN}"
echo " pointing to 127.0.0.1:8090"
echo ""
echo "4. Test with:"
echo " curl -u admin:<password> -d 'Test notification' http://127.0.0.1:8090/alerts-test"
echo ""
echo "=== Installation complete ==="
|
||||
Executable
+1652
File diff suppressed because it is too large
Load Diff
Executable
+628
@@ -0,0 +1,628 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: iptables-blocklist-metrics.sh
|
||||
# Version: 2.0
|
||||
# Description: Prometheus exporter for iptables threat feed blocking metrics
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
################################################################################
|
||||
|
||||
# Ensure PATH includes sbin (for ipset/iptables when run from cron)
|
||||
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:$PATH"
|
||||
#
|
||||
# EXPORTED METRICS:
|
||||
# - iptables_blocklist_info - Exporter metadata
|
||||
# - iptables_blocklist_enabled_feeds - Count of enabled feeds
|
||||
# - iptables_blocklist_ipset_size - IPs per feed ipset (IPv4/v6)
|
||||
# - iptables_blocklist_blocked_total - Block counts per feed (1h, 24h)
|
||||
# - iptables_blocklist_effectiveness - Blocks per 1000 IPs (24h)
|
||||
# - iptables_blocklist_last_update_timestamp - Feed cache file mtime
|
||||
# - iptables_blocklist_cache_age_seconds - Age of feed cache files
|
||||
# - iptables_blocklist_file_size_bytes - Feed parsed file sizes
|
||||
# - iptables_blocklist_ip_version_ratio - IPv4 vs IPv6 distribution per feed
|
||||
# - iptables_blocklist_total_unique_ips - Total unique IPs across all feeds
|
||||
# - iptables_blocklist_total_rules - Total iptables rules
|
||||
# - iptables_blocklist_rule_packets - Packet counts from iptables rules
|
||||
# - iptables_blocklist_rule_bytes - Byte counts from iptables rules
|
||||
# - iptables_blocklist_conntrack_entries - Current conntrack entries
|
||||
# - iptables_blocklist_conntrack_max - Maximum conntrack entries
|
||||
# - iptables_blocklist_conntrack_usage_percent - Conntrack usage percentage
|
||||
# - iptables_blocklist_whitelist_size - Whitelist ipset sizes
|
||||
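# - iptables_blocklist_exporter_runtime_seconds - Script execution time
# - iptables_blocklist_ipset_memory_bytes - Memory used by each ipset
# - iptables_blocklist_cache_disk_used_bytes - Disk space used on cache partition
# - iptables_blocklist_cache_disk_available_bytes - Disk space available on cache partition
# - iptables_blocklist_cache_disk_usage_percent - Cache partition usage percentage
# - iptables_blocklist_cache_total_size_bytes - Total size of cache directory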
|
||||
|
||||
# --- Threat-feed state (directories, feed list, ipset names, log) ---
CONFIG_DIR="/etc/iptables-threats"
CACHE_DIR="${CONFIG_DIR}/cache"
FEEDS_CONFIG="${CONFIG_DIR}/feeds.conf"
IPSET_PREFIX="iptables-feed"
WHITELIST_IPSET="iptables-whitelist"
WHITELIST_IPSET_V6="iptables-whitelist-v6"
LOG_FILE="/var/log/iptables-threats.log"

# --- Exporter settings (defaults; parse_args may override the first four) ---
TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9419
SCRIPT_START_TIME=$(date +%s)
LOCK_FILE="/var/run/iptables-blocklist-metrics.lock"
|
||||
|
||||
# Print the command-line help to stdout and terminate the script with
# status 0. Reads $0 and the HTTP_PORT global for the text.
show_usage() {
    printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "" \
        "Export per-feed iptables threat statistics as Prometheus metrics." \
        "" \
        "MODES:" \
        "--textfile Write to node_exporter textfile collector" \
        "--http Run HTTP server on port $HTTP_PORT" \
        "" \
        "OPTIONS:" \
        "-p, --port HTTP port (default: 9419)" \
        "-o, --output Output file" \
        "-h, --help Show this help" \
        "" \
        "EXAMPLES:" \
        "# Write to textfile collector" \
        "$0 --textfile" \
        "" \
        "# Run as HTTP server" \
        "$0 --http --port 9419" \
        "" \
        "# Generate metrics to stdout" \
        "$0" \
        ""
    exit 0
}
|
||||
|
||||
# Parse command-line options into the global settings.
# Globals written: OUTPUT_FILE, HTTP_MODE, HTTP_PORT
# Globals read:    TEXTFILE_DIR
# Arguments:       the script's positional parameters ("$@")
# Exits 1 on an unknown option, on an option whose value is missing, or on a
# non-numeric port. -h/--help delegates to show_usage.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/iptables_blocklist.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port|-o|--output)
                # Without this guard "shift 2" fails when the value is
                # missing, nothing is shifted and the loop never ends.
                if [[ $# -lt 2 ]]; then
                    echo "ERROR: option $1 requires an argument" >&2
                    exit 1
                fi
                case "$1" in
                    -p|--port)
                        if ! [[ "$2" =~ ^[0-9]+$ ]]; then
                            echo "ERROR: invalid port: $2" >&2
                            exit 1
                        fi
                        HTTP_PORT="$2"
                        ;;
                    *)
                        OUTPUT_FILE="$2"
                        ;;
                esac
                shift 2
                ;;
            *)
                echo "Unknown: $1"
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
# Print the number of member entries in an ipset.
# Arguments: $1 - ipset name
# Outputs:   entry count on stdout; 0 when the set does not exist or ipset
#            is unavailable.
# Member lines are recognised as lines of `ipset list` output that start
# with a hex digit, '.' or ':'.
get_ipset_size() {
    local ipset_name="$1"
    local size
    size=$(ipset list "$ipset_name" 2>/dev/null | grep -c '^[0-9a-fA-F.:]')
    echo "${size:-0}"
}
|
||||
|
||||
# Count kernel-log lines tagged "[THREAT:<feed>]" since a point in time.
# Arguments: $1 - feed name
#            $2 - value for journalctl --since (e.g. "1 hour ago")
# Outputs:   match count on stdout (0 when nothing matches or journalctl
#            fails).
# The tag is matched as a fixed string so that regex metacharacters in a
# feed name (such as '.') cannot match other feeds.
get_feed_blocks() {
    local feed="$1"
    local period="$2"
    local count
    count=$(journalctl -k --since "$period" 2>/dev/null | grep -cF -- "[THREAT:${feed}]")
    echo "${count:-0}"
}
|
||||
|
||||
# Count kernel-log lines tagged "[THREAT-v6:<feed>]" since a point in time.
# Arguments: $1 - feed name
#            $2 - value for journalctl --since (e.g. "24 hours ago")
# Outputs:   match count on stdout (0 when nothing matches or journalctl
#            fails).
# The tag is matched as a fixed string so that regex metacharacters in a
# feed name cannot match other feeds.
get_feed_blocks_v6() {
    local feed="$1"
    local period="$2"
    local count
    count=$(journalctl -k --since "$period" 2>/dev/null | grep -cF -- "[THREAT-v6:${feed}]")
    echo "${count:-0}"
}
|
||||
|
||||
# Print the modification time of a file as seconds since the epoch.
# Arguments: $1 - file path
# Outputs:   mtime on stdout; "0" when the path is not a regular file or
#            stat fails.
get_file_timestamp() {
    local mtime
    if [ -f "$1" ] && mtime=$(stat -c %Y "$1" 2>/dev/null) && [ -n "$mtime" ]; then
        echo "$mtime"
    else
        echo "0"
    fi
}
|
||||
|
||||
# Print the size of a file in bytes.
# Arguments: $1 - file path
# Outputs:   size on stdout; "0" when the path is not a regular file or
#            stat fails.
get_file_size() {
    local bytes
    if [ -f "$1" ] && bytes=$(stat -c %s "$1" 2>/dev/null) && [ -n "$bytes" ]; then
        echo "$bytes"
    else
        echo "0"
    fi
}
|
||||
|
||||
# Print how many seconds ago a file was last modified.
# Arguments: $1 - file path
# Outputs:   age in seconds on stdout; "0" when the path is not a regular
#            file or stat fails. A modification time in the future is
#            reported as 0 rather than a negative age.
get_cache_age() {
    local now mtime age
    if [ -f "$1" ] && mtime=$(stat -c %Y "$1" 2>/dev/null) && [ -n "$mtime" ]; then
        now=$(date +%s)
        age=$(( now - mtime ))
        if [ "$age" -lt 0 ]; then
            age=0
        fi
        echo "$age"
    else
        echo "0"
    fi
}
|
||||
|
||||
# Print "packets|bytes" for the first rule in a chain that references a
# feed's ipset.
# Arguments: $1 - iptables chain name
#            $2 - feed name (the ipset is "${IPSET_PREFIX}-<feed>")
# Outputs:   "packets|bytes" on stdout, or nothing when no rule matches.
# Counters come from `iptables -L -v -n -x` (exact numbers, no K/M/G
# suffixes). The set name must equal a whole field of the rule line, so a
# feed "abc" does not match rules for "abc-v6" or "abcd".
get_iptables_rule_stats() {
    local chain="$1"
    local feed="$2"
    iptables -L "$chain" -v -n -x 2>/dev/null \
        | awk -v set="${IPSET_PREFIX}-${feed}" '
            {
                for (i = 3; i <= NF; i++) {
                    if ($i == set) { print $1 "|" $2; exit }
                }
            }'
}
|
||||
|
||||
# Print the number of distinct lines across all parsed feed files of one
# IP version in CACHE_DIR.
# Arguments: $1 - "4" or "6"; any other value yields 0
# Outputs:   count on stdout
get_total_unique_ips() {
    local ip_version="$1"
    local count=0

    case "$ip_version" in
        4)
            count=$(cat "$CACHE_DIR/"*-v4.parsed 2>/dev/null | sort -u | wc -l 2>/dev/null)
            ;;
        6)
            count=$(cat "$CACHE_DIR/"*-v6.parsed 2>/dev/null | sort -u | wc -l 2>/dev/null)
            ;;
    esac

    echo "${count:-0}"
}
|
||||
|
||||
# Print the contents of /proc/sys/net/netfilter/nf_conntrack_count, or "0"
# when that file does not exist.
get_conntrack_count() {
    local proc_file=/proc/sys/net/netfilter/nf_conntrack_count
    if [ ! -f "$proc_file" ]; then
        echo "0"
        return
    fi
    cat "$proc_file"
}
|
||||
|
||||
# Print the contents of /proc/sys/net/netfilter/nf_conntrack_max, or "0"
# when that file does not exist.
get_conntrack_max() {
    local proc_file=/proc/sys/net/netfilter/nf_conntrack_max
    if [ ! -f "$proc_file" ]; then
        echo "0"
        return
    fi
    cat "$proc_file"
}
|
||||
|
||||
# Print the "Size in memory:" value reported by `ipset list <name> -t`.
# Arguments: $1 - ipset name
# Outputs:   the fourth field of that line on stdout, or 0 when the line is
#            absent (set missing or ipset unavailable).
get_ipset_memory() {
    local ipset_name="$1"
    local mem
    mem=$(ipset list "$ipset_name" -t 2>/dev/null | awk '/Size in memory:/ {print $4}')
    echo "${mem:-0}"
}
|
||||
|
||||
# Print usage of the filesystem that holds CACHE_DIR.
# Outputs: "used_bytes|available_bytes|use%" on stdout; "0|0|0%" when
#          CACHE_DIR is not a directory or df produces no data.
# df is run with -P so each filesystem is reported on exactly one line;
# without it a long device name can wrap the row and shift the columns.
get_cache_disk_usage() {
    local usage=""
    if [ -d "$CACHE_DIR" ]; then
        usage=$(df -B1 -P "$CACHE_DIR" 2>/dev/null | awk 'NR == 2 { print $3 "|" $4 "|" $5 }')
    fi
    if [ -z "$usage" ]; then
        usage="0|0|0%"
    fi
    echo "$usage"
}
|
||||
|
||||
# Print the total size in bytes of CACHE_DIR as reported by `du -sb`.
# Outputs: byte count on stdout; "0" when CACHE_DIR is not a directory or
#          du produces no output.
get_total_cache_size() {
    local bytes=""
    if [ -d "$CACHE_DIR" ]; then
        bytes=$(du -sb "$CACHE_DIR" 2>/dev/null | awk 'NR == 1 { print $1 }')
    fi
    echo "${bytes:-0}"
}
|
||||
|
||||
# Take the single-instance lock by writing this shell's PID to LOCK_FILE.
# Globals read: LOCK_FILE
# Behaviour:
#   - The lock file is created with noclobber, so creation fails if the
#     file already exists and two instances cannot both create it.
#   - If the file exists and names a live PID, print an error and exit 1.
#   - If it names a dead PID (or is empty), it is treated as stale, removed
#     and creation is retried once.
#   - If the file still cannot be created (for example the directory is not
#     writable), a warning is printed and the script continues unlocked.
# On success an EXIT trap runs cleanup; INT and TERM exit the script (which
# fires the EXIT trap) instead of letting it continue after the signal.
acquire_lock() {
    local pid attempt

    for attempt in 1 2; do
        if ( set -o noclobber; printf '%s\n' "$$" > "$LOCK_FILE" ) 2>/dev/null; then
            trap cleanup EXIT
            trap 'exit 130' INT
            trap 'exit 143' TERM
            return 0
        fi

        pid=$(cat "$LOCK_FILE" 2>/dev/null)
        if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
            echo "ERROR: Another instance is already running (PID: $pid)" >&2
            exit 1
        fi

        if [ -e "$LOCK_FILE" ]; then
            echo "Removing stale lock file" >&2
            rm -f "$LOCK_FILE"
        fi
    done

    echo "WARNING: could not create lock file $LOCK_FILE; continuing without lock" >&2
}
|
||||
|
||||
# Trap handler: remove the single-instance lock file (LOCK_FILE).
cleanup() {
    rm -f -- "$LOCK_FILE"
}
|
||||
|
||||
# Print "0" unless the denominator ($2) is a positive number; otherwise print
# (numerator / denominator) * scale formatted with the printf format $4.
# Values are passed with awk -v rather than spliced into the awk program.
_gm_scaled_ratio() {
    awk -v num="$1" -v den="$2" -v scale="$3" -v fmt="$4" \
        'BEGIN { if (den + 0 > 0) printf fmt, (num / den) * scale; else printf "0" }'
}

# From `iptables -L <chain> -v -n -x` output ($1), print "packets|bytes" of
# the first rule whose target column equals $3 and which has a field exactly
# equal to the ipset name $2. Prints nothing when no rule matches.
_gm_rule_stats() {
    awk -v set="$2" -v target="$3" '
        $3 == target {
            for (i = 4; i <= NF; i++) {
                if ($i == set) { print $1 "|" $2; exit }
            }
        }' <<< "$1"
}

# Write all exporter metrics to stdout in Prometheus text exposition format.
# Globals read: FEEDS_CONFIG, CACHE_DIR, IPSET_PREFIX, WHITELIST_IPSET,
#               WHITELIST_IPSET_V6
# feeds.conf lines are '|'-separated; the first field is the enabled flag
# ("1" = enabled) and the second the feed name. Lines whose first field is
# empty or starts with '#' are ignored.
# Metrics for block counts, effectiveness and rule counters are emitted for
# enabled feeds only; cache/file/ratio/memory metrics for every listed feed.
generate_metrics() {
    local start_time
    start_time=$(date +%s)

    # Read feeds.conf once into parallel arrays instead of once per metric.
    local -a feed_names=() feed_states=()
    local enabled name _rest i
    if [ -f "$FEEDS_CONFIG" ]; then
        # "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
        while IFS='|' read -r enabled name _rest || [ -n "$enabled" ]; do
            [[ "$enabled" =~ ^# ]] && continue
            [[ -z "$enabled" ]] && continue
            feed_states+=("$enabled")
            feed_names+=("$name")
        done < "$FEEDS_CONFIG"
    fi

    # grep -c already prints "0" when nothing matches (and exits 1), so no
    # "|| echo 0" here: that would emit a second, stray "0" line.
    local enabled_count
    enabled_count=$(grep -c '^1|' "$FEEDS_CONFIG" 2>/dev/null)
    enabled_count=${enabled_count:-0}

    cat <<EOF
# HELP iptables_blocklist_info Per-feed iptables threat blocking info
# TYPE iptables_blocklist_info gauge
iptables_blocklist_info{mode="per-feed",version="2.0"} 1

# HELP iptables_blocklist_enabled_feeds Total enabled feeds
# TYPE iptables_blocklist_enabled_feeds gauge
iptables_blocklist_enabled_feeds $enabled_count

# HELP iptables_blocklist_ipset_size Number of IPs per feed ipset
# TYPE iptables_blocklist_ipset_size gauge
EOF

    # Only export metrics for ipsets that actually exist
    local ipset_name feed_name ip_version status size
    while IFS= read -r ipset_name; do
        feed_name="${ipset_name#"${IPSET_PREFIX}-"}"
        ip_version="4"
        if [[ "$feed_name" == *-v6 ]]; then
            feed_name="${feed_name%-v6}"
            ip_version="6"
        fi

        status="disabled"
        for i in "${!feed_names[@]}"; do
            if [ "${feed_states[i]}" = "1" ] && [ "${feed_names[i]}" = "$feed_name" ]; then
                status="enabled"
                break
            fi
        done

        size=$(get_ipset_size "$ipset_name")
        echo "iptables_blocklist_ipset_size{feed=\"$feed_name\",ip_version=\"$ip_version\",status=\"$status\"} $size"
    done < <(ipset list -n 2>/dev/null | grep "^${IPSET_PREFIX}-")

    cat <<EOF

# HELP iptables_blocklist_blocked_total Blocked attempts per feed
# TYPE iptables_blocklist_blocked_total counter
EOF

    # Per-feed block counts (IPv4 and IPv6). The 24h counts are kept for the
    # effectiveness section so the journal is not scanned twice.
    local -a blocks_24h_v4=() blocks_24h_v6=()
    local blocks_1h_v4 blocks_1h_v6
    for i in "${!feed_names[@]}"; do
        [ "${feed_states[i]}" = "1" ] || continue
        name=${feed_names[i]}

        blocks_1h_v4=$(get_feed_blocks "$name" "1 hour ago")
        blocks_24h_v4[i]=$(get_feed_blocks "$name" "24 hours ago")
        blocks_1h_v6=$(get_feed_blocks_v6 "$name" "1 hour ago")
        blocks_24h_v6[i]=$(get_feed_blocks_v6 "$name" "24 hours ago")

        echo "iptables_blocklist_blocked_total{feed=\"$name\",ip_version=\"4\",period=\"1h\"} $blocks_1h_v4"
        echo "iptables_blocklist_blocked_total{feed=\"$name\",ip_version=\"4\",period=\"24h\"} ${blocks_24h_v4[i]}"
        echo "iptables_blocklist_blocked_total{feed=\"$name\",ip_version=\"6\",period=\"1h\"} $blocks_1h_v6"
        echo "iptables_blocklist_blocked_total{feed=\"$name\",ip_version=\"6\",period=\"24h\"} ${blocks_24h_v6[i]}"
    done

    # Feed effectiveness (blocks per 1000 IPs)
    cat <<EOF

# HELP iptables_blocklist_effectiveness Blocks per 1000 IPs in feed (24h)
# TYPE iptables_blocklist_effectiveness gauge
EOF

    local size_v4 size_v6
    for i in "${!feed_names[@]}"; do
        [ "${feed_states[i]}" = "1" ] || continue
        name=${feed_names[i]}

        # Each IP version is measured against its own ipset.
        size_v4=$(get_ipset_size "${IPSET_PREFIX}-${name}")
        size_v6=$(get_ipset_size "${IPSET_PREFIX}-${name}-v6")

        echo "iptables_blocklist_effectiveness{feed=\"$name\",ip_version=\"4\"} $(_gm_scaled_ratio "${blocks_24h_v4[i]}" "$size_v4" 1000 '%.2f')"
        echo "iptables_blocklist_effectiveness{feed=\"$name\",ip_version=\"6\"} $(_gm_scaled_ratio "${blocks_24h_v6[i]}" "$size_v6" 1000 '%.2f')"
    done

    # Feed update/cache metrics
    cat <<EOF

# HELP iptables_blocklist_last_update_timestamp Feed cache file last modified timestamp
# TYPE iptables_blocklist_last_update_timestamp gauge
EOF

    local v4_file v6_file
    for i in "${!feed_names[@]}"; do
        name=${feed_names[i]}
        v4_file="${CACHE_DIR}/${name}-v4.parsed"
        v6_file="${CACHE_DIR}/${name}-v6.parsed"

        echo "iptables_blocklist_last_update_timestamp{feed=\"$name\",ip_version=\"4\"} $(get_file_timestamp "$v4_file")"
        echo "iptables_blocklist_last_update_timestamp{feed=\"$name\",ip_version=\"6\"} $(get_file_timestamp "$v6_file")"
    done

    cat <<EOF

# HELP iptables_blocklist_cache_age_seconds Age of feed cache files
# TYPE iptables_blocklist_cache_age_seconds gauge
EOF

    for i in "${!feed_names[@]}"; do
        name=${feed_names[i]}
        v4_file="${CACHE_DIR}/${name}-v4.parsed"
        v6_file="${CACHE_DIR}/${name}-v6.parsed"

        echo "iptables_blocklist_cache_age_seconds{feed=\"$name\",ip_version=\"4\"} $(get_cache_age "$v4_file")"
        echo "iptables_blocklist_cache_age_seconds{feed=\"$name\",ip_version=\"6\"} $(get_cache_age "$v6_file")"
    done

    cat <<EOF

# HELP iptables_blocklist_file_size_bytes Feed parsed file sizes
# TYPE iptables_blocklist_file_size_bytes gauge
EOF

    for i in "${!feed_names[@]}"; do
        name=${feed_names[i]}
        v4_file="${CACHE_DIR}/${name}-v4.parsed"
        v6_file="${CACHE_DIR}/${name}-v6.parsed"

        echo "iptables_blocklist_file_size_bytes{feed=\"$name\",ip_version=\"4\",type=\"parsed\"} $(get_file_size "$v4_file")"
        echo "iptables_blocklist_file_size_bytes{feed=\"$name\",ip_version=\"6\",type=\"parsed\"} $(get_file_size "$v6_file")"
    done

    # IP version distribution ratio
    cat <<EOF

# HELP iptables_blocklist_ip_version_ratio Ratio of IPv4 to IPv6 addresses per feed
# TYPE iptables_blocklist_ip_version_ratio gauge
EOF

    local total
    for i in "${!feed_names[@]}"; do
        name=${feed_names[i]}
        size_v4=$(get_ipset_size "${IPSET_PREFIX}-${name}")
        size_v6=$(get_ipset_size "${IPSET_PREFIX}-${name}-v6")
        size_v4=${size_v4:-0}
        size_v6=${size_v6:-0}
        total=$((size_v4 + size_v6))

        echo "iptables_blocklist_ip_version_ratio{feed=\"$name\",version=\"4\"} $(_gm_scaled_ratio "$size_v4" "$total" 1 '%.4f')"
        echo "iptables_blocklist_ip_version_ratio{feed=\"$name\",version=\"6\"} $(_gm_scaled_ratio "$size_v6" "$total" 1 '%.4f')"
    done

    # Total metrics
    cat <<EOF

# HELP iptables_blocklist_total_unique_ips Total unique IPs across all feeds
# TYPE iptables_blocklist_total_unique_ips gauge
iptables_blocklist_total_unique_ips{ip_version="4"} $(get_total_unique_ips "4")
iptables_blocklist_total_unique_ips{ip_version="6"} $(get_total_unique_ips "6")

# HELP iptables_blocklist_total_rules Total iptables rules
# TYPE iptables_blocklist_total_rules gauge
iptables_blocklist_total_rules $(iptables -S 2>/dev/null | wc -l)

# HELP iptables_blocklist_rule_packets Packet counts from iptables rules
# TYPE iptables_blocklist_rule_packets counter
EOF

    # List the INPUT chain once and look every feed up in that snapshot;
    # the same snapshot feeds both the packet and the byte metrics.
    local input_rules packets bytes
    local -a rule_log=() rule_drop=()
    input_rules=$(iptables -L INPUT -v -n -x 2>/dev/null)
    for i in "${!feed_names[@]}"; do
        [ "${feed_states[i]}" = "1" ] || continue
        rule_log[i]=$(_gm_rule_stats "$input_rules" "${IPSET_PREFIX}-${feed_names[i]}" LOG)
        rule_drop[i]=$(_gm_rule_stats "$input_rules" "${IPSET_PREFIX}-${feed_names[i]}" DROP)
    done

    for i in "${!feed_names[@]}"; do
        [ "${feed_states[i]}" = "1" ] || continue
        name=${feed_names[i]}

        if [ -n "${rule_log[i]}" ]; then
            packets=${rule_log[i]%%|*}
            echo "iptables_blocklist_rule_packets{feed=\"$name\",ip_version=\"4\",action=\"log\"} ${packets:-0}"
        fi
        if [ -n "${rule_drop[i]}" ]; then
            packets=${rule_drop[i]%%|*}
            echo "iptables_blocklist_rule_packets{feed=\"$name\",ip_version=\"4\",action=\"drop\"} ${packets:-0}"
        fi
    done

    cat <<EOF

# HELP iptables_blocklist_rule_bytes Byte counts from iptables rules
# TYPE iptables_blocklist_rule_bytes counter
EOF

    for i in "${!feed_names[@]}"; do
        [ "${feed_states[i]}" = "1" ] || continue
        name=${feed_names[i]}

        if [ -n "${rule_log[i]}" ]; then
            bytes=${rule_log[i]#*|}
            echo "iptables_blocklist_rule_bytes{feed=\"$name\",ip_version=\"4\",action=\"log\"} ${bytes:-0}"
        fi
        if [ -n "${rule_drop[i]}" ]; then
            bytes=${rule_drop[i]#*|}
            echo "iptables_blocklist_rule_bytes{feed=\"$name\",ip_version=\"4\",action=\"drop\"} ${bytes:-0}"
        fi
    done

    cat <<EOF

# HELP iptables_blocklist_ipset_memory_bytes Memory used by each ipset
# TYPE iptables_blocklist_ipset_memory_bytes gauge
EOF

    local mem_v4 mem_v6
    for i in "${!feed_names[@]}"; do
        name=${feed_names[i]}
        mem_v4=$(get_ipset_memory "${IPSET_PREFIX}-${name}")
        mem_v6=$(get_ipset_memory "${IPSET_PREFIX}-${name}-v6")

        echo "iptables_blocklist_ipset_memory_bytes{feed=\"$name\",ip_version=\"4\"} $mem_v4"
        echo "iptables_blocklist_ipset_memory_bytes{feed=\"$name\",ip_version=\"6\"} $mem_v6"
    done

    # Conntrack metrics
    local conntrack_count conntrack_max conntrack_usage
    conntrack_count=$(get_conntrack_count)
    conntrack_max=$(get_conntrack_max)
    conntrack_usage=$(_gm_scaled_ratio "$conntrack_count" "$conntrack_max" 100 '%.2f')

    # Cache disk metrics ("used|available|pct%")
    local disk_info cache_size disk_used disk_avail disk_pct
    disk_info=$(get_cache_disk_usage)
    cache_size=$(get_total_cache_size)
    IFS='|' read -r disk_used disk_avail disk_pct <<< "$disk_info"
    disk_pct=${disk_pct//%/}

    # Runtime is measured here, at the end, so it reflects the work above.
    cat <<EOF

# HELP iptables_blocklist_conntrack_entries Current conntrack entries
# TYPE iptables_blocklist_conntrack_entries gauge
iptables_blocklist_conntrack_entries ${conntrack_count:-0}

# HELP iptables_blocklist_conntrack_max Maximum conntrack entries
# TYPE iptables_blocklist_conntrack_max gauge
iptables_blocklist_conntrack_max ${conntrack_max:-0}

# HELP iptables_blocklist_conntrack_usage_percent Conntrack usage percentage
# TYPE iptables_blocklist_conntrack_usage_percent gauge
iptables_blocklist_conntrack_usage_percent $conntrack_usage

# HELP iptables_blocklist_cache_disk_used_bytes Disk space used by cache partition
# TYPE iptables_blocklist_cache_disk_used_bytes gauge
iptables_blocklist_cache_disk_used_bytes ${disk_used:-0}

# HELP iptables_blocklist_cache_disk_available_bytes Disk space available on cache partition
# TYPE iptables_blocklist_cache_disk_available_bytes gauge
iptables_blocklist_cache_disk_available_bytes ${disk_avail:-0}

# HELP iptables_blocklist_cache_disk_usage_percent Cache partition disk usage percentage
# TYPE iptables_blocklist_cache_disk_usage_percent gauge
iptables_blocklist_cache_disk_usage_percent ${disk_pct:-0}

# HELP iptables_blocklist_cache_total_size_bytes Total size of cache directory
# TYPE iptables_blocklist_cache_total_size_bytes gauge
iptables_blocklist_cache_total_size_bytes ${cache_size:-0}

# HELP iptables_blocklist_whitelist_size Whitelist ipset size
# TYPE iptables_blocklist_whitelist_size gauge
iptables_blocklist_whitelist_size{ip_version="4"} $(get_ipset_size "$WHITELIST_IPSET")
iptables_blocklist_whitelist_size{ip_version="6"} $(get_ipset_size "$WHITELIST_IPSET_V6")

# HELP iptables_blocklist_exporter_runtime_seconds Exporter runtime in seconds
# TYPE iptables_blocklist_exporter_runtime_seconds gauge
iptables_blocklist_exporter_runtime_seconds $(( $(date +%s) - start_time ))
EOF
    echo ""
}
|
||||
|
||||
# Serve metrics over HTTP with netcat, one connection at a time, forever.
# Globals read: HTTP_PORT
# "GET /metrics..." returns generate_metrics output as text/plain; any other
# request returns a small HTML index page.
# nc's stdout (the client's request) is piped into the handler, and the
# handler's response travels back to nc's stdin through a FIFO. With a plain
# "handler | nc" pipeline the handler would read the script's own stdin and
# never see the request line.
# NOTE(review): the nc flags (-l -p PORT -q 1) are those of traditional
# netcat; confirm they are accepted by the nc flavour installed on the
# target hosts.
run_http_server() {
    echo "Starting iptables blocklist exporter on port $HTTP_PORT..."

    if ! command -v nc >/dev/null 2>&1; then
        echo "ERROR: netcat (nc) is required for HTTP mode"
        echo "Install with: yum install nmap-ncat (RHEL/CentOS)"
        echo " or: apt install netcat (Debian/Ubuntu)"
        exit 1
    fi

    local fifo_dir fifo request
    if ! fifo_dir=$(mktemp -d /tmp/iptables_metrics_http.XXXXXX); then
        echo "ERROR: cannot create temporary directory for HTTP mode" >&2
        exit 1
    fi
    fifo="$fifo_dir/response"
    if ! mkfifo "$fifo"; then
        rm -rf -- "$fifo_dir"
        echo "ERROR: cannot create FIFO $fifo" >&2
        exit 1
    fi
    # The path is expanded now, because the local variable is out of scope
    # by the time the trap fires.
    # shellcheck disable=SC2064
    trap "rm -rf -- '$fifo_dir'" EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM

    while true; do
        nc -l -p "$HTTP_PORT" -q 1 < "$fifo" 2>/dev/null | {
            read -r request
            if [[ "$request" =~ ^GET\ /metrics ]]; then
                echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
                generate_metrics
            else
                echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
                echo "<h1>iptables Blocklist Metrics Exporter</h1>"
                echo "<p>Per-feed threat blocking statistics</p>"
                echo "<p><a href='/metrics'>Metrics</a></p>"
            fi
        } > "$fifo"

        # If nc itself failed (port busy, unsupported flags), back off
        # instead of spinning at full speed.
        if [ "${PIPESTATUS[0]}" -ne 0 ]; then
            sleep 1
        fi
    done
}
|
||||
|
||||
# Entry point: parse options, then run in one of three modes.
#   --http          serve metrics over HTTP (runs until killed, no lock)
#   OUTPUT_FILE set write metrics to that file
#   otherwise       print metrics to stdout
# Globals read: CONFIG_DIR, HTTP_MODE, OUTPUT_FILE
# Exits 1 when CONFIG_DIR is missing or the output file cannot be written.
main() {
    parse_args "$@"

    if [ ! -d "$CONFIG_DIR" ]; then
        echo "ERROR: $CONFIG_DIR not found. Run iptables-blocklists.sh first" >&2
        exit 1
    fi

    # HTTP mode runs continuously, so it does not take the single-run lock.
    if [ "$HTTP_MODE" = true ]; then
        run_http_server
        return
    fi

    # Prevent overlapping one-shot runs.
    acquire_lock

    if [ -z "$OUTPUT_FILE" ]; then
        generate_metrics
        return
    fi

    local output_dir temp_file
    output_dir=$(dirname -- "$OUTPUT_FILE")
    mkdir -p -- "$output_dir"

    # Build the file next to its destination and rename it into place.
    # A rename within one directory is atomic, so a reader sees either the
    # old complete file or the new one (and the new file has a new inode).
    # The temporary name does not end in ".prom", the suffix the
    # node_exporter textfile collector looks for.
    if ! temp_file=$(mktemp "${OUTPUT_FILE}.tmp.XXXXXX"); then
        echo "ERROR: cannot create temporary file in $output_dir" >&2
        exit 1
    fi

    if ! generate_metrics > "$temp_file"; then
        rm -f -- "$temp_file"
        echo "ERROR: failed to write metrics to $temp_file" >&2
        exit 1
    fi

    # mktemp creates the file 0600; make it readable before publishing it.
    chmod 644 "$temp_file"

    if ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
        rm -f -- "$temp_file"
        echo "ERROR: failed to move metrics into $OUTPUT_FILE" >&2
        exit 1
    fi
}
|
||||
|
||||
main "$@"
|
||||
Executable
+757
@@ -0,0 +1,757 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: iptables-blocklists.sh
|
||||
# Version: 1.0
|
||||
# Description: Per-feed iptables threat intelligence blocking with ipset
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
################################################################################
|
||||
# Don't use 'set -e' - causes issues with ipset error handling
|
||||
|
||||
# --- Filesystem layout -------------------------------------------------------
CONFIG_DIR=/etc/iptables-threats
FEEDS_CONFIG="${CONFIG_DIR}/feeds.conf"   # pipe-delimited feed list
CACHE_DIR="${CONFIG_DIR}/cache"           # downloaded and parsed feed data
BACKUP_DIR="${CONFIG_DIR}/backups"        # iptables-save snapshots
LOG_FILE=/var/log/iptables-threats.log

# --- ipset naming ------------------------------------------------------------
IPSET_PREFIX=iptables-feed                # per-feed sets are "<prefix>-<feed>"
WHITELIST_IPSET=iptables-whitelist
WHITELIST_IPSET_V6=iptables-whitelist-v6

# --- Tunables (several can be overridden from the command line) --------------
SSH_PORT=22
ENABLE_AUTO_UPDATE=true
UPDATE_INTERVAL=daily
ENABLE_IPV6=true
MAX_BACKUPS=5                             # snapshots kept by cleanup_old_backups
|
||||
|
||||
# Print the command-line help to stdout and terminate the script with status 0.
show_usage() {
  local prog="$0"

  cat <<USAGE
Usage: ${prog} [OPTIONS] [COMMAND]

PER-FEED VERSION for iptables: Each threat feed gets its own ipset.
Provides detailed per-feed blocking statistics and metrics.

COMMANDS:
install Install and configure threat feed blocking
update Update all enabled feeds now (ipsets only, no rules reload)
apply-rules Regenerate and apply iptables rules (use with caution!)
test-rules Test rule generation without applying (dry-run)
add-feed NAME URL Add a custom feed
remove-feed NAME Remove a feed
enable-feed NAME Enable a disabled feed
disable-feed NAME Disable a feed
list-feeds List all configured feeds
show-stats Show blocking statistics per feed
whitelist-add IP Add IP/CIDR to whitelist
whitelist-init Initialize whitelist with RFC1918/Docker networks
whitelist-list Show all whitelisted IPs

OPTIONS:
-h, --help Show this help message
-s, --ssh-port PORT SSH port (default: 22)
--no-auto-update Disable automatic updates
--no-ipv6 Disable IPv6
--update-interval TIME hourly, daily, weekly (default: daily)

EXAMPLES:
# Install with default feeds
sudo ${prog} install

# Update feeds manually (safe - only updates ipsets)
sudo ${prog} update

# Test rule generation (safe - no changes)
sudo ${prog} test-rules

# Apply rules after testing (regenerates iptables)
sudo ${prog} apply-rules

# Add custom feed
sudo ${prog} add-feed "my-blocklist" "https://example.com/blocklist.txt"

# View statistics
sudo ${prog} show-stats

USAGE
  exit 0
}
|
||||
|
||||
# Write a timestamped message to stdout and append the same line to LOG_FILE.
# Arguments: $1 - message text
log_message() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}
|
||||
|
||||
# Parse the command line into global settings.
#
# Globals written: COMMAND (defaults to "install"), SSH_PORT,
#   ENABLE_AUTO_UPDATE, ENABLE_IPV6, UPDATE_INTERVAL, FEED_NAME, FEED_URL,
#   WHITELIST_IP
# Exits 1 on an unknown argument or when an option/command is missing its
# required value(s). Returns 0 otherwise.
parse_args() {
  COMMAND=""

  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        ;;
      -s|--ssh-port)
        # `shift 2` with a single remaining argument fails WITHOUT shifting,
        # which used to spin this loop forever; check the count first.
        [[ $# -ge 2 ]] || { echo "ERROR: $1 requires an argument" >&2; exit 1; }
        SSH_PORT="$2"
        shift 2
        ;;
      --no-auto-update)
        ENABLE_AUTO_UPDATE=false
        shift
        ;;
      --no-ipv6)
        ENABLE_IPV6=false
        shift
        ;;
      --update-interval)
        [[ $# -ge 2 ]] || { echo "ERROR: $1 requires an argument" >&2; exit 1; }
        UPDATE_INTERVAL="$2"
        shift 2
        ;;
      install|update|apply-rules|test-rules|list-feeds|show-stats|whitelist-init|whitelist-list)
        COMMAND="$1"
        shift
        ;;
      add-feed)
        [[ $# -ge 3 ]] || { echo "Usage: $0 add-feed <NAME> <URL>" >&2; exit 1; }
        COMMAND="add-feed"
        FEED_NAME="$2"
        FEED_URL="$3"
        shift 3
        ;;
      remove-feed|enable-feed|disable-feed)
        [[ $# -ge 2 ]] || { echo "Usage: $0 $1 <NAME>" >&2; exit 1; }
        COMMAND="$1"
        FEED_NAME="$2"
        shift 2
        ;;
      whitelist-add)
        [[ $# -ge 2 ]] || { echo "Usage: $0 whitelist-add <IP|CIDR>" >&2; exit 1; }
        COMMAND="whitelist-add"
        WHITELIST_IP="$2"
        shift 2
        ;;
      *)
        echo "Unknown: $1"
        exit 1
        ;;
    esac
  done

  # Written as `A || B` so the function returns 0 when a command was given
  # (the former `[ -z ... ] && ...` made parse_args return 1 in that case).
  [ -n "$COMMAND" ] || COMMAND="install"
}
|
||||
|
||||
# Verify the script runs as root and that iptables, ipset and curl are
# available, installing them through dnf, yum or apt-get when any is missing.
#
# Exits 1 when not root, when no supported package manager is found, or when
# a required tool is still missing after the install attempt.
check_requirements() {
  if [ "$EUID" -ne 0 ]; then
    echo "Run as root"
    exit 1
  fi

  # curl is checked as well: feeds are downloaded with it, and a host that
  # already had iptables+ipset previously skipped the install step entirely.
  local tool
  local -a missing=()
  for tool in iptables ipset curl; do
    command -v "$tool" >/dev/null 2>&1 || missing+=("$tool")
  done
  [ "${#missing[@]}" -eq 0 ] && return 0

  if command -v dnf >/dev/null 2>&1; then
    dnf install -y iptables ipset curl iptables-services
  elif command -v yum >/dev/null 2>&1; then
    yum install -y iptables ipset curl iptables-services
  elif command -v apt-get >/dev/null 2>&1; then
    # iptables-persistent asks debconf questions; answer them with defaults
    # so an unattended install cannot block on a prompt.
    apt-get update \
      && DEBIAN_FRONTEND=noninteractive apt-get install -y iptables ipset curl iptables-persistent
  else
    echo "Cannot install requirements automatically"
    exit 1
  fi

  # Do not trust the package manager's exit status alone.
  for tool in "${missing[@]}"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "ERROR: required tool '$tool' is still missing after the install attempt" >&2
      exit 1
    fi
  done
}
|
||||
|
||||
# Create the config, cache and backup directories plus the log file, then
# restrict CONFIG_DIR to mode 700 and LOG_FILE to mode 600.
create_directory_structure() {
  local dir

  for dir in "$CONFIG_DIR" "$CACHE_DIR" "$BACKUP_DIR"; do
    mkdir -p "$dir"
  done
  chmod 700 "$CONFIG_DIR"

  touch "$LOG_FILE"
  chmod 600 "$LOG_FILE"
}
|
||||
|
||||
# Delete the oldest iptables-save snapshots in BACKUP_DIR so that at most
# MAX_BACKUPS remain. Snapshot names embed a sortable timestamp, so a plain
# lexical sort orders them oldest-first.
cleanup_old_backups() {
  local -a backups=()
  local excess

  # One listing drives both the count and the deletion (they previously used
  # different find predicates). Reading into an array instead of piping to
  # xargs keeps paths containing spaces intact.
  mapfile -t backups < <(find "$BACKUP_DIR" -type f -name 'iptables-save-*.txt' | sort)

  excess=$(( ${#backups[@]} - MAX_BACKUPS ))
  if [ "$excess" -gt 0 ]; then
    rm -f -- "${backups[@]:0:excess}"
    log_message "Cleaned up $excess old backups (keeping last $MAX_BACKUPS)"
  fi
}
|
||||
|
||||
# Write the default feed list to FEEDS_CONFIG (mode 600).
# An existing file is never overwritten, so local edits survive re-installs.
#
# Fixes relative to the previous defaults:
#   * feed name "abuseipd-3d" was a typo for "abuseipdb-3d"
#   * the TYPE documentation listed "custom", which no parser implements
initialize_feeds_config() {
  [ -f "$FEEDS_CONFIG" ] && return

  cat > "$FEEDS_CONFIG" <<'EOF'
# Threat Intelligence Feeds Configuration
# Format: ENABLED|NAME|URL|TYPE|DESCRIPTION
#
# ENABLED: 1 (enabled) or 0 (disabled)
# NAME: Unique feed identifier
# URL: Feed URL
# TYPE: Format type (plain, cidr, commented)
# DESCRIPTION: Feed description

1|cinsarmy|http://cinsscore.com/list/ci-badguys.txt|plain|CINS Army Malicious IPs
1|firehol-level1|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level1.netset|cidr|FireHOL Level 1 - Most aggressive attackers
1|firehol-level2|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level2.netset|cidr|FireHOL Level 2 - Attacks in last 48h
0|firehol-level3|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level3.netset|cidr|FireHOL Level 3 - Attacks in last 30d
1|ipsum-1|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/1.txt|plain|IPsum Level 1 - Most dangerous
0|ipsum-2|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/2.txt|plain|IPsum Level 2 - Dangerous
0|ipsum-3|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/3.txt|plain|IPsum Level 3 - Suspicious
0|spamhaus-drop|https://www.spamhaus.org/drop/drop.txt|commented|Spamhaus DROP List
0|spamhaus-edrop|https://www.spamhaus.org/drop/edrop.txt|commented|Spamhaus EDROP List
1|spamhaus-dropv6|https://www.spamhaus.org/drop/dropv6.txt|commented|Spamhaus DROP V6 List
0|feodo-tracker|https://feodotracker.abuse.ch/downloads/ipblocklist.txt|commented|Feodo Tracker C2 IPs
0|sslbl-aggressive|https://sslbl.abuse.ch/blacklist/sslipblacklist_aggressive.txt|commented|SSL Blacklist Aggressive
0|sslbl-all|https://sslbl.abuse.ch/blacklist/sslipblacklist.txt|commented|SSL Blacklist All
1|blocklist-de|https://lists.blocklist.de/lists/all.txt|plain|Blocklist.de All Attacks
0|greensnow|https://blocklist.greensnow.co/greensnow.txt|plain|GreenSnow Blacklist
0|emergingthreats|https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt|plain|Emerging Threats IPs
0|bruteforce-ssh|https://lists.blocklist.de/lists/ssh.txt|plain|SSH Bruteforce Attempts
1|binarydefense|https://www.binarydefense.com/banlist.txt|plain|Binary Defense Blacklist
1|bruteforce-bl|https://danger.rulez.sk/projects/bruteforceblocker/blist.php|commented|BruteForce Blocker
0|dshield-top|https://www.dshield.org/block.txt|commented|DShield Top Attackers
1|dshield-fhol|https://iplists.firehol.org/files/dshield.netset|commented|Dshield FireHol top 20
0|tor-exit|https://check.torproject.org/torbulkexitlist|plain|TOR Exit Nodes (optional)
0|abuseipdb-1d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-1d.ipv4|commented|AbuseIPDB confidence score 100 1 day
0|abuseipdb-3d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-3d.ipv4|commented|AbuseIPDB confidence score 100 3 day
0|abuseipdb-7d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-7d.ipv4|commented|AbuseIPDB confidence score 100 7 day
1|abuseipdb-14d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-14d.ipv4|commented|AbuseIPDB confidence score 100 14 day
0|abuseipdb-30d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-30d.ipv4|commented|AbuseIPDB confidence score 100 30 day
# Add custom feeds below this line
EOF
  chmod 600 "$FEEDS_CONFIG"
}
|
||||
|
||||
# Create the whitelist ipsets and one IPv4 (and, when ENABLE_IPV6=true, one
# IPv6) ipset per enabled feed in FEEDS_CONFIG. Sets that already exist are
# left untouched.
setup_ipsets() {
  # Keep the read-loop fields local so they cannot clobber globals.
  local enabled name url type description

  log_message "Setting up per-feed ipsets..."

  # Whitelist (loopback is seeded only when the set is first created)
  if ! ipset list "$WHITELIST_IPSET" >/dev/null 2>&1; then
    ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000
    ipset add "$WHITELIST_IPSET" 127.0.0.1 2>/dev/null || true
  fi

  if [ "$ENABLE_IPV6" = true ] && ! ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1; then
    ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000
    ipset add "$WHITELIST_IPSET_V6" ::1 2>/dev/null || true
  fi

  # Create ipset per feed
  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue   # comment line
    [[ -z "$enabled" ]] && continue         # blank line
    [ "$enabled" != "1" ] && continue       # disabled feed

    if ! ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1; then
      ipset create "${IPSET_PREFIX}-${name}" hash:net family inet hashsize 4096 maxelem 200000
    fi

    if [ "$ENABLE_IPV6" = true ] && ! ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1; then
      ipset create "${IPSET_PREFIX}-${name}-v6" hash:net family inet6 hashsize 4096 maxelem 200000
    fi
  done < "$FEEDS_CONFIG"
}
|
||||
|
||||
# Download a feed with curl.
# Arguments: $1 - source URL, $2 - destination file
# Returns:   curl's exit status (non-zero on HTTP errors because of -f)
download_feed() {
  local url="$1" dest="$2"
  # -f fail on HTTP errors, -s silent, -m 30 overall timeout, -L follow redirects
  local -a curl_opts=(-f -s -m 30 -L)

  curl "${curl_opts[@]}" "$url" -o "$dest" 2>/dev/null
}
|
||||
|
||||
# Extract addresses/networks from a downloaded feed.
#
# Arguments:
#   $1 - raw feed file
#   $2 - feed type: plain | cidr | commented
#   $3 - output file for IPv4 entries (truncated first)
#   $4 - output file for IPv6 entries (truncated first; filled only when
#        ENABLE_IPV6=true)
# Returns: 0 on success (even with zero matches), 1 when the input file is
#          unreadable or the type is unknown.
#
# Carriage returns are stripped so feeds with CRLF line endings still match
# the anchored patterns.
parse_feed() {
  local file="$1" type="$2" out_v4="$3" out_v6="$4"

  : > "$out_v4"
  : > "$out_v6"
  [ -r "$file" ] || return 1

  case "$type" in
    plain)
      tr -d '\r' < "$file" \
        | grep -E '^[0-9.]+(/[0-9]+)?$' >> "$out_v4" || true
      if [ "$ENABLE_IPV6" = true ]; then
        tr -d '\r' < "$file" \
          | grep -E '^[0-9a-fA-F:]+(/[0-9]+)?$' \
          | grep ':' >> "$out_v6" || true
      fi
      ;;
    cidr)
      # The address is the first space-delimited field, before any '#'.
      # The ':' filters run AFTER the comment is cut off: an IPv6 entry such
      # as "2001:db8::/32" starts with a digit and would otherwise land in
      # the IPv4 list, and an IPv4 line with a ':' in its comment would land
      # in the IPv6 list.
      tr -d '\r' < "$file" \
        | grep -E '^[0-9.]+' \
        | cut -d' ' -f1 | cut -d'#' -f1 \
        | grep -v -e ':' -e '^$' >> "$out_v4" || true
      if [ "$ENABLE_IPV6" = true ]; then
        tr -d '\r' < "$file" \
          | grep -E '^[0-9a-fA-F:]+' \
          | cut -d' ' -f1 | cut -d'#' -f1 \
          | grep ':' >> "$out_v6" || true
      fi
      ;;
    commented)
      tr -d '\r' < "$file" \
        | grep -v -E '^[#;]|^$' \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?' >> "$out_v4" || true
      if [ "$ENABLE_IPV6" = true ]; then
        tr -d '\r' < "$file" \
          | grep -v -E '^[#;]|^$' \
          | grep -oE '[0-9a-fA-F:]+(/[0-9]+)?' \
          | grep -E '^[0-9a-fA-F]{1,4}:[0-9a-fA-F:]+' >> "$out_v6" || true
      fi
      ;;
    *)
      # Previously an unknown type produced two empty files and "success".
      echo "ERROR: unknown feed type '$type'" >&2
      return 1
      ;;
  esac

  return 0
}
|
||||
|
||||
# Replace the contents of one ipset from a list file using `ipset restore`.
#
# Arguments:
#   $1 - target set name
#   $2 - address family (inet | inet6)
#   $3 - file with one address/network per line
#   $4 - path where the generated restore script is written
#   $5 - label used in log messages (e.g. "myfeed IPv4")
#
# The scratch set uses a short fixed name. The former per-feed
# "<target>-tmp" names exceeded ipset's 31-character set-name limit for many
# feeds, so the batch path always failed and fell back to the slow loop.
# `-exist` makes duplicate entries in a feed harmless instead of aborting
# the whole batch.
_ipset_swap_load() {
  local target="$1" family="$2" entries="$3" script="$4" label="$5"
  local tmp_set="${IPSET_PREFIX}-tmp-${family}"
  local -a spec=(hash:net family "$family" hashsize 4096 maxelem 200000)
  local ip

  # Ensure target ipset exists for swap
  if ! ipset list "$target" >/dev/null 2>&1; then
    ipset create "$target" "${spec[@]}"
  fi

  # Drop a scratch set left behind by an interrupted earlier run.
  ipset destroy "$tmp_set" 2>/dev/null || true

  {
    echo "create $tmp_set ${spec[*]}"
    echo "flush $tmp_set"
    awk -v s="$tmp_set" 'NF { print "add " s " " $0 }' "$entries"
    echo "swap $target $tmp_set"
    echo "destroy $tmp_set"
  } > "$script"

  if ! ipset -exist restore < "$script" 2>/dev/null; then
    log_message " ⚠ Batch load failed for $label, using fallback"
    ipset destroy "$tmp_set" 2>/dev/null || true
    ipset flush "$target" 2>/dev/null || true
    while IFS= read -r ip; do
      [ -z "$ip" ] && continue
      ipset add "$target" "$ip" 2>/dev/null || true
    done < "$entries"
  fi
}

# Download every enabled feed, parse it, and load the result into the feed's
# ipset(s). Also removes cache files and ipsets that belong to feeds that are
# no longer enabled. iptables rules are NOT touched.
#
# Globals read: FEEDS_CONFIG, CACHE_DIR, IPSET_PREFIX, ENABLE_IPV6
update_feeds() {
  local enabled name url type description
  local enabled_feeds cache_file feed
  local cleaned_cache=0 cleaned_ipsets=0
  local total=0 failed=0
  local raw v4 v6 c4 c6

  log_message "Starting per-feed update (FAST ipset restore mode)..."

  # Auto-cleanup cache and ipsets for disabled feeds
  enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2)

  # Clean cache files
  for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed "$CACHE_DIR"/*-v4.restore "$CACHE_DIR"/*-v6.restore; do
    [ -f "$cache_file" ] || continue
    feed=${cache_file##*/}
    feed=${feed%.raw}
    feed=${feed%-v4.parsed}
    feed=${feed%-v6.parsed}
    feed=${feed%-v4.restore}
    feed=${feed%-v6.restore}
    # Fixed-string, whole-line match: feed names may contain '.'.
    if ! grep -Fxq -- "$feed" <<< "$enabled_feeds"; then
      rm -f -- "$cache_file" && cleaned_cache=$((cleaned_cache + 1))
    fi
  done

  # Clean ipsets for disabled feeds
  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    [ "$enabled" = "1" ] && continue

    if ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1; then
      ipset destroy "${IPSET_PREFIX}-${name}" 2>/dev/null && cleaned_ipsets=$((cleaned_ipsets + 1))
    fi
    if ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1; then
      ipset destroy "${IPSET_PREFIX}-${name}-v6" 2>/dev/null && cleaned_ipsets=$((cleaned_ipsets + 1))
    fi
  done < "$FEEDS_CONFIG"

  [ "$cleaned_cache" -gt 0 ] && log_message " Cleaned $cleaned_cache stale cache files"
  [ "$cleaned_ipsets" -gt 0 ] && log_message " Destroyed $cleaned_ipsets stale ipsets"

  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    [ "$enabled" != "1" ] && continue

    total=$((total + 1))
    log_message "Updating: $name"

    raw="$CACHE_DIR/${name}.raw"
    v4="$CACHE_DIR/${name}-v4.parsed"
    v6="$CACHE_DIR/${name}-v6.parsed"

    if download_feed "$url" "$raw" && parse_feed "$raw" "$type" "$v4" "$v6"; then
      c4=$(wc -l < "$v4" 2>/dev/null || echo 0)
      c6=0
      [ "$ENABLE_IPV6" = true ] && c6=$(wc -l < "$v6" 2>/dev/null || echo 0)

      # An empty result leaves the existing set untouched.
      if [ "$c4" -gt 0 ]; then
        _ipset_swap_load "${IPSET_PREFIX}-${name}" inet "$v4" \
          "$CACHE_DIR/${name}-v4.restore" "$name IPv4"
      fi

      if [ "$ENABLE_IPV6" = true ] && [ "$c6" -gt 0 ]; then
        _ipset_swap_load "${IPSET_PREFIX}-${name}-v6" inet6 "$v6" \
          "$CACHE_DIR/${name}-v6.restore" "$name IPv6"
      fi

      log_message " ✓ $name: $c4 IPv4, $c6 IPv6"
    else
      log_message " ✗ Failed: $name"
      failed=$((failed + 1))
    fi
  done < "$FEEDS_CONFIG"

  # Save ipsets (RHEL-style path first, then Debian-style)
  ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true

  log_message "✓ Updated $total feeds ($failed failed) - FAST IPSET RESTORE MODE"
}
|
||||
|
||||
# Make sure an ipset exists, creating an empty one when needed.
# Arguments: $1 - set name, $2 - family (inet | inet6)
# Returns non-zero when the set is missing and cannot be created.
_ensure_feed_ipset() {
  ipset list "$1" >/dev/null 2>&1 && return 0
  ipset create "$1" hash:net family "$2" hashsize 4096 maxelem 200000 2>/dev/null
}

# Rebuild the threat-feed rules in the INPUT chain.
#
# Steps: back up the current rules, delete the rules this script manages for
# every feed listed in FEEDS_CONFIG, insert the whitelist ACCEPT at
# position 1, add LOG+DROP rules per enabled feed, add SSH rate limiting if
# absent, and persist the result.
#
# Globals read: BACKUP_DIR, FEEDS_CONFIG, IPSET_PREFIX, WHITELIST_IPSET,
#   WHITELIST_IPSET_V6, ENABLE_IPV6, SSH_PORT
apply_iptables_rules() {
  local enabled name url type description
  local set4 set6 line

  log_message "Applying per-feed iptables rules..."

  # Backup current rules
  iptables-save > "$BACKUP_DIR/iptables-save-$(date +%Y%m%d-%H%M%S).txt" 2>/dev/null || true
  cleanup_old_backups

  # Remove old threat feed rules
  iptables -D INPUT -m set --match-set "$WHITELIST_IPSET" src -j ACCEPT 2>/dev/null || true
  if [ "$ENABLE_IPV6" = true ]; then
    ip6tables -D INPUT -m set --match-set "$WHITELIST_IPSET_V6" src -j ACCEPT 2>/dev/null || true
  fi
  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    set4="${IPSET_PREFIX}-${name}"
    set6="${IPSET_PREFIX}-${name}-v6"
    iptables -D INPUT -m set --match-set "$set4" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT:${name}] " 2>/dev/null || true
    iptables -D INPUT -m set --match-set "$set4" src -j DROP 2>/dev/null || true
    if [ "$ENABLE_IPV6" = true ]; then
      ip6tables -D INPUT -m set --match-set "$set6" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT-v6:${name}] " 2>/dev/null || true
      ip6tables -D INPUT -m set --match-set "$set6" src -j DROP 2>/dev/null || true
    fi
  done < "$FEEDS_CONFIG" 2>/dev/null || true

  # Add whitelist rules (highest priority).
  # `line` is the next insert position; it advances only when an insert
  # succeeded. Advancing unconditionally made every later `-I INPUT N` fail
  # with "index of insertion too big" after a single failed insert.
  line=1
  if iptables -I INPUT 1 -m set --match-set "$WHITELIST_IPSET" src -j ACCEPT; then
    line=2
  else
    log_message " ⚠ Could not insert IPv4 whitelist rule"
  fi
  [ "$ENABLE_IPV6" = true ] && ip6tables -I INPUT 1 -m set --match-set "$WHITELIST_IPSET_V6" src -j ACCEPT

  # Add per-feed rules
  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    [ "$enabled" != "1" ] && continue

    set4="${IPSET_PREFIX}-${name}"
    set6="${IPSET_PREFIX}-${name}-v6"

    # IPv4 (a rule cannot reference a set that does not exist)
    if _ensure_feed_ipset "$set4" inet; then
      if iptables -I INPUT "$line" -m set --match-set "$set4" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT:${name}] "; then
        line=$((line + 1))
      fi
      if iptables -I INPUT "$line" -m set --match-set "$set4" src -j DROP; then
        line=$((line + 1))
      fi
    else
      log_message " ⚠ Skipping IPv4 rules for $name: ipset $set4 unavailable"
    fi

    # IPv6
    if [ "$ENABLE_IPV6" = true ]; then
      if _ensure_feed_ipset "$set6" inet6; then
        ip6tables -A INPUT -m set --match-set "$set6" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT-v6:${name}] "
        ip6tables -A INPUT -m set --match-set "$set6" src -j DROP
      else
        log_message " ⚠ Skipping IPv6 rules for $name: ipset $set6 unavailable"
      fi
    fi
  done < "$FEEDS_CONFIG"

  # SSH rate limiting (added only once; -C checks for the existing rule)
  if ! iptables -C INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --set 2>/dev/null; then
    iptables -I INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --set
    iptables -I INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --update --seconds 60 --hitcount 4 -j DROP
  fi

  # Save rules
  if [ -d /etc/sysconfig ]; then
    iptables-save > /etc/sysconfig/iptables
    [ "$ENABLE_IPV6" = true ] && ip6tables-save > /etc/sysconfig/ip6tables
  elif [ -d /etc/iptables ]; then
    iptables-save > /etc/iptables/rules.v4
    [ "$ENABLE_IPV6" = true ] && ip6tables-save > /etc/iptables/rules.v6
  fi

  log_message "✓ iptables rules applied (per-feed)"
}
|
||||
|
||||
# Install and enable a oneshot systemd unit that restores ipsets first and
# then the IPv4/IPv6 iptables rules, trying the /etc/sysconfig paths before
# the /etc/iptables ones.
setup_iptables_persistence() {
  local unit_name="iptables-restore.service"
  local unit_path="/etc/systemd/system/${unit_name}"

  log_message "Setting up iptables persistence..."

  # Quoted delimiter: the unit text is written literally, nothing is expanded.
  cat > "$unit_path" <<'UNIT'
[Unit]
Description=Restore iptables rules
Before=network-pre.target
Wants=network-pre.target

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/bash -c 'ipset restore -f /etc/sysconfig/ipset 2>/dev/null || ipset restore -f /etc/iptables/ipsets 2>/dev/null || true'
ExecStart=/bin/bash -c 'iptables-restore /etc/sysconfig/iptables 2>/dev/null || iptables-restore /etc/iptables/rules.v4 2>/dev/null || true'
ExecStart=/bin/bash -c 'ip6tables-restore /etc/sysconfig/ip6tables 2>/dev/null || ip6tables-restore /etc/iptables/rules.v6 2>/dev/null || true'

[Install]
WantedBy=multi-user.target
UNIT

  systemctl daemon-reload
  systemctl enable "$unit_name" 2>/dev/null || true
  log_message "✓ iptables persistence configured"
}
|
||||
|
||||
# Install and start a systemd service + timer that runs "<this script> update"
# on the UPDATE_INTERVAL schedule. Does nothing when ENABLE_AUTO_UPDATE=false.
#
# Returns 1 when the script's own path cannot be resolved.
setup_auto_update() {
  if [ "$ENABLE_AUTO_UPDATE" = false ]; then
    return 0
  fi

  # Declaration and assignment are split so a readlink failure is not masked
  # by `local` (which would produce a unit with an empty ExecStart path).
  local script
  if ! script=$(readlink -f "$0") || [ -z "$script" ]; then
    log_message "✗ Cannot resolve script path; auto-update not configured"
    return 1
  fi

  # The scheduled run starts from the script defaults, so carry the IPv6
  # choice made at install time; otherwise a --no-ipv6 install would still
  # create and load IPv6 sets on every timer run.
  local update_args="update"
  if [ "$ENABLE_IPV6" != true ]; then
    update_args="--no-ipv6 update"
  fi

  cat > /etc/systemd/system/iptables-threat-feeds-update.service <<EOF
[Unit]
Description=Update iptables threat feeds (per-feed)
After=network-online.target

[Service]
Type=oneshot
ExecStart=$script $update_args
EOF

  cat > /etc/systemd/system/iptables-threat-feeds-update.timer <<EOF
[Unit]
Description=Update threat feeds $UPDATE_INTERVAL

[Timer]
OnCalendar=$UPDATE_INTERVAL
Persistent=true

[Install]
WantedBy=timers.target
EOF

  systemctl daemon-reload
  systemctl enable --now iptables-threat-feeds-update.timer
}
|
||||
|
||||
# Print a table with, for every enabled feed, the number of IPv4/IPv6 entries
# in its ipsets and the number of kernel log lines from the last hour that
# carry the feed's "[THREAT:<name>]" prefix.
cmd_show_stats() {
  local enabled name url type description
  local v4 v6 blocks

  echo "Per-Feed Blocking Statistics"
  printf "%-25s %10s %10s %12s\n" "FEED" "IPv4" "IPv6" "BLOCKS(1h)"
  echo "-------------------------------------------------------------------"

  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    [ "$enabled" != "1" ] && continue

    # `grep -c` prints "0" AND exits 1 when nothing matches, so the former
    # `|| echo 0` produced the two-line value "0\n0" and broke the table.
    # `|| true` keeps grep's own "0".
    v4=$(ipset list "${IPSET_PREFIX}-${name}" 2>/dev/null | grep -c '^[0-9.]') || true
    v6=0
    if [ "$ENABLE_IPV6" = true ]; then
      v6=$(ipset list "${IPSET_PREFIX}-${name}-v6" 2>/dev/null | grep -c '^[0-9a-fA-F:]') || true
    fi
    # -F: match the prefix literally (feed names may contain '.').
    blocks=$(journalctl -k --since "1 hour ago" 2>/dev/null | grep -cF "[THREAT:${name}]") || true

    printf "%-25s %10s %10s %12s\n" "$name" "${v4:-0}" "${v6:-0}" "${blocks:-0}"
  done < "$FEEDS_CONFIG"
}
|
||||
|
||||
# Print every feed in FEEDS_CONFIG with its ENABLED/DISABLED status and
# description. Comment and blank lines are skipped.
cmd_list_feeds() {
  # Keep the read-loop fields local so they cannot clobber globals.
  local enabled name url type description status

  printf "%-10s %-25s %s\n" "STATUS" "NAME" "DESC"
  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue

    if [ "$enabled" = "1" ]; then
      status="ENABLED"
    else
      status="DISABLED"
    fi
    printf "%-10s %-25s %s\n" "$status" "$name" "$description"
  done < "$FEEDS_CONFIG"
}
|
||||
|
||||
# Add WHITELIST_IP to the IPv4 or IPv6 whitelist ipset (chosen by whether the
# value contains ':') and persist all ipsets.
# Exits 1 when WHITELIST_IP is empty or ipset rejects the entry.
cmd_whitelist_add() {
  [ -z "$WHITELIST_IP" ] && { echo "Usage: $0 whitelist-add <IP|CIDR>"; exit 1; }

  local target_set family_label
  if [[ "$WHITELIST_IP" == *:* ]]; then
    target_set="$WHITELIST_IPSET_V6"
    family_label="IPv6"
  else
    target_set="$WHITELIST_IPSET"
    family_label="IPv4"
  fi

  # Explicit if/else instead of `add && log || fail`: with the chained form
  # a logging failure (e.g. unwritable log file) was reported as
  # "Failed to add" and exited 1 even though the entry had been added.
  if ! ipset add "$target_set" "$WHITELIST_IP" 2>/dev/null; then
    echo "Failed to add $WHITELIST_IP"
    exit 1
  fi
  log_message "✓ Added to $family_label whitelist: $WHITELIST_IP" || true

  ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true
}
|
||||
|
||||
# Seed the whitelist ipsets with private, link-local and loopback ranges
# (IPv6 ranges only when ENABLE_IPV6=true), then persist all ipsets.
# Entries ipset refuses (typically because they already exist) are reported
# but do not abort the run.
cmd_whitelist_init() {
  local net   # loop variable; kept local so it cannot clobber a global
  local -a private_networks=(
    "10.0.0.0/8"
    "172.16.0.0/12"
    "192.168.0.0/16"
    "169.254.0.0/16"
    "127.0.0.0/8"
  )
  local -a private_networks_v6=(
    "fc00::/7"
    "fe80::/10"
    "::1"
  )

  log_message "Initializing whitelist with private networks..."

  echo "Adding IPv4 private networks to whitelist..."
  for net in "${private_networks[@]}"; do
    if ipset add "$WHITELIST_IPSET" "$net" 2>/dev/null; then
      echo " ✓ $net"
    else
      echo " - $net (already exists or error)"
    fi
  done

  if [ "$ENABLE_IPV6" = true ]; then
    echo "Adding IPv6 private networks to whitelist..."
    for net in "${private_networks_v6[@]}"; do
      if ipset add "$WHITELIST_IPSET_V6" "$net" 2>/dev/null; then
        echo " ✓ $net"
      else
        echo " - $net (already exists or error)"
      fi
    done
  fi

  ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true
  log_message "✓ Whitelist initialized with RFC1918/private networks"
}
|
||||
|
||||
# Print the members of the IPv4 whitelist ipset and, when ENABLE_IPV6=true,
# of the IPv6 whitelist ipset. "No entries" is shown for an empty or missing
# set.
cmd_whitelist_list() {
  local rule="=========================================="

  printf '%s\n' "$rule" "IPv4 Whitelist ($WHITELIST_IPSET)" "$rule"
  ipset list "$WHITELIST_IPSET" 2>/dev/null | grep '^[0-9]' || echo "No entries"

  [ "$ENABLE_IPV6" = true ] || return 0

  printf '%s\n' "" "$rule" "IPv6 Whitelist ($WHITELIST_IPSET_V6)" "$rule"
  ipset list "$WHITELIST_IPSET_V6" 2>/dev/null | grep '^[0-9a-fA-F:]' || echo "No entries"
}
|
||||
|
||||
# Append a custom, enabled, "plain"-type feed to FEEDS_CONFIG.
#
# Globals read: FEED_NAME, FEED_URL, FEEDS_CONFIG
# Exits 1 when an argument is missing or invalid, or when a feed with the
# same name already exists.
cmd_add_feed() {
  if [ -z "$FEED_NAME" ] || [ -z "$FEED_URL" ]; then
    echo "Usage: $0 add-feed <NAME> <URL>"
    exit 1
  fi

  # The name becomes part of ipset names and cache file names and is a field
  # in the pipe-delimited config, so restrict it to a safe character set.
  local name_re='^[A-Za-z0-9._-]+$'
  if [[ ! "$FEED_NAME" =~ $name_re ]]; then
    echo "Invalid feed name '$FEED_NAME' (allowed: letters, digits, '.', '_', '-')" >&2
    exit 1
  fi

  # A '|' or whitespace in the URL would corrupt the config line.
  local bad_url_re='[[:space:]|]'
  if [[ "$FEED_URL" =~ $bad_url_re ]]; then
    echo "Invalid feed URL (must not contain whitespace or '|')" >&2
    exit 1
  fi

  # Exact comparison on the NAME field; a regex match let "a.b" collide
  # with an existing "axb".
  if awk -F'|' -v n="$FEED_NAME" \
      '($1 == "0" || $1 == "1") && $2 == n { found = 1 } END { exit !found }' \
      "$FEEDS_CONFIG" 2>/dev/null; then
    echo "Feed exists"
    exit 1
  fi

  echo "1|${FEED_NAME}|${FEED_URL}|plain|Custom: ${FEED_NAME}" >> "$FEEDS_CONFIG"
  log_message "✓ Added feed: $FEED_NAME"
}
|
||||
|
||||
# Remove a feed: delete its iptables rules, drop its line from FEEDS_CONFIG,
# destroy its ipsets and re-apply the remaining rules.
#
# Globals read: FEED_NAME, FEEDS_CONFIG, IPSET_PREFIX
# Exits 1 when FEED_NAME is empty or no such feed is configured.
cmd_remove_feed() {
  [ -z "$FEED_NAME" ] && { echo "Usage: $0 remove-feed <NAME>"; exit 1; }

  # Exact match on the NAME field. The former `sed "/|NAME|/d"` matched ANY
  # field, so `remove-feed plain` deleted every feed of type "plain".
  if ! awk -F'|' -v n="$FEED_NAME" \
      '($1 == "0" || $1 == "1") && $2 == n { found = 1 } END { exit !found }' \
      "$FEEDS_CONFIG" 2>/dev/null; then
    echo "Feed not found: $FEED_NAME" >&2
    exit 1
  fi

  local set4="${IPSET_PREFIX}-${FEED_NAME}"
  local set6="${IPSET_PREFIX}-${FEED_NAME}-v6"
  local tmp

  # Delete this feed's rules FIRST. apply_iptables_rules only cleans up feeds
  # still listed in the config, so the rules would otherwise stay forever,
  # and an ipset referenced by a rule cannot be destroyed.
  iptables -D INPUT -m set --match-set "$set4" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT:${FEED_NAME}] " 2>/dev/null || true
  iptables -D INPUT -m set --match-set "$set4" src -j DROP 2>/dev/null || true
  ip6tables -D INPUT -m set --match-set "$set6" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT-v6:${FEED_NAME}] " 2>/dev/null || true
  ip6tables -D INPUT -m set --match-set "$set6" src -j DROP 2>/dev/null || true

  # Rewrite the config without the feed. Copying back with `cat` (instead of
  # mv) keeps the original file's owner and permissions.
  tmp=$(mktemp) || { echo "Cannot create temporary file" >&2; exit 1; }
  awk -F'|' -v n="$FEED_NAME" '!(($1 == "0" || $1 == "1") && $2 == n)' "$FEEDS_CONFIG" > "$tmp" \
    && cat "$tmp" > "$FEEDS_CONFIG"
  rm -f "$tmp"

  # Remove ipsets
  ipset destroy "$set4" 2>/dev/null || true
  ipset destroy "$set6" 2>/dev/null || true

  log_message "✓ Removed feed: $FEED_NAME"
  log_message "Reapplying rules..."
  apply_iptables_rules
}
|
||||
|
||||
# Mark a feed as enabled in FEEDS_CONFIG and create its (empty) ipsets.
# The user still has to run 'update' and 'apply-rules' afterwards.
#
# Globals read: FEED_NAME, FEEDS_CONFIG, IPSET_PREFIX, ENABLE_IPV6
# Exits 1 when FEED_NAME is empty or no such feed is configured.
cmd_enable_feed() {
  [ -z "$FEED_NAME" ] && { echo "Usage: $0 enable-feed <NAME>"; exit 1; }

  if ! awk -F'|' -v n="$FEED_NAME" \
      '($1 == "0" || $1 == "1") && $2 == n { found = 1 } END { exit !found }' \
      "$FEEDS_CONFIG" 2>/dev/null; then
    echo "Feed not found: $FEED_NAME" >&2
    exit 1
  fi

  # Flip the flag with an exact comparison on the NAME field. The former sed
  # used the name as a regex, so enabling "a.b" also rewrote a feed called
  # "axb" into "a.b".
  local tmp
  tmp=$(mktemp) || { echo "Cannot create temporary file" >&2; exit 1; }
  awk -F'|' -v n="$FEED_NAME" '$1 == "0" && $2 == n { sub(/^0/, "1") } { print }' "$FEEDS_CONFIG" > "$tmp" \
    && cat "$tmp" > "$FEEDS_CONFIG"   # cat keeps the file's owner and mode
  rm -f "$tmp"
  log_message "✓ Enabled: $FEED_NAME"

  # Create ipsets if they don't exist
  if ! ipset list "${IPSET_PREFIX}-${FEED_NAME}" >/dev/null 2>&1; then
    ipset create "${IPSET_PREFIX}-${FEED_NAME}" hash:net family inet hashsize 4096 maxelem 200000
  fi
  if [ "$ENABLE_IPV6" = true ] && ! ipset list "${IPSET_PREFIX}-${FEED_NAME}-v6" >/dev/null 2>&1; then
    ipset create "${IPSET_PREFIX}-${FEED_NAME}-v6" hash:net family inet6 hashsize 4096 maxelem 200000
  fi

  log_message "Run 'update' to download IPs, then 'apply-rules' to add firewall rules"
}
|
||||
|
||||
# Mark a feed as disabled in FEEDS_CONFIG, re-apply the firewall rules (which
# drops the feed's rules) and then destroy its ipsets.
#
# Globals read: FEED_NAME, FEEDS_CONFIG, IPSET_PREFIX
# Exits 1 when FEED_NAME is empty or no such feed is configured.
cmd_disable_feed() {
  [ -z "$FEED_NAME" ] && { echo "Usage: $0 disable-feed <NAME>"; exit 1; }

  if ! awk -F'|' -v n="$FEED_NAME" \
      '($1 == "0" || $1 == "1") && $2 == n { found = 1 } END { exit !found }' \
      "$FEEDS_CONFIG" 2>/dev/null; then
    echo "Feed not found: $FEED_NAME" >&2
    exit 1
  fi

  # Exact comparison on the NAME field; the former sed treated the name as a
  # regex and could rewrite other feeds.
  local tmp
  tmp=$(mktemp) || { echo "Cannot create temporary file" >&2; exit 1; }
  awk -F'|' -v n="$FEED_NAME" '$1 == "1" && $2 == n { sub(/^1/, "0") } { print }' "$FEEDS_CONFIG" > "$tmp" \
    && cat "$tmp" > "$FEEDS_CONFIG"   # cat keeps the file's owner and mode
  rm -f "$tmp"

  log_message "✓ Disabled: $FEED_NAME"
  log_message "Reapplying rules..."
  apply_iptables_rules

  # Destroy ipsets to clear metrics. This runs AFTER the rules are re-applied:
  # a set that an iptables rule still references cannot be destroyed, so the
  # previous order (destroy, then re-apply) always failed silently.
  ipset destroy "${IPSET_PREFIX}-${FEED_NAME}" 2>/dev/null || true
  ipset destroy "${IPSET_PREFIX}-${FEED_NAME}-v6" 2>/dev/null || true
}
|
||||
|
||||
# Full installation: run every setup step in order, then print a summary
# with the number of enabled feeds and the most useful follow-up commands.
cmd_install() {
  local step
  local rule="=========================================="

  log_message "Installing per-feed mode..."

  for step in \
    check_requirements \
    create_directory_structure \
    initialize_feeds_config \
    setup_ipsets \
    update_feeds \
    apply_iptables_rules \
    setup_iptables_persistence \
    setup_auto_update; do
    "$step"
  done

  cat <<SUMMARY

${rule}
✓ Per-feed installation complete
${rule}
Feeds: $(grep -c '^1|' "$FEEDS_CONFIG")
Config: $FEEDS_CONFIG
Log: $LOG_FILE

Commands:
 $0 show-stats
 $0 list-feeds
 $0 update
 $0 whitelist-add <IP>
${rule}
SUMMARY
}
|
||||
|
||||
# Dry run: print the iptables/ip6tables rules that apply-rules would create
# for the enabled feeds, without touching the firewall.
#
# Globals read: FEEDS_CONFIG, IPSET_PREFIX, WHITELIST_IPSET,
#   WHITELIST_IPSET_V6, ENABLE_IPV6
cmd_test_rules() {
  # The config is read once into `feeds`; read-loop fields stay local so
  # they cannot clobber globals.
  local enabled name url type description
  local -a feeds=()
  local feed line v6_count
  local rule="=========================================="

  log_message "Testing iptables rule generation (dry-run mode)..."

  echo "$rule"
  echo "Rule Generation Test"
  echo "$rule"
  echo ""

  # Collect enabled feeds
  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    [ "$enabled" != "1" ] && continue
    feeds+=("$name")
  done < "$FEEDS_CONFIG"

  echo "✓ Found ${#feeds[@]} enabled feeds"
  echo ""

  # Show what would be generated
  echo "IPv4 rules that would be created:"
  echo " 1. Whitelist bypass: -I INPUT 1 -m set --match-set $WHITELIST_IPSET src -j ACCEPT"

  line=2
  for feed in "${feeds[@]}"; do
    echo " $line. [${feed}] LOG: -I INPUT $line -m set --match-set ${IPSET_PREFIX}-${feed} src -m limit --limit 5/min -j LOG"
    line=$((line + 1))
    echo " $line. [${feed}] DROP: -I INPUT $line -m set --match-set ${IPSET_PREFIX}-${feed} src -j DROP"
    line=$((line + 1))
  done

  echo ""
  echo "Total IPv4 rules: $((line - 1))"

  if [ "$ENABLE_IPV6" = true ]; then
    echo ""
    echo "IPv6 rules that would be created:"
    echo " 1. Whitelist bypass: -I INPUT 1 -m set --match-set $WHITELIST_IPSET_V6 src -j ACCEPT"

    v6_count=0
    for feed in "${feeds[@]}"; do
      v6_count=$((v6_count + 1))
      echo " $((v6_count * 2)). [${feed}] LOG: -A INPUT -m set --match-set ${IPSET_PREFIX}-${feed}-v6 src -j LOG"
      echo " $((v6_count * 2 + 1)). [${feed}] DROP: -A INPUT -m set --match-set ${IPSET_PREFIX}-${feed}-v6 src -j DROP"
    done

    echo ""
    echo "Total IPv6 rules: $((v6_count * 2 + 1))"
  fi

  echo ""
  echo "$rule"
  echo "✓ Test passed - rules would be generated successfully"
  echo " To apply these rules, run: $0 apply-rules"
  echo "$rule"
}
|
||||
|
||||
# Script entry point: parse the command line, then run the selected command.
# An unrecognised COMMAND value does nothing.
main() {
  parse_args "$@"

  # Commands that map one-to-one onto a handler function.
  local -A handlers=(
    [install]=cmd_install
    [test-rules]=cmd_test_rules
    [list-feeds]=cmd_list_feeds
    [show-stats]=cmd_show_stats
    [add-feed]=cmd_add_feed
    [remove-feed]=cmd_remove_feed
    [enable-feed]=cmd_enable_feed
    [disable-feed]=cmd_disable_feed
    [whitelist-add]=cmd_whitelist_add
    [whitelist-init]=cmd_whitelist_init
    [whitelist-list]=cmd_whitelist_list
  )

  if [[ "$COMMAND" == "update" ]]; then
    check_requirements
    create_directory_structure
    update_feeds
    # DO NOT apply rules here - only update ipsets
    # To regenerate rules, use: apply-rules, enable-feed, disable-feed, or remove-feed
  elif [[ "$COMMAND" == "apply-rules" ]]; then
    check_requirements
    apply_iptables_rules
  elif [[ -n "$COMMAND" && -n "${handlers[$COMMAND]:-}" ]]; then
    "${handlers[$COMMAND]}"
  fi
}
|
||||
|
||||
main "$@"
|
||||
@@ -0,0 +1,565 @@
|
||||
#!/bin/bash
|
||||
|
||||
##############################################################################
|
||||
#### Promtail to Grafana Alloy Migration Script ####
|
||||
#### ####
|
||||
#### Detects OS, reads existing Promtail config for Loki URL/hostname, ####
|
||||
#### generates equivalent Alloy River config, installs Alloy, and ####
|
||||
#### handles the cutover from Promtail to Alloy. ####
|
||||
#### ####
|
||||
#### Supports: Ubuntu, Debian, RHEL, CentOS, Rocky, Alma, Amazon Linux ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### License: MIT ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### Version: 1.0.0-030326 ####
|
||||
##############################################################################
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
readonly SCRIPT_NAME=$(basename "$0")
|
||||
readonly SCRIPT_VERSION="1.0.0-030326"
|
||||
|
||||
# Defaults
|
||||
ALLOY_CONFIG_DIR="/etc/alloy"
|
||||
ALLOY_CONFIG_FILE="/etc/alloy/config.alloy"
|
||||
PROMTAIL_CONFIG="/etc/promtail/config.yml"
|
||||
LOKI_URL=""
|
||||
CUSTOM_HOSTNAME=""
|
||||
DRY_RUN=false
|
||||
GENERATE_ONLY=false
|
||||
SKIP_INSTALL=false
|
||||
SKIP_CUTOVER=false
|
||||
KEEP_PROMTAIL=true
|
||||
VERBOSE=false
|
||||
INCLUDE_JOURNAL=true
|
||||
INCLUDE_NGINX=false
|
||||
INCLUDE_APACHE=false
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
# Logging helpers. Each takes the message as $1 and prefixes a coloured tag.
log()   { echo -e "${GREEN}[INFO]${NC} $1"; }
warn()  { echo -e "${YELLOW}[WARN]${NC} $1"; }
error() { echo -e "${RED}[ERROR]${NC} $1" >&2; }

# Print a debug message when VERBOSE=true.
#
# Always returns 0: the script runs under `set -e`, and several functions end
# with a debug call, so a non-zero status here would abort the whole run
# whenever --verbose is NOT given.
# Writes to stderr so the message is never captured by callers that are used
# inside command substitutions (e.g. config+=$(generate_file_source ...)).
debug() {
    if [[ "$VERBOSE" == true ]]; then
        echo -e "${BLUE}[DEBUG]${NC} $1" >&2
    fi
    return 0
}
|
||||
|
||||
# Print the usage text (options and examples) to stdout.
# Reads: SCRIPT_NAME.
show_help() {
    cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]

Migrate from Promtail to Grafana Alloy. Generates an Alloy config that
maintains Promtail-compatible labels so existing dashboards keep working.

OPTIONS:
    --loki-url URL          Loki push URL (default: extracted from Promtail config)
    --hostname NAME         Override hostname (default: auto-detect or from Promtail)
    --promtail-config F     Path to existing Promtail config (default: /etc/promtail/config.yml)
    --output FILE           Alloy config output path (default: /etc/alloy/config.alloy)
    --generate-only         Only generate the Alloy config, don't install or cutover
    --skip-install          Skip Alloy installation (already installed)
    --skip-cutover          Generate config and install, but don't stop Promtail
    --no-journal            Skip systemd journal collection
    --include-nginx         Include nginx log collection
    --include-apache        Include Apache log collection
    --remove-promtail       Remove Promtail package after cutover (default: keep)
    --dry-run               Show what would be done without making changes
    --verbose               Enable verbose output
    --version               Show version
    --help, -h              Show this help message

EXAMPLES:
    # Auto-detect everything from existing Promtail config
    sudo $SCRIPT_NAME

    # Specify Loki URL and hostname
    sudo $SCRIPT_NAME --loki-url http://loki.example.com:3100 --hostname web-01

    # Generate config only (don't install or cutover)
    $SCRIPT_NAME --generate-only --loki-url http://loki:3100 --output /tmp/config.alloy

    # Full migration with nginx logs
    sudo $SCRIPT_NAME --include-nginx --remove-promtail

    # Dry run to see what would happen
    sudo $SCRIPT_NAME --dry-run
EOF
}
|
||||
|
||||
# Parse command-line options into the script's global settings.
#
# Arguments: the script's "$@".
# Globals written: LOKI_URL, CUSTOM_HOSTNAME, PROMTAIL_CONFIG,
#   ALLOY_CONFIG_FILE, GENERATE_ONLY, SKIP_INSTALL, SKIP_CUTOVER,
#   INCLUDE_JOURNAL, INCLUDE_NGINX, INCLUDE_APACHE, KEEP_PROMTAIL,
#   DRY_RUN, VERBOSE.
# Exits 0 after --version/--help; exits 1 on an unknown option or when a
# value-taking option is the last argument (previously this surfaced as an
# opaque "unbound variable" failure under `set -u`).
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --loki-url|--hostname|--promtail-config|--output)
                if [[ $# -lt 2 ]]; then
                    error "Option $1 requires a value"
                    exit 1
                fi
                case "$1" in
                    --loki-url)        LOKI_URL="$2" ;;
                    --hostname)        CUSTOM_HOSTNAME="$2" ;;
                    --promtail-config) PROMTAIL_CONFIG="$2" ;;
                    --output)          ALLOY_CONFIG_FILE="$2" ;;
                esac
                shift 2
                ;;
            --generate-only)   GENERATE_ONLY=true; shift ;;
            --skip-install)    SKIP_INSTALL=true; shift ;;
            --skip-cutover)    SKIP_CUTOVER=true; shift ;;
            --no-journal)      INCLUDE_JOURNAL=false; shift ;;
            --include-nginx)   INCLUDE_NGINX=true; shift ;;
            --include-apache)  INCLUDE_APACHE=true; shift ;;
            --remove-promtail) KEEP_PROMTAIL=false; shift ;;
            --dry-run)         DRY_RUN=true; shift ;;
            --verbose)         VERBOSE=true; shift ;;
            --version)
                echo "$SCRIPT_NAME version $SCRIPT_VERSION"
                exit 0
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                error "Unknown option: $1"
                show_help
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
# Identify the distribution from /etc/os-release.
#
# Globals written: OS (the os-release ID), OS_PRETTY (PRETTY_NAME).
# Exits 1 when /etc/os-release does not exist.
# Both fields are optional in os-release; defaults are supplied so an absent
# field does not abort the script under `set -u`.
detect_os() {
    if [[ ! -f /etc/os-release ]]; then
        error "Cannot detect OS"
        exit 1
    fi

    # shellcheck disable=SC1091
    . /etc/os-release
    OS="${ID:-linux}"
    OS_PRETTY="${PRETTY_NAME:-$OS}"

    debug "Detected OS: $OS_PRETTY"
}
|
||||
|
||||
# Decide which hostname to stamp on every log stream.
#
# Precedence: --hostname value, then the first "host:" entry in the Promtail
# config, then the system hostname (FQDN if available).
# Globals read: CUSTOM_HOSTNAME, PROMTAIL_CONFIG.
# Globals written: DETECTED_HOSTNAME.
detect_hostname() {
    local from_promtail=""

    # 1. Explicit override from the command line.
    if [[ -n "$CUSTOM_HOSTNAME" ]]; then
        DETECTED_HOSTNAME="$CUSTOM_HOSTNAME"
        debug "Using custom hostname: $DETECTED_HOSTNAME"
        return
    fi

    # 2. Value already used by Promtail, so existing label values stay stable.
    if [[ -f "$PROMTAIL_CONFIG" ]]; then
        from_promtail=$(
            grep -m1 'host:' "$PROMTAIL_CONFIG" 2>/dev/null \
                | awk '{print $2}' \
                | tr -d '"' \
                || true
        )
        if [[ -n "$from_promtail" ]]; then
            DETECTED_HOSTNAME="$from_promtail"
            debug "Extracted hostname from Promtail config: $DETECTED_HOSTNAME"
            return
        fi
    fi

    # 3. Fall back to what the system reports.
    DETECTED_HOSTNAME=$(hostname -f 2>/dev/null || hostname)
    debug "Using system hostname: $DETECTED_HOSTNAME"
}
|
||||
|
||||
# Determine the Loki base URL (without the /loki/api/v1/push suffix).
#
# Precedence: --loki-url value, then the first "url:" key in the Promtail
# config. Exits 1 with an explanation when neither yields a value.
# Globals read: PROMTAIL_CONFIG. Globals read/written: LOKI_URL.
#
# The value is taken as "everything after url:", so both the mapping form
# ("  url: http://...") and the usual list-item form ("  - url: http://...")
# work. Splitting on whitespace and taking field 2 returned the literal
# "url:" for the list-item form.
detect_loki_url() {
    if [[ -n "$LOKI_URL" ]]; then
        debug "Using provided Loki URL: $LOKI_URL"
        return
    fi

    if [[ -f "$PROMTAIL_CONFIG" ]]; then
        LOKI_URL=$(
            awk '/^[ \t-]*url:/ { sub(/^[ \t-]*url:[ \t]*/, ""); print; exit }' \
                "$PROMTAIL_CONFIG" 2>/dev/null \
                | tr -d "\"'" \
                | sed -e 's/[[:space:]]*$//' -e 's|/loki/api/v1/push/*$||' \
                || true
        )
        if [[ -n "$LOKI_URL" ]]; then
            debug "Extracted Loki URL from Promtail config: $LOKI_URL"
            return
        fi
    fi

    error "Could not determine Loki URL"
    error "Provide with --loki-url or ensure Promtail config exists at $PROMTAIL_CONFIG"
    exit 1
}
|
||||
|
||||
# Record whether the promtail systemd unit is currently active.
# Globals written: PROMTAIL_RUNNING (true/false).
check_promtail_status() {
    if ! systemctl is-active --quiet promtail 2>/dev/null; then
        PROMTAIL_RUNNING=false
        debug "Promtail is not running"
        return
    fi

    PROMTAIL_RUNNING=true
    log "Promtail is currently running"
}
|
||||
|
||||
# Emit the Alloy block(s) that tail one log path, on stdout.
#
# Arguments:
#   $1 - component label (used as the Alloy block name)
#   $2 - log path; may contain '*' wildcards
#   $3 - value for the "job" label
#   $4 - extra label lines appended after "host" (may be empty)
# Globals read: DRY_RUN, DETECTED_HOSTNAME.
#
# A plain path is emitted only when the file exists (or in dry-run mode).
# A wildcard path is always emitted, and goes through local.file_match:
# loki.source.file performs no file discovery of its own, so a glob placed
# directly in its targets is not expanded.
#
# Callers capture stdout into the config, so the "skipping" diagnostic is
# forced to stderr and its status ignored.
generate_file_source() {
    local label="$1"
    local path="$2"
    local job="$3"
    local extra_labels="${4:-}"
    local is_glob=false

    if [[ "$path" == *"*"* ]]; then
        is_glob=true
    fi

    if [[ "$DRY_RUN" != true && "$is_glob" != true && ! -f "$path" ]]; then
        debug "Skipping $path (file does not exist)" >&2 || true
        return 0
    fi

    if [[ "$is_glob" == true ]]; then
        cat << EOF

local.file_match "$label" {
  path_targets = [
    {
      "__path__" = "$path",
      "job" = "$job",
      "host" = "$DETECTED_HOSTNAME",${extra_labels}
    },
  ]
}

loki.source.file "$label" {
  targets    = local.file_match.${label}.targets
  forward_to = [loki.write.default.receiver]
}
EOF
    else
        cat << EOF

loki.source.file "$label" {
  targets = [
    {
      "__path__" = "$path",
      "job" = "$job",
      "host" = "$DETECTED_HOSTNAME",${extra_labels}
    },
  ]
  forward_to = [loki.write.default.receiver]
}
EOF
    fi
}
|
||||
|
||||
# Build the complete Alloy configuration text.
#
# Globals read: OS, OS_PRETTY, DETECTED_HOSTNAME, LOKI_URL, INCLUDE_JOURNAL,
#   INCLUDE_NGINX, INCLUDE_APACHE.
# Globals written: GENERATED_CONFIG (the full config as one string).
#
# Sections, in order: header + logging, optional journal source, OS-specific
# file sources, the /var/log/*.log wildcard, optional nginx/Apache sources,
# and the loki.write endpoint every source forwards to.
#
# Journal relabeling is attached through the source's `relabel_rules`
# argument. The rules read __journal_* labels, so they have to run inside
# loki.source.journal rather than in a downstream loki.relabel receiver;
# the loki.relabel component therefore only supplies rules and forwards
# nowhere.
generate_alloy_config() {
    log "Generating Alloy config for $OS ($DETECTED_HOSTNAME)..."

    local os_label
    case "$OS" in
        ubuntu|debian)                    os_label="ubuntu" ;;
        rhel|centos|rocky|almalinux|amzn) os_label="rhel-family" ;;
        *)                                os_label="$OS" ;;
    esac

    local config=""

    # Header
    config+="// Grafana Alloy Configuration for $DETECTED_HOSTNAME
// Migrated from Promtail on $(date +%Y-%m-%d)
// OS: $OS_PRETTY
// Labels maintained for Promtail dashboard compatibility

logging {
  level = \"info\"
}
"

    # Journal source
    if [[ "$INCLUDE_JOURNAL" == true ]]; then
        config+="
// System logs via systemd journal
loki.relabel \"journal_relabel\" {
  forward_to = []

  rule {
    source_labels = [\"__journal__systemd_unit\"]
    target_label  = \"unit\"
  }

  rule {
    source_labels = [\"__journal_priority\"]
    target_label  = \"priority\"
  }

  rule {
    source_labels = [\"__journal__hostname\"]
    target_label  = \"hostname\"
  }
}

loki.source.journal \"systemd_journal\" {
  max_age       = \"12h\"
  relabel_rules = loki.relabel.journal_relabel.rules
  labels = {
    job  = \"systemd-journal\",
    host = \"$DETECTED_HOSTNAME\",
    os   = \"$os_label\",
  }
  forward_to = [loki.write.default.receiver]
}
"
    fi

    # OS-specific file sources
    case "$OS" in
        ubuntu|debian)
            config+="
// Ubuntu/Debian system logs"
            config+=$(generate_file_source "syslog" "/var/log/syslog" "messages" "
      \"os\" = \"ubuntu\",")
            config+=$(generate_file_source "auth" "/var/log/auth.log" "auth" "
      \"log_type\" = \"authentication\",")
            config+=$(generate_file_source "kern" "/var/log/kern.log" "kernel" "")
            config+=$(generate_file_source "cron" "/var/log/cron.log" "cron" "")
            config+=$(generate_file_source "mail" "/var/log/mail.log" "mail" "")
            config+=$(generate_file_source "apt" "/var/log/apt/history.log" "packages" "
      \"package_manager\" = \"apt\",")
            config+=$(generate_file_source "boot" "/var/log/boot.log" "boot" "")
            ;;
        rhel|centos|rocky|almalinux|amzn)
            config+="
// RHEL/CentOS/Rocky/Alma/Amazon Linux system logs"
            config+=$(generate_file_source "messages" "/var/log/messages" "messages" "
      \"os\" = \"rhel-family\",")
            config+=$(generate_file_source "secure" "/var/log/secure" "auth" "
      \"log_type\" = \"authentication\",")
            config+=$(generate_file_source "cron" "/var/log/cron" "cron" "")
            config+=$(generate_file_source "maillog" "/var/log/maillog" "mail" "")
            config+=$(generate_file_source "yum" "/var/log/yum.log" "packages" "
      \"package_manager\" = \"yum\",")
            config+=$(generate_file_source "boot" "/var/log/boot.log" "boot" "")
            ;;
        *)
            config+="
// Generic system logs"
            config+=$(generate_file_source "syslog" "/var/log/syslog" "messages" "")
            config+=$(generate_file_source "auth" "/var/log/auth.log" "auth" "
      \"log_type\" = \"authentication\",")
            ;;
    esac

    # Application wildcard
    config+=$(generate_file_source "application_logs" "/var/log/*.log" "application" "")

    # Nginx
    if [[ "$INCLUDE_NGINX" == true ]]; then
        config+="
// Nginx logs"
        config+=$(generate_file_source "nginx_access" "/var/log/nginx/access.log" "nginx" "
      \"log_type\" = \"access\",")
        config+=$(generate_file_source "nginx_error" "/var/log/nginx/error.log" "nginx" "
      \"log_type\" = \"error\",")
    fi

    # Apache (Debian-style and RHEL-style paths; missing files are skipped)
    if [[ "$INCLUDE_APACHE" == true ]]; then
        config+="
// Apache logs"
        config+=$(generate_file_source "apache_access" "/var/log/apache2/access.log" "apache" "
      \"log_type\" = \"access\",")
        config+=$(generate_file_source "apache_error" "/var/log/apache2/error.log" "apache" "
      \"log_type\" = \"error\",")
        config+=$(generate_file_source "httpd_access" "/var/log/httpd/access_log" "apache" "
      \"log_type\" = \"access\",")
        config+=$(generate_file_source "httpd_error" "/var/log/httpd/error_log" "apache" "
      \"log_type\" = \"error\",")
    fi

    # Loki write endpoint
    config+="

// Write to Loki
loki.write \"default\" {
  endpoint {
    url = \"${LOKI_URL}/loki/api/v1/push\"
  }
}
"

    GENERATED_CONFIG="$config"
}
|
||||
|
||||
# Persist GENERATED_CONFIG to the given path.
#
# Arguments: $1 - destination file.
# Globals read: DRY_RUN, GENERATED_CONFIG.
# In dry-run mode the config is printed between marker lines and nothing is
# written. Otherwise the parent directory is created, any existing file is
# copied to <file>.bak.<timestamp>, and the new config replaces it.
write_config() {
    local output_file="$1"
    local output_dir backup

    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would write config to $output_file"
        printf '%s\n' \
            "--- Generated config.alloy ---" \
            "$GENERATED_CONFIG" \
            "--- End config ---"
        return
    fi

    output_dir=$(dirname "$output_file")
    mkdir -p "$output_dir"

    # Keep a timestamped copy of whatever was there before.
    if [[ -f "$output_file" ]]; then
        backup="${output_file}.bak.$(date +%Y%m%d%H%M%S)"
        cp "$output_file" "$backup"
        log "Backed up existing config to $backup"
    fi

    printf '%s\n' "$GENERATED_CONFIG" > "$output_file"
    log "Alloy config written to $output_file"
}
|
||||
|
||||
# Install the Grafana Alloy package from Grafana's apt/rpm repository.
#
# Globals read: DRY_RUN, OS.
# No-op in dry-run mode or when an `alloy` binary is already on PATH.
# Exits 1 for distributions without an automated install path.
install_alloy() {
    local rpm_tool

    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would install Grafana Alloy"
        return
    fi

    if command -v alloy >/dev/null 2>&1; then
        log "Alloy is already installed: $(alloy --version 2>&1 | head -1)"
        return
    fi

    log "Installing Grafana Alloy..."

    case "$OS" in
        ubuntu|debian)
            # Register Grafana's signing key and apt repository, then install.
            apt-get install -y apt-transport-https software-properties-common
            mkdir -p /etc/apt/keyrings/
            wget -q -O - https://apt.grafana.com/gpg.key | gpg --dearmor | tee /etc/apt/keyrings/grafana.gpg > /dev/null
            echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" | tee /etc/apt/sources.list.d/grafana.list
            apt-get update -qq
            apt-get install -y alloy
            ;;
        rhel|centos|rocky|almalinux|amzn)
            # Drop in the Grafana yum repo definition (quoted delimiter: the
            # content is written literally).
            cat > /etc/yum.repos.d/grafana.repo << 'REPO'
[grafana]
name=grafana
baseurl=https://rpm.grafana.com
repo_gpgcheck=1
enabled=1
gpgcheck=1
gpgkey=https://rpm.grafana.com/gpg.key
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt
REPO
            # Prefer dnf where it exists, otherwise fall back to yum.
            rpm_tool=yum
            if command -v dnf >/dev/null 2>&1; then
                rpm_tool=dnf
            fi
            "$rpm_tool" install -y alloy
            ;;
        *)
            error "Unsupported OS for automatic installation: $OS"
            error "Install Alloy manually: https://grafana.com/docs/alloy/latest/set-up/install/"
            exit 1
            ;;
    esac

    log "Alloy installed: $(alloy --version 2>&1 | head -1)"
}
|
||||
|
||||
# Syntax-check the written config by running `alloy fmt` on it.
#
# Globals read: DRY_RUN, ALLOY_CONFIG_FILE.
# Skipped in dry-run mode and (with a warning) when alloy is not installed.
# Exits 1 when `alloy fmt` reports a failure.
validate_config() {
    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would validate config with 'alloy fmt'"
        return
    fi

    if ! command -v alloy >/dev/null 2>&1; then
        warn "Alloy not installed, skipping validation"
        return
    fi

    log "Validating Alloy config..."

    if ! alloy fmt "$ALLOY_CONFIG_FILE" >/dev/null 2>&1; then
        error "Config validation failed. Check $ALLOY_CONFIG_FILE for syntax errors"
        error "Run: alloy fmt $ALLOY_CONFIG_FILE"
        exit 1
    fi

    log "Config validation passed"
}
|
||||
|
||||
# Switch log shipping from Promtail to Alloy.
#
# Globals read: DRY_RUN, KEEP_PROMTAIL, OS.
# Steps: stop and disable Promtail if it is active, add the alloy user to
# the adm and systemd-journal groups where they exist, enable and (re)start
# Alloy, then verify it is active. If Alloy is not active, Promtail is
# re-enabled and the script exits 1. Finally the Promtail package is removed
# unless KEEP_PROMTAIL is true.
#
# Alloy is restarted rather than started: `enable --now` leaves an
# already-running instance untouched, so it would keep the old config and
# group memberships. Failures of enable/restart are tolerated here so that,
# under `set -e`, control still reaches the is-active check and its rollback.
perform_cutover() {
    local grp

    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would stop Promtail and start Alloy"
        return
    fi

    # Stop Promtail
    if systemctl is-active --quiet promtail 2>/dev/null; then
        log "Stopping Promtail..."
        systemctl stop promtail
        systemctl disable promtail
        log "Promtail stopped and disabled"
    fi

    # Add alloy user to required groups (best effort)
    for grp in adm systemd-journal; do
        if getent group "$grp" >/dev/null 2>&1; then
            usermod -a -G "$grp" alloy 2>/dev/null || true
        fi
    done

    # Start Alloy
    log "Starting Alloy..."
    systemctl enable alloy || warn "Could not enable the alloy service"
    systemctl restart alloy || warn "systemctl restart alloy reported a failure"
    sleep 2

    if systemctl is-active --quiet alloy; then
        log "Alloy is running"
    else
        error "Alloy failed to start. Check: journalctl -u alloy --no-pager -n 30"
        error "Rolling back — restarting Promtail"
        systemctl enable --now promtail 2>/dev/null || true
        exit 1
    fi

    # Remove Promtail if requested
    if [[ "$KEEP_PROMTAIL" == false ]]; then
        log "Removing Promtail package..."
        case "$OS" in
            ubuntu|debian) apt-get remove -y promtail 2>/dev/null || true ;;
            *) yum remove -y promtail 2>/dev/null || dnf remove -y promtail 2>/dev/null || true ;;
        esac
        log "Promtail removed"
    else
        log "Promtail package kept (use 'systemctl start promtail' to rollback)"
    fi
}
|
||||
|
||||
# Print the end-of-run summary.
#
# Globals read: OS_PRETTY, DETECTED_HOSTNAME, LOKI_URL, ALLOY_CONFIG_FILE,
#   DRY_RUN, GENERATE_ONLY.
# Service status, verification and rollback hints are shown only for a real
# (non dry-run, non generate-only) migration.
print_summary() {
    cat << EOF

==========================================
  Migration Summary
==========================================
  OS:            $OS_PRETTY
  Hostname:      $DETECTED_HOSTNAME
  Loki URL:      $LOKI_URL
  Alloy config:  $ALLOY_CONFIG_FILE
EOF

    if [[ "$DRY_RUN" != true && "$GENERATE_ONLY" != true ]]; then
        cat << EOF

  Alloy status:  $(systemctl is-active alloy 2>/dev/null || echo 'not checked')

  Verify:
    systemctl status alloy
    journalctl -u alloy -f
    curl http://localhost:12345  (Alloy UI)

  Rollback:
    sudo systemctl stop alloy
    sudo systemctl start promtail
EOF
    fi

    if [[ "$GENERATE_ONLY" == true ]]; then
        echo ""
        echo "  Config generated. Review and deploy manually."
    fi

    echo "=========================================="
    echo ""
}
|
||||
|
||||
# Orchestrate the migration: parse options, detect the environment, generate
# and write the config, then (unless told otherwise) install Alloy, validate
# the config and cut over from Promtail.
#
# Root is required except in --generate-only or --dry-run mode.
main() {
    local needs_root=true

    parse_arguments "$@"

    log "Promtail → Alloy Migration Script v${SCRIPT_VERSION}"
    echo ""

    # Modes that change nothing on the system may run unprivileged.
    if [[ "$GENERATE_ONLY" == true || "$DRY_RUN" == true ]]; then
        needs_root=false
    fi
    if [[ "$needs_root" == true && "$EUID" -ne 0 ]]; then
        error "This script must be run as root (or use --generate-only)"
        exit 1
    fi

    # Environment discovery
    detect_os
    detect_hostname
    detect_loki_url
    check_promtail_status

    # Generate config
    generate_alloy_config
    write_config "$ALLOY_CONFIG_FILE"

    if [[ "$GENERATE_ONLY" == true ]]; then
        print_summary
        exit 0
    fi

    # Install Alloy
    [[ "$SKIP_INSTALL" == true ]] || install_alloy

    # Validate
    validate_config

    # Cutover
    [[ "$SKIP_CUTOVER" == true ]] || perform_cutover

    print_summary
}
|
||||
|
||||
main "$@"
|
||||
Executable
+305
@@ -0,0 +1,305 @@
|
||||
#!/bin/bash
|
||||
|
||||
#############################################################
|
||||
#### Grafana Mimir Install Script for Oracle Linux, ####
|
||||
#### Centos/Redhat and Debian/Ubuntu Servers. ####
|
||||
#### ####
|
||||
#### Author: Phil Connor 01/09/2025 ####
|
||||
#### License: MIT ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### Version 1.00.010925 ####
|
||||
#### ####
|
||||
#### To use this script chmod it to 755 ####
|
||||
#### or simply type bash <filename.sh> ####
|
||||
#############################################################
|
||||
|
||||
########################
|
||||
### System Variables ###
|
||||
########################
|
||||
# Distribution name, lower-cased: the Distributor ID from lsb_release when
# that tool exists, otherwise the first word of PRETTY_NAME in os-release.
if command -v lsb_release >/dev/null 2>&1; then
    OS=$(lsb_release -i | awk '{print $3}' | tr '[:upper:]' '[:lower:]')
else
    OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
fi

domain=mylinux.work        # base domain for the optional nginx vhost
bindir=/usr/local/bin      # where the mimir binary is installed
mimirdir=/etc/prometheus   # directory holding mimir.yml
datadir=/mimir             # root of Mimir's on-disk data

# Directory that receives mimir.service. Pick a directory that actually
# exists: the administrator location first, then the vendor locations. (The
# fallback used to select /usr/lib/systemd/system precisely when it had just
# been found missing.)
if [ -d "/etc/systemd/system" ]; then
    psdir='/etc/systemd/system'
elif [ -d "/usr/lib/systemd/system" ]; then
    psdir='/usr/lib/systemd/system'
else
    psdir='/lib/systemd/system'
fi
|
||||
|
||||
#########################
|
||||
### Check permissions ###
|
||||
#########################
|
||||
# Everything below modifies the system, so refuse to run unprivileged.
if [[ $EUID -ne 0 ]]; then
    echo ''
    echo "$(basename "$0") This script must be run as root! Login as root, or sudo/su."
    echo ''
    exit 1
fi

######################
### Package Manager ##
######################
# Debian is handled together with Ubuntu (the header lists both); every
# other distribution uses yum.
case "$OS" in
    ubuntu|debian) pkgmgr="apt -y" ;;
    *)             pkgmgr="yum -y" ;;
esac

#################################
####  Add Mimir User/Group   ####
#################################
# Exact lookups through getent: a plain grep of /etc/passwd also matched any
# line merely containing "mimir" and echoed the match to the terminal.
# Group and user are checked separately so a pre-existing group does not
# make groupadd fail.
if ! getent group mimir >/dev/null 2>&1; then
    groupadd --system mimir
fi

if ! getent passwd mimir >/dev/null 2>&1; then
    case "$OS" in
        ubuntu|debian) useradd -s /sbin/nologin --system -g mimir mimir ;;
        *)             useradd -m -s /bin/false mimir -g mimir ;;
    esac
fi

#################################
#### Check for wget and curl ####
#################################
# Install any missing download/extraction helper.
for tool in wget curl unzip; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        # $pkgmgr holds "command flags" and is split on purpose.
        # shellcheck disable=SC2086
        $pkgmgr install "$tool"
    fi
done
|
||||
|
||||
##########################
|
||||
### Install Mimir ###
|
||||
##########################
|
||||
# Install Grafana Mimir as a single-instance systemd service.
#
# Globals read: mimirdir, datadir, bindir, psdir.
# Creates the config and data directories, downloads the latest release
# asset(s) matching linux-amd64 into /tmp, installs the binary as
# $bindir/mimir, writes $mimirdir/mimir.yml and $psdir/mimir.service, then
# enables and starts the service and prints connection hints.
# Exits the script on directory-creation, download or install failures.
install_mimir() {
    {
        local dir

        # Create base directories if they don't exist
        for dir in "$mimirdir" "$datadir"; do
            if [ ! -d "$dir" ]; then
                mkdir -p "$dir" || { echo "Failed to create $dir directory"; exit 1; }
            fi
        done

        # Create Mimir subdirectories
        mkdir -p "$datadir"/{tsdb-sync,data/tsdb,mimir-tsdb,compactor,mimir-ruler}
        chown -R mimir:mimir "$datadir"

        # Download and install Mimir
        cd /tmp || exit 2
        echo "Downloading latest Grafana Mimir..."
        curl -s https://api.github.com/repos/grafana/mimir/releases/latest \
            | grep browser_download_url \
            | grep linux-amd64 \
            | cut -d '"' -f 4 \
            | wget -qi - || { echo "Failed to download Mimir"; exit 1; }

        tar -xvf mimir-linux-amd64.tar.gz
        mv mimir-linux-amd64 "$bindir/mimir" || exit 1
        chown mimir:mimir "$bindir/mimir" || exit 1
        rm -rf /tmp/mimir-linux-amd64.tar.gz

        # Get server IP address (first address reported by hostname -I)
        SERVER_IP=$(hostname -I | awk '{print $1}')

        # Create Mimir config
        cat > "$mimirdir/mimir.yml" << EOF
# Mimir Configuration - Single Instance Mode
multitenancy_enabled: false

blocks_storage:
  backend: filesystem
  bucket_store:
    sync_dir: $datadir/tsdb-sync
  filesystem:
    dir: $datadir/data/tsdb
  tsdb:
    dir: $datadir/mimir-tsdb
    retention_period: 720h

compactor:
  data_dir: $datadir/compactor
  sharding_ring:
    kvstore:
      store: inmemory

distributor:
  ring:
    kvstore:
      store: inmemory

ingester:
  ring:
    kvstore:
      store: inmemory
    replication_factor: 1

ruler_storage:
  backend: filesystem
  filesystem:
    dir: $datadir/mimir-ruler

server:
  http_listen_port: 9009
  log_level: info

memberlist:
  abort_if_cluster_join_fails: false
  bind_port: 7946
  advertise_addr: $SERVER_IP
  join_members: []

store_gateway:
  sharding_ring:
    replication_factor: 1
    kvstore:
      store: inmemory

limits:
  max_global_series_per_user: 0
  max_global_exemplars_per_user: 100000
EOF

        chown mimir:mimir "$mimirdir/mimir.yml"

        # Create systemd service (\$MAINPID is written literally for systemd)
        cat > "$psdir/mimir.service" << EOF
[Unit]
Description=Grafana Mimir
Documentation=https://grafana.com/docs/mimir/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=mimir
Group=mimir
ExecStart=$bindir/mimir -config.file=$mimirdir/mimir.yml
ExecReload=/bin/kill -HUP \$MAINPID
TimeoutStopSec=20s
SendSIGKILL=no

# Output to journal
StandardOutput=journal
StandardError=journal
SyslogIdentifier=mimir

# Restart
Restart=on-failure
RestartSec=5s

# Security
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=full
ProtectHome=yes
ReadWritePaths=$datadir

# Resource limits
LimitNOFILE=1048576
LimitNPROC=1048576

# Environment
Environment=GOMAXPROCS=4

[Install]
WantedBy=multi-user.target
EOF

        systemctl daemon-reload
        systemctl enable --now mimir

        cat << EOF

==========================================
Mimir installation complete!
==========================================
Mimir UI: http://localhost:9009
Config: $mimirdir/mimir.yml
Data: $datadir

Add to Prometheus remote_write:
  remote_write:
    - url: http://localhost:9009/api/v1/push

EOF
    }
}
|
||||
|
||||
################################
|
||||
### Install and Config Nginx ###
|
||||
################################
|
||||
# Install nginx and publish Mimir behind it as mimir.$domain on port 80.
#
# Globals read: pkgmgr, domain.
# Writes mimir.conf into /etc/nginx/sites-available (and symlinks it into
# sites-enabled) or, failing that, into /etc/nginx/conf.d. nginx is restarted
# only when `nginx -t` accepts the configuration.
# Returns 1 without writing anything when neither directory exists; the
# target path used to collapse to "/mimir.conf" in that case.
install_nginx() {
    {
        local sitesa="" sitese=""

        # $pkgmgr holds "command flags" and is split on purpose.
        # shellcheck disable=SC2086
        $pkgmgr install nginx

        if [ -d "/etc/nginx/sites-available" ]; then
            sitesa=/etc/nginx/sites-available
            sitese=/etc/nginx/sites-enabled/
        elif [ -d "/etc/nginx/conf.d" ]; then
            sitesa=/etc/nginx/conf.d
        else
            echo "No nginx configuration directory found (sites-available or conf.d)" >&2
            return 1
        fi

        # The quoted delimiter keeps nginx's own $variables literal; only the
        # server_name line needs shell expansion, so it is emitted separately.
        {
            echo 'server {'
            echo '    listen 80;'
            echo '    listen [::]:80;'
            echo ''
            echo "    server_name mimir.$domain;"
            cat << 'NGINX'

    location / {
        proxy_pass http://localhost:9009/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_read_timeout 300s;
        proxy_connect_timeout 75s;
    }
}
NGINX
        } > "$sitesa"/mimir.conf

        # Debian-style layout: enable the site (ignore an existing link).
        if [ -n "$sitese" ]; then
            ln -s "$sitesa"/mimir.conf "$sitese" 2>/dev/null || true
        fi

        if nginx -t; then
            systemctl restart nginx
            echo "Nginx configured for Mimir at mimir.$domain"
        else
            echo "Nginx configuration test failed"
        fi
    }
}
|
||||
|
||||
######################
|
||||
### Function Calls ###
|
||||
######################
|
||||
# Run the installation.
install_mimir

# Uncomment to install nginx reverse proxy
# install_nginx

#############################################################

# Final report: installed version, service state and follow-up commands.
cat << EOF

==========================================
Installation Summary
==========================================
Mimir version: $(mimir --version 2>&1 | head -1)
Status: $(systemctl is-active mimir)

Check status: systemctl status mimir
View logs: journalctl -u mimir -f

EOF
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,263 @@
|
||||
#!/bin/bash
|
||||
|
||||
#############################################################
|
||||
#### ntfy Desktop Client Setup for Linux ####
|
||||
#### Subscribe to ntfy push notifications with desktop ####
|
||||
#### alerts via systemd user service ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### License: MIT ####
|
||||
#### Version: 1.0 ####
|
||||
#### ####
|
||||
#### Usage: ./ntfy-client-setup-linux.sh ####
|
||||
#############################################################
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
NTFY_VERSION="2.11.0"
|
||||
|
||||
# ── Detect the actual desktop user ─────────────────────────
|
||||
# Handles both sudo and non-sudo execution
|
||||
# When invoked through sudo, SUDO_USER names the real desktop user and the
# home directory is looked up in the passwd database. Otherwise use the
# current user; USER/HOME may be unset (cron, containers), which would abort
# the script under `set -u`, so fall back to id(1)/getent(1).
if [ -n "${SUDO_USER:-}" ]; then
  DESKTOP_USER="$SUDO_USER"
  # `|| true`: a failed lookup is reported by the explicit check below
  # instead of a silent `set -e` exit.
  DESKTOP_HOME=$(getent passwd "$SUDO_USER" | cut -d: -f6) || true
else
  DESKTOP_USER="${USER:-$(id -un)}"
  DESKTOP_HOME="${HOME:-$(getent passwd "$DESKTOP_USER" | cut -d: -f6 || true)}"
fi

# An empty home would make every path below resolve under "/".
if [ -z "$DESKTOP_HOME" ]; then
  printf ' ✗ %s\n' "Could not determine the home directory of '$DESKTOP_USER'." >&2
  exit 1
fi

CONFIG_DIR="$DESKTOP_HOME/.config/ntfy"
SYSTEMD_DIR="$DESKTOP_HOME/.config/systemd/user"
|
||||
|
||||
# ── Helper functions ───────────────────────────────────────
|
||||
|
||||
# Status-line helpers. The message is all arguments joined by spaces;
# backslash escapes in it are interpreted (printf %b, same as echo -e).

# Success line on stdout.
info() {
  printf ' ✓ %b\n' "$*"
}

# Warning line on stdout.
warn() {
  printf ' ⚠ %b\n' "$*"
}

# Error line on stderr.
error() {
  printf ' ✗ %b\n' "$*" >&2
}
|
||||
|
||||
run_as_user() {
  # Execute "$@" with the desktop user's identity.
  # Globals: DESKTOP_USER (read)
  # Only when we are root AND the desktop user is somebody else do we need
  # to switch users; in every other case the command runs directly.
  if [ "$(id -u)" -ne 0 ] || [ "$DESKTOP_USER" = "root" ]; then
    "$@"
    return
  fi

  sudo -u "$DESKTOP_USER" "$@"
}
|
||||
|
||||
#######################################
# Install one package with whichever supported package manager is present.
# Arguments:
#   $1 - package name for apt-based systems (also the default for the others)
#   $2 - package name for dnf (optional, defaults to $1)
#   $3 - package name for pacman (optional, defaults to $1)
# Returns:
#   The package manager's exit status, or 1 if none was detected.
#######################################
install_package() {
  local pkg_apt="$1"
  local pkg_dnf="${2:-$1}"
  local pkg_pacman="${3:-$1}"

  # Only go through sudo when not already root: minimal root environments
  # (containers, rescue shells) frequently have no sudo binary at all.
  local -a as_root=()
  if [ "$(id -u)" -ne 0 ]; then
    as_root=(sudo)
  fi

  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on
  # older bash releases.
  if command -v apt-get &> /dev/null; then
    # apt-get rather than apt: apt's command-line interface is not
    # guaranteed stable for scripted use.
    ${as_root[@]+"${as_root[@]}"} apt-get install -y "$pkg_apt"
  elif command -v dnf &> /dev/null; then
    ${as_root[@]+"${as_root[@]}"} dnf install -y "$pkg_dnf"
  elif command -v pacman &> /dev/null; then
    ${as_root[@]+"${as_root[@]}"} pacman -S --noconfirm "$pkg_pacman"
  else
    error "Could not detect package manager. Please install '$pkg_apt' manually."
    return 1
  fi
}
|
||||
|
||||
# ── Banner ─────────────────────────────────────────────────
|
||||
|
||||
# Opening banner with the user and home directory the setup will target.
cat << EOF

===========================================
 ntfy Desktop Client Setup for Linux
===========================================

 User: $DESKTOP_USER
 Home: $DESKTOP_HOME

EOF
|
||||
|
||||
# ── Step 1: Install dependencies ───────────────────────────
|
||||
|
||||
printf '%s\n' "── Checking dependencies ──────────────────" ""

# notify-send comes from libnotify; the package name differs per distro family.
if command -v notify-send &> /dev/null; then
  info "notify-send already available"
else
  printf '%s\n' " Installing libnotify for desktop notifications..."
  install_package libnotify-bin libnotify libnotify
  info "libnotify installed"
fi

# curl is needed to download the ntfy release archive.
if command -v curl &> /dev/null; then
  info "curl already available"
else
  printf '%s\n' " Installing curl..."
  install_package curl curl curl
  info "curl installed"
fi

printf '\n'
|
||||
|
||||
# ── Step 2: Install ntfy binary ────────────────────────────
|
||||
|
||||
echo "── Installing ntfy client ─────────────────"
echo ""

# Determine install location based on privileges: system-wide when we are
# root or have passwordless sudo, otherwise inside the user's home.
if [ "$(id -u)" -eq 0 ] || sudo -n true 2>/dev/null; then
  NTFY_BIN="/usr/local/bin/ntfy"
  INSTALL_SYSTEM=true
else
  NTFY_BIN="$DESKTOP_HOME/.local/bin/ntfy"
  INSTALL_SYSTEM=false
fi

if [ -x "$NTFY_BIN" ]; then
  info "ntfy already installed at $NTFY_BIN"
else
  # Map the kernel architecture name to the suffix used by ntfy releases.
  ARCH=$(uname -m)
  case "$ARCH" in
    x86_64) NTFY_ARCH="amd64" ;;
    aarch64) NTFY_ARCH="arm64" ;;
    armv7l) NTFY_ARCH="armv7" ;;
    *)
      error "Unsupported architecture: $ARCH"
      exit 1
      ;;
  esac

  DOWNLOAD_URL="https://github.com/binwiederhier/ntfy/releases/download/v${NTFY_VERSION}/ntfy_${NTFY_VERSION}_linux_${NTFY_ARCH}.tar.gz"
  echo " Downloading ntfy v${NTFY_VERSION} (${NTFY_ARCH})..."

  TEMP_DIR=$(mktemp -d)
  trap 'rm -rf "$TEMP_DIR"' EXIT

  # -f makes HTTP errors (e.g. 404 for a bad version) fail here instead of
  # saving an error page that tar would then choke on; -S keeps the error
  # message visible despite -s.
  if ! curl -fsSL -o "$TEMP_DIR/ntfy.tar.gz" "$DOWNLOAD_URL"; then
    error "Download failed: $DOWNLOAD_URL"
    exit 1
  fi
  tar -xzf "$TEMP_DIR/ntfy.tar.gz" -C "$TEMP_DIR"

  # Locate the binary explicitly so a changed archive layout is reported
  # clearly rather than as a later "No such file" from chmod.
  NTFY_EXTRACTED=$(find "$TEMP_DIR" -name "ntfy" -type f -print -quit)
  if [ -z "$NTFY_EXTRACTED" ]; then
    error "ntfy binary not found in downloaded archive."
    exit 1
  fi

  if [ "$INSTALL_SYSTEM" = true ]; then
    # Skip sudo when already root; it may not be installed.
    if [ "$(id -u)" -eq 0 ]; then
      install -m 0755 "$NTFY_EXTRACTED" "$NTFY_BIN"
    else
      sudo install -m 0755 "$NTFY_EXTRACTED" "$NTFY_BIN"
    fi
  else
    mkdir -p "$(dirname "$NTFY_BIN")"
    install -m 0755 "$NTFY_EXTRACTED" "$NTFY_BIN"
  fi

  rm -rf "$TEMP_DIR"
  trap - EXIT

  info "ntfy installed to $NTFY_BIN"
fi

echo ""
|
||||
|
||||
# ── Step 3: Interactive configuration ──────────────────────
|
||||
|
||||
echo "── Configuration ──────────────────────────"
echo ""

read -rp " Server URL [https://ntfy.example.com]: " INPUT_SERVER
SERVER_URL="${INPUT_SERVER:-https://ntfy.example.com}"
# Drop trailing slashes so "${SERVER_URL}/${topic}" never contains "//".
while [[ "$SERVER_URL" == */ ]]; do
  SERVER_URL="${SERVER_URL%/}"
done
if [ -z "$SERVER_URL" ]; then
  error "Server URL is required."
  exit 1
fi

echo ""
read -rp " Access token (leave empty for public topics): " ACCESS_TOKEN

echo ""
echo " Enter topics to subscribe to (space-separated)."
echo " Examples: alerts monitoring backup-status"
read -rp " Topics: " TOPICS

if [ -z "$TOPICS" ]; then
  error "At least one topic is required."
  exit 1
fi

# Summary. Only report whether a token was entered: the earlier
# ${ACCESS_TOKEN:+(set)}${ACCESS_TOKEN:-(none)} form expanded to
# "(set)" followed by the token itself, printing the secret.
echo ""
echo " Server: $SERVER_URL"
echo " Topics: $TOPICS"
if [ -n "$ACCESS_TOKEN" ]; then
  echo " Token: (set)"
else
  echo " Token: (none)"
fi
echo ""
|
||||
|
||||
# ── Step 4: Create client config ───────────────────────────
|
||||
|
||||
echo "── Creating configuration files ────────────"
echo ""

run_as_user mkdir -p "$CONFIG_DIR"
run_as_user mkdir -p "$SYSTEMD_DIR"

# Build the subscribe section for client.yml: one list entry per topic,
# with the token as a nested key of that entry when one was supplied.
# read -a splits on whitespace without subjecting topics to glob expansion.
read -r -a TOPIC_LIST <<< "$TOPICS"
SUBSCRIBE_BLOCK=""
for topic in "${TOPIC_LIST[@]}"; do
  SUBSCRIBE_BLOCK+="  - topic: ${SERVER_URL}/${topic}"$'\n'
  if [ -n "$ACCESS_TOKEN" ]; then
    SUBSCRIBE_BLOCK+="    token: ${ACCESS_TOKEN}"$'\n'
  fi
done

# Write client.yml.
# This used to be a heredoc whose last line was "${SUBSCRIBE_BLOCK}EOF".
# A heredoc terminator must stand alone on its line, so that heredoc ran on
# to the next bare EOF: client.yml swallowed the following script text and
# the systemd unit was never written. printf avoids the problem entirely.
# NOTE(review): no `command:` is set per topic, so the subscriber presumably
# only prints messages instead of calling notify-send — confirm against the
# ntfy CLI documentation.
{
  printf '%s\n' \
    "# ntfy client configuration" \
    "# Documentation: https://docs.ntfy.sh/subscribe/cli/" \
    "" \
    "subscribe:"
  printf '%s' "$SUBSCRIBE_BLOCK"
} > "$CONFIG_DIR/client.yml"

# The file may contain an access token; keep it private to its owner.
chmod 600 "$CONFIG_DIR/client.yml"

# Fix ownership if running as root. "user:" selects the user's login group,
# which also works where no group is named after the user.
if [ "$(id -u)" -eq 0 ] && [ "$DESKTOP_USER" != "root" ]; then
  chown -R "$DESKTOP_USER:" "$CONFIG_DIR"
fi

info "Config saved to $CONFIG_DIR/client.yml"

# ── Step 5: Create systemd user service ────────────────────

cat > "$SYSTEMD_DIR/ntfy-subscribe.service" << EOF
[Unit]
Description=ntfy desktop notification subscriber
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
ExecStart=${NTFY_BIN} subscribe --from-config
Restart=on-failure
RestartSec=10

[Install]
WantedBy=default.target
EOF

# Fix ownership if running as root
if [ "$(id -u)" -eq 0 ] && [ "$DESKTOP_USER" != "root" ]; then
  chown -R "$DESKTOP_USER:" "$SYSTEMD_DIR"
fi

info "Systemd user service created"

echo ""
|
||||
|
||||
# ── Done ───────────────────────────────────────────────────
|
||||
|
||||
# Closing summary: how to enable the user service, day-to-day commands, and
# a curl one-liner that publishes to the first configured topic.
cat << EOF
===========================================
 Setup Complete
===========================================

 To start receiving notifications, run as $DESKTOP_USER
 from a graphical desktop session:

 systemctl --user daemon-reload
 systemctl --user enable --now ntfy-subscribe

 Useful commands:

 Status: systemctl --user status ntfy-subscribe
 Logs: journalctl --user -u ntfy-subscribe -f
 Restart: systemctl --user restart ntfy-subscribe
 Stop: systemctl --user stop ntfy-subscribe
 Disable: systemctl --user disable --now ntfy-subscribe

 Test with:

 curl -d 'Hello from ntfy!' ${SERVER_URL}/${TOPICS%% *}

EOF
|
||||
@@ -0,0 +1,236 @@
|
||||
#############################################################
|
||||
#### ntfy Desktop Client Setup for Windows ####
|
||||
#### Subscribe to ntfy push notifications with Windows ####
|
||||
#### toast notifications ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### License: MIT ####
|
||||
#### Version: 1.0 ####
|
||||
#### ####
|
||||
#### Usage: .\ntfy-client-setup-windows.ps1 ####
|
||||
#############################################################
|
||||
|
||||
# Abort on the first failing cmdlet instead of continuing half-configured.
$ErrorActionPreference = "Stop"

# --- Configuration ---
$NtfyVersion = "2.8.0"
$InstallDir = "$env:LOCALAPPDATA\ntfy"
$ConfigDir = "$env:APPDATA\ntfy"

# --- Interactive Prompts ---
Write-Host ""
Write-Host "=== ntfy Desktop Notifications Setup ===" -ForegroundColor Cyan
Write-Host "Installing for user: $env:USERNAME"
Write-Host ""

# Server URL (trailing slashes removed so "$ServerUrl/$topic" is well formed)
$ServerUrl = Read-Host "Enter your ntfy server URL (e.g. https://ntfy.example.com)"
$ServerUrl = $ServerUrl.TrimEnd("/")
if ([string]::IsNullOrWhiteSpace($ServerUrl)) {
    Write-Host "ERROR: Server URL is required." -ForegroundColor Red
    exit 1
}

# Access token (optional — some servers allow anonymous access)
$Token = Read-Host "Enter your access token (leave blank if not required)"

# Topics
$topicInput = Read-Host "Enter topics to subscribe to, comma-separated (e.g. alerts-critical,alerts-all)"
if ([string]::IsNullOrWhiteSpace($topicInput)) {
    Write-Host "ERROR: At least one topic is required." -ForegroundColor Red
    exit 1
}
# @(...) forces an array even for a single topic. Without it a lone topic is
# a scalar string and $Topics[0] would be its first character.
$Topics = @($topicInput -split "," | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" })
# Input made only of separators (e.g. ",") leaves nothing to subscribe to.
if ($Topics.Count -eq 0) {
    Write-Host "ERROR: At least one topic is required." -ForegroundColor Red
    exit 1
}

Write-Host ""
Write-Host "Server: $ServerUrl" -ForegroundColor White
Write-Host "Topics: $($Topics -join ', ')" -ForegroundColor White
# Never echo the token itself, only whether one was entered.
Write-Host "Token: $(if ($Token) { '********' } else { '(none)' })" -ForegroundColor White
Write-Host ""
|
||||
|
||||
# --- Create directories ---
|
||||
New-Item -ItemType Directory -Force -Path $InstallDir | Out-Null
New-Item -ItemType Directory -Force -Path $ConfigDir | Out-Null

# --- Download ntfy if not already installed ---
if (Test-Path "$InstallDir\ntfy.exe") {
    Write-Host "ntfy already installed at: $InstallDir\ntfy.exe" -ForegroundColor Green
} else {
    Write-Host "Downloading ntfy v$NtfyVersion..."

    # Windows PowerShell 5.1 on older .NET defaults may not offer TLS 1.2,
    # which GitHub requires; opt in without removing protocols already enabled.
    [Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12

    $downloadUrl = "https://github.com/binwiederhier/ntfy/releases/download/v$NtfyVersion/ntfy_${NtfyVersion}_windows_amd64.zip"
    $zipPath = "$env:TEMP\ntfy.zip"
    Invoke-WebRequest -Uri $downloadUrl -OutFile $zipPath

    Write-Host "Extracting..."
    $extractPath = "$env:TEMP\ntfy_extract"
    # Start from a clean directory in case an earlier run left files behind.
    Remove-Item -Path $extractPath -Recurse -Force -ErrorAction SilentlyContinue
    Expand-Archive -Path $zipPath -DestinationPath $extractPath -Force
    Remove-Item $zipPath

    # Find the exe (may be in a subfolder)
    $ntfyExe = Get-ChildItem -Path $extractPath -Recurse -Filter "ntfy.exe" | Select-Object -First 1
    if (-not $ntfyExe) {
        # Clean up before bailing out so the extracted files are not left behind.
        Remove-Item -Path $extractPath -Recurse -Force -ErrorAction SilentlyContinue
        Write-Host "ERROR: Could not find ntfy.exe in downloaded archive." -ForegroundColor Red
        exit 1
    }
    Copy-Item -Path $ntfyExe.FullName -Destination "$InstallDir\ntfy.exe" -Force
    Remove-Item -Path $extractPath -Recurse -Force -ErrorAction SilentlyContinue

    Write-Host "Installed to: $InstallDir\ntfy.exe" -ForegroundColor Green
}
Write-Host ""
|
||||
|
||||
# --- Create client.yml config ---
|
||||
# Assemble client.yml: the default host always, the default token only when
# one was entered. The two settings are separated by a single "`n".
$clientYml = "default-host: $ServerUrl"
if ($Token) {
    $clientYml = $clientYml, "default-token: $Token" -join "`n"
}

$clientYmlPath = "$ConfigDir\client.yml"
Out-File -InputObject $clientYml -FilePath $clientYmlPath -Encoding UTF8
Write-Host "Client config saved to: $clientYmlPath" -ForegroundColor Green
|
||||
|
||||
# --- Build topic URLs ---
|
||||
# `ntfy subscribe` takes a single TOPIC argument; a second positional
# argument is interpreted as the COMMAND to run per message. Listing several
# topic URLs separated by spaces therefore subscribed to the first topic
# only. Join the topic names with commas into one URL instead.
# NOTE(review): relies on ntfy accepting a comma-separated topic list in the
# subscription URL — confirm against the ntfy subscribe documentation.
$topicUrlsString = "$ServerUrl/" + (@($Topics) -join ",")

# --- Create PowerShell notification script ---
# Build the token environment line only if a token was provided.
# The value is emitted single-quoted (embedded quotes doubled) so that
# characters such as $ or a backtick in the token are not expanded when the
# generated script runs.
$tokenLine = ""
if ($Token) {
    $tokenLine = "`$env:NTFY_TOKEN = '" + $Token.Replace("'", "''") + "'"
}

# Expandable here-string: `$ yields a literal $ in the generated script,
# while $tokenLine, $InstallDir and $topicUrlsString are substituted now.
$psScriptContent = @"
Add-Type -AssemblyName System.Windows.Forms

# Create a persistent notification icon in the system tray
`$global:notifyIcon = New-Object System.Windows.Forms.NotifyIcon
`$global:notifyIcon.Icon = [System.Drawing.SystemIcons]::Information
`$global:notifyIcon.Visible = `$true
`$global:notifyIcon.Text = "ntfy alerts"

function Show-Notification {
    param([string]`$Title, [string]`$Message, [int]`$Priority)

    # Map ntfy priority levels to Windows balloon icon types
    # 1 (min), 2 (low) -> None
    # 3 (default) -> Info
    # 4 (high), 5 (max) -> Error
    `$icon = [System.Windows.Forms.ToolTipIcon]::Info
    if (`$Priority -ge 4) { `$icon = [System.Windows.Forms.ToolTipIcon]::Error }
    elseif (`$Priority -le 2) { `$icon = [System.Windows.Forms.ToolTipIcon]::None }

    `$global:notifyIcon.BalloonTipIcon = `$icon
    `$global:notifyIcon.BalloonTipTitle = `$Title
    `$global:notifyIcon.BalloonTipText = `$Message
    `$global:notifyIcon.ShowBalloonTip(30000)
}

# Set access token if configured
$tokenLine
`$ntfyExe = "$InstallDir\ntfy.exe"

# Subscribe and process JSON output line by line
& `$ntfyExe subscribe '$topicUrlsString' 2>&1 | ForEach-Object {
    `$line = `$_
    if (`$line -match '"event":"message"') {
        try {
            `$json = `$line | ConvertFrom-Json
            `$title = if (`$json.title) { `$json.title } else { `$json.topic }
            `$message = `$json.message
            `$priority = if (`$json.priority) { `$json.priority } else { 3 }
            Show-Notification -Title `$title -Message `$message -Priority `$priority
        } catch { }
    }
}

`$global:notifyIcon.Dispose()
"@

$psScriptPath = "$ConfigDir\run-subscribe.ps1"
$psScriptContent | Out-File -FilePath $psScriptPath -Encoding UTF8
Write-Host "Notification script saved to: $psScriptPath" -ForegroundColor Green
|
||||
|
||||
# --- Create VBS wrapper for hidden startup (no console window) ---
|
||||
# VBScript wrapper: runs the subscriber script through powershell with the
# window-style argument 0 so no console window is shown. The doubled quotes
# ("") are VBScript's escape for a literal quote around the script path.
$vbsContent = @"
Set WshShell = CreateObject("WScript.Shell")
WshShell.Run "powershell -ExecutionPolicy Bypass -WindowStyle Hidden -File ""$psScriptPath""", 0
Set WshShell = Nothing
"@

$vbsPath = "$ConfigDir\run-subscribe-hidden.vbs"
$vbsContent | Out-File -FilePath $vbsPath -Encoding ASCII
Write-Host "Hidden launcher saved to: $vbsPath" -ForegroundColor Green
Write-Host ""

# --- Create startup shortcut ---
# A .lnk in the per-user Startup folder that launches the VBS wrapper via
# wscript.exe.
Write-Host "Creating startup shortcut..."
$startupPath = "$env:APPDATA\Microsoft\Windows\Start Menu\Programs\Startup"
$shortcutPath = "$startupPath\ntfy-subscribe.lnk"

# The WScript.Shell COM object is used to create and save the shortcut.
$shell = New-Object -ComObject WScript.Shell
$shortcut = $shell.CreateShortcut($shortcutPath)
$shortcut.TargetPath = "wscript.exe"
# The path is wrapped in literal double quotes so spaces in it survive.
$shortcut.Arguments = "`"$vbsPath`""
$shortcut.WorkingDirectory = $ConfigDir
$shortcut.WindowStyle = 7 # Minimized
$shortcut.Description = "ntfy notification subscriber"
$shortcut.Save()

Write-Host "Startup shortcut created at: $shortcutPath" -ForegroundColor Green
Write-Host ""
|
||||
|
||||
# --- Start the subscriber now ---
|
||||
Write-Host "Starting ntfy subscriber..."

# Stop any existing ntfy or subscriber processes so only one instance runs.
Stop-Process -Name ntfy -ErrorAction SilentlyContinue
# Other powershell processes are matched by command line; the current
# process ($PID) is excluded so the installer does not kill itself.
Get-Process powershell -ErrorAction SilentlyContinue | Where-Object { $_.Id -ne $PID } | ForEach-Object {
    try {
        $cmdLine = (Get-CimInstance Win32_Process -Filter "ProcessId = $($_.Id)" -ErrorAction SilentlyContinue).CommandLine
        if ($cmdLine -like "*run-subscribe*") { Stop-Process -Id $_.Id -Force -ErrorAction SilentlyContinue }
    } catch {}
}
Start-Sleep -Seconds 1

# Start-Process joins -ArgumentList with spaces and adds no quoting, so the
# script path is wrapped in literal double quotes; otherwise a profile path
# containing a space (e.g. "C:\Users\Jane Doe\...") would be split.
$process = Start-Process -FilePath "powershell" `
    -ArgumentList @("-ExecutionPolicy", "Bypass", "-WindowStyle", "Hidden", "-File", "`"$psScriptPath`"") `
    -WindowStyle Hidden `
    -PassThru

# Give the subscriber a moment to start (or fail) before status is reported.
Start-Sleep -Seconds 2
|
||||
|
||||
# --- Print status and management commands ---
|
||||
# Report success only if the launched subscriber process is still alive.
if ($process -and !$process.HasExited) {
    Write-Host ""
    Write-Host "=== Setup Complete ===" -ForegroundColor Green
    Write-Host ""
    Write-Host "ntfy is running and will start automatically on login." -ForegroundColor Green
    Write-Host "You should see Windows toast notifications when messages arrive."
    Write-Host ""
    Write-Host "Management commands (run in PowerShell):" -ForegroundColor Cyan
    Write-Host " Check status: Get-Process ntfy -ErrorAction SilentlyContinue"
    Write-Host " Stop: Stop-Process -Name ntfy"
    Write-Host " Start manually: wscript.exe '$vbsPath'"
    Write-Host " Edit config: notepad '$clientYmlPath'"
    Write-Host " Edit topics: notepad '$psScriptPath'"
    Write-Host ""
} else {
    Write-Host ""
    Write-Host "WARNING: ntfy may not have started correctly." -ForegroundColor Yellow
    Write-Host "Try running manually: wscript.exe '$vbsPath'"
    Write-Host ""
}

Write-Host "To test, send a notification from another machine:" -ForegroundColor Cyan
# @($Topics)[0]: when a single topic was entered $Topics may be a plain
# string, and indexing a string with [0] would return its first character.
Write-Host " curl -d 'Test message' $ServerUrl/$(@($Topics)[0])"
Write-Host ""
|
||||
Executable
+990
@@ -0,0 +1,990 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: postfix-metrics.sh
|
||||
# Description: Prometheus exporter for Postfix mail server metrics
|
||||
#
|
||||
# Usage:
|
||||
# # Output to stdout
|
||||
# ./postfix-metrics.sh
|
||||
#
|
||||
# # Textfile collector mode (atomic write)
|
||||
# ./postfix-metrics.sh --textfile
|
||||
#
|
||||
# # Custom output file
|
||||
# ./postfix-metrics.sh -o /path/to/metrics.prom
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# ============================================================================
|
||||
# CONFIGURATION VARIABLES
|
||||
# ============================================================================
|
||||
|
||||
# Directory used to build the output path when --textfile is given.
TEXTFILE_DIR="/var/lib/node_exporter"
# Destination file; set by --textfile or -o/--output, empty by default.
OUTPUT_FILE=""
# Set to true by --http.
HTTP_MODE=false
# Default HTTP port; overridden by -p/--port.
HTTP_PORT=9192
# Postfix spool root; the per-queue directories are counted beneath it.
QUEUE_DIR="/var/spool/postfix"
# Mail log that the log-derived metrics are grepped from.
LOG_FILE="/var/log/mail.log"
# Value of the hostname="..." label on every metric. Note that this
# overwrites the HOSTNAME variable bash sets automatically.
HOSTNAME=$(hostname)
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Print the command-line help and exit successfully.
# Globals:   TEXTFILE_DIR, HTTP_PORT (read)
# Outputs:   help text on stdout
# Returns:   never returns; exits with status 0
#######################################
show_usage() {
  # The --textfile path shown here must match what parse_args actually sets
  # ($TEXTFILE_DIR/postfix.prom); the help previously named a different file.
  cat <<EOF
Usage: $0 [OPTIONS]

Export Postfix statistics as Prometheus metrics.

MODES:
  --textfile      Write to node_exporter textfile collector
                  (writes to $TEXTFILE_DIR/postfix.prom)
  --http          Run HTTP server on port $HTTP_PORT

OPTIONS:
  -p, --port      HTTP port (default: $HTTP_PORT)
  -o, --output    Output file path (for custom locations)
  -h, --help      Show this help message

EXAMPLES:
  $0                          # Output to stdout
  $0 --textfile               # Write to textfile collector
  $0 --http                   # Run HTTP server on port $HTTP_PORT
  $0 --http -p 9192           # Run HTTP server on custom port
  $0 -o /tmp/postfix.prom     # Write to custom file

EOF
  exit 0
}
|
||||
|
||||
#######################################
# Parse command-line options into the global settings.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success; exits 1 on an unknown option, a missing option
#            value, or an invalid port. -h/--help prints usage and exits 0.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        ;;
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/postfix.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p|--port)
        # Without this check a trailing "-p" made `shift 2` fail without
        # shifting anything, so the loop never terminated.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires an argument" >&2
          exit 1
        fi
        # 10# forces base 10 so values such as "08" are not read as octal.
        if [[ ! "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
          echo "Invalid port: $2" >&2
          exit 1
        fi
        HTTP_PORT="$2"
        shift 2
        ;;
      -o|--output)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires an argument" >&2
          exit 1
        fi
        OUTPUT_FILE="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}
|
||||
|
||||
# Helper function to count grep matches (returns 0 if no match)
|
||||
# Count matching lines with `grep -c`, forwarding all arguments to grep.
# Prints the count; prints 0 whenever grep exits non-zero (no match,
# unreadable or missing file). grep's own error output is discarded.
grep_count() {
  local matches
  if ! matches=$(grep -c "$@" 2>/dev/null); then
    matches=0
  fi
  printf '%s\n' "$matches"
}
|
||||
|
||||
# ============================================================================
|
||||
# METRIC GENERATION
|
||||
# ============================================================================
|
||||
|
||||
generate_metrics() {
|
||||
local START_TIME
|
||||
START_TIME=$(date +%s.%N)
|
||||
|
||||
# Queue sizes
|
||||
echo "# HELP postfix_queue_size Number of messages in each Postfix queue"
|
||||
echo "# TYPE postfix_queue_size gauge"
|
||||
for queue in incoming active deferred hold corrupt; do
|
||||
count=$(find "${QUEUE_DIR}/${queue}" -type f 2>/dev/null | wc -l)
|
||||
echo "postfix_queue_size{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
# Oldest message in queue (seconds)
|
||||
echo "# HELP postfix_queue_oldest_seconds Age of oldest message in queue"
|
||||
echo "# TYPE postfix_queue_oldest_seconds gauge"
|
||||
for queue in deferred hold; do
|
||||
oldest=$(find "${QUEUE_DIR}/${queue}" -type f -printf '%T@\n' 2>/dev/null | sort -n | head -1)
|
||||
if [[ -n "$oldest" ]]; then
|
||||
age=$(echo "$(date +%s) - ${oldest%.*}" | bc)
|
||||
else
|
||||
age=0
|
||||
fi
|
||||
echo "postfix_queue_oldest_seconds{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${age}"
|
||||
done
|
||||
|
||||
# Message counters by status
|
||||
echo "# HELP postfix_messages_total Total messages by status"
|
||||
echo "# TYPE postfix_messages_total counter"
|
||||
for status in sent bounced deferred expired; do
|
||||
count=$(grep_count "status=${status}" "$LOG_FILE")
|
||||
echo "postfix_messages_total{status=\"${status}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
rejected=$(grep_count 'reject:' "$LOG_FILE")
|
||||
echo "postfix_messages_total{status=\"rejected\",hostname=\"${HOSTNAME}\"} ${rejected}"
|
||||
|
||||
# SMTP connections
|
||||
echo "# HELP postfix_smtp_connections SMTP connection stats"
|
||||
echo "# TYPE postfix_smtp_connections counter"
|
||||
connections=$(grep_count 'connect from' "$LOG_FILE")
|
||||
disconnections=$(grep_count 'disconnect from' "$LOG_FILE")
|
||||
echo "postfix_smtp_connections{type=\"connect\",hostname=\"${HOSTNAME}\"} ${connections}"
|
||||
echo "postfix_smtp_connections{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${disconnections}"
|
||||
|
||||
# Connection timeouts
|
||||
echo "# HELP postfix_timeout_total Connection timeout events"
|
||||
echo "# TYPE postfix_timeout_total counter"
|
||||
timeout_count=$(grep_count 'timeout after' "$LOG_FILE")
|
||||
echo "postfix_timeout_total{hostname=\"${HOSTNAME}\"} ${timeout_count}"
|
||||
|
||||
# SASL authentication
|
||||
echo "# HELP postfix_sasl_auth_total SASL authentication attempts"
|
||||
echo "# TYPE postfix_sasl_auth_total counter"
|
||||
sasl_success=$(grep_count 'sasl_username=' "$LOG_FILE")
|
||||
sasl_fail=$(grep_count 'authentication failed' "$LOG_FILE")
|
||||
echo "postfix_sasl_auth_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${sasl_success}"
|
||||
echo "postfix_sasl_auth_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${sasl_fail}"
|
||||
|
||||
# Message sizes (bytes)
|
||||
echo "# HELP postfix_message_size_bytes_total Total bytes of messages processed"
|
||||
echo "# TYPE postfix_message_size_bytes_total counter"
|
||||
total_bytes=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
|
||||
echo "postfix_message_size_bytes_total{hostname=\"${HOSTNAME}\"} ${total_bytes}"
|
||||
|
||||
echo "# HELP postfix_message_size_bytes_avg Average message size"
|
||||
echo "# TYPE postfix_message_size_bytes_avg gauge"
|
||||
avg_size=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) print int(sum/count); else print 0}')
|
||||
echo "postfix_message_size_bytes_avg{hostname=\"${HOSTNAME}\"} ${avg_size}"
|
||||
|
||||
echo "# HELP postfix_message_size_bytes_max Largest message size"
|
||||
echo "# TYPE postfix_message_size_bytes_max gauge"
|
||||
max_size=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
|
||||
echo "postfix_message_size_bytes_max{hostname=\"${HOSTNAME}\"} ${max_size:-0}"
|
||||
|
||||
# Per-recipient domain stats (top domains)
|
||||
echo "# HELP postfix_recipient_domain_total Messages per recipient domain"
|
||||
echo "# TYPE postfix_recipient_domain_total counter"
|
||||
grep -oP 'to=<[^@]+@\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count domain; do
|
||||
echo "postfix_recipient_domain_total{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
# Sender domain stats
|
||||
echo "# HELP postfix_sender_domain_total Messages per sender domain"
|
||||
echo "# TYPE postfix_sender_domain_total counter"
|
||||
grep -oP 'from=<[^@]+@\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count domain; do
|
||||
echo "postfix_sender_domain_total{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
# Bounce reasons
|
||||
echo "# HELP postfix_bounce_reason_total Bounces by reason"
|
||||
echo "# TYPE postfix_bounce_reason_total counter"
|
||||
bounce_user=$(grep_count 'User unknown' "$LOG_FILE")
|
||||
bounce_quota=$(grep_count -i 'over quota\|mailbox full' "$LOG_FILE")
|
||||
bounce_spam=$(grep_count -i 'blocked\|spam\|blacklist' "$LOG_FILE")
|
||||
bounce_dns=$(grep_count 'Host or domain name not found' "$LOG_FILE")
|
||||
bounce_refused=$(grep_count 'Connection refused' "$LOG_FILE")
|
||||
echo "postfix_bounce_reason_total{reason=\"user_unknown\",hostname=\"${HOSTNAME}\"} ${bounce_user}"
|
||||
echo "postfix_bounce_reason_total{reason=\"over_quota\",hostname=\"${HOSTNAME}\"} ${bounce_quota}"
|
||||
echo "postfix_bounce_reason_total{reason=\"spam_blocked\",hostname=\"${HOSTNAME}\"} ${bounce_spam}"
|
||||
echo "postfix_bounce_reason_total{reason=\"dns_error\",hostname=\"${HOSTNAME}\"} ${bounce_dns}"
|
||||
echo "postfix_bounce_reason_total{reason=\"connection_refused\",hostname=\"${HOSTNAME}\"} ${bounce_refused}"
|
||||
|
||||
# Relay stats
|
||||
echo "# HELP postfix_relay_total Messages by relay"
|
||||
echo "# TYPE postfix_relay_total counter"
|
||||
grep -oP 'relay=\K[^,\[]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | while read -r count relay; do
|
||||
echo "postfix_relay_total{relay=\"${relay}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
# Client connections (top IPs)
|
||||
echo "# HELP postfix_client_connections_total Connections per client IP"
|
||||
echo "# TYPE postfix_client_connections_total counter"
|
||||
grep -oP 'connect from \S+\[\K[^\]]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | while read -r count ip; do
|
||||
echo "postfix_client_connections_total{client_ip=\"${ip}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
# TLS stats
|
||||
echo "# HELP postfix_tls_connections_total TLS connection statistics"
|
||||
echo "# TYPE postfix_tls_connections_total counter"
|
||||
tls_in=$(grep_count 'Anonymous TLS connection established from' "$LOG_FILE")
|
||||
tls_out=$(grep_count 'Anonymous TLS connection established to' "$LOG_FILE")
|
||||
verified_in=$(grep_count 'Trusted TLS connection established from' "$LOG_FILE")
|
||||
verified_out=$(grep_count 'Trusted TLS connection established to' "$LOG_FILE")
|
||||
untrusted_in=$(grep_count 'Untrusted TLS connection established from' "$LOG_FILE")
|
||||
untrusted_out=$(grep_count 'Untrusted TLS connection established to' "$LOG_FILE")
|
||||
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"anonymous\",hostname=\"${HOSTNAME}\"} ${tls_in}"
|
||||
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"anonymous\",hostname=\"${HOSTNAME}\"} ${tls_out}"
|
||||
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"trusted\",hostname=\"${HOSTNAME}\"} ${verified_in}"
|
||||
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"trusted\",hostname=\"${HOSTNAME}\"} ${verified_out}"
|
||||
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"untrusted\",hostname=\"${HOSTNAME}\"} ${untrusted_in}"
|
||||
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"untrusted\",hostname=\"${HOSTNAME}\"} ${untrusted_out}"
|
||||
|
||||
# TLS protocol versions
|
||||
echo "# HELP postfix_tls_protocol_total TLS protocol version usage"
|
||||
echo "# TYPE postfix_tls_protocol_total counter"
|
||||
for proto in TLSv1 TLSv1.1 TLSv1.2 TLSv1.3; do
|
||||
count=$(grep_count "${proto} with cipher" "$LOG_FILE")
|
||||
echo "postfix_tls_protocol_total{protocol=\"${proto}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
# Delay stats (queue time)
|
||||
echo "# HELP postfix_delay_seconds_total Total delay time in seconds"
|
||||
echo "# TYPE postfix_delay_seconds_total counter"
|
||||
total_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
|
||||
echo "postfix_delay_seconds_total{hostname=\"${HOSTNAME}\"} ${total_delay}"
|
||||
|
||||
echo "# HELP postfix_delay_seconds_avg Average delivery delay"
|
||||
echo "# TYPE postfix_delay_seconds_avg gauge"
|
||||
avg_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
|
||||
echo "postfix_delay_seconds_avg{hostname=\"${HOSTNAME}\"} ${avg_delay}"
|
||||
|
||||
echo "# HELP postfix_delay_seconds_max Maximum delivery delay"
|
||||
echo "# TYPE postfix_delay_seconds_max gauge"
|
||||
max_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
|
||||
echo "postfix_delay_seconds_max{hostname=\"${HOSTNAME}\"} ${max_delay:-0}"
|
||||
|
||||
# Postfix process count
|
||||
echo "# HELP postfix_processes Number of running postfix processes"
|
||||
echo "# TYPE postfix_processes gauge"
|
||||
proc_count=$(pgrep -c -f "postfix" 2>/dev/null) || proc_count=0
|
||||
echo "postfix_processes{hostname=\"${HOSTNAME}\"} ${proc_count}"
|
||||
|
||||
# Mail loop detection
|
||||
echo "# HELP postfix_mail_loop_total Detected mail loops"
|
||||
echo "# TYPE postfix_mail_loop_total counter"
|
||||
loops=$(grep_count 'mail forwarding loop' "$LOG_FILE")
|
||||
echo "postfix_mail_loop_total{hostname=\"${HOSTNAME}\"} ${loops}"
|
||||
|
||||
# Service status
|
||||
echo "# HELP postfix_up Postfix service status (1=running, 0=stopped)"
|
||||
echo "# TYPE postfix_up gauge"
|
||||
if postfix status &>/dev/null || systemctl is-active postfix &>/dev/null; then
|
||||
echo "postfix_up{hostname=\"${HOSTNAME}\"} 1"
|
||||
else
|
||||
echo "postfix_up{hostname=\"${HOSTNAME}\"} 0"
|
||||
fi
|
||||
|
||||
# Queue age distribution: for each threshold, the number of files under the
# deferred queue whose mtime is older than that many minutes.
# NOTE(review): the label is named "le" although `-mmin +N` selects files
# OLDER than N minutes - confirm what dashboards expect before renaming.
printf '%s\n' \
  "# HELP postfix_queue_age_bucket Messages in deferred queue by age" \
  "# TYPE postfix_queue_age_bucket gauge"
now=$(date +%s)
for mins in 5 15 60 360 1440; do
  count=$(
    find "${QUEUE_DIR}/deferred" -type f -mmin "+${mins}" 2>/dev/null |
      wc -l
  )
  printf '%s\n' "postfix_queue_age_bucket{le=\"${mins}m\",hostname=\"${HOSTNAME}\"} ${count}"
done
|
||||
|
||||
# Delivery attempts: "first" counts lines whose delays= field starts with 0/,
# "retry" counts deferred lines announcing that delivery will be retried.
printf '%s\n' \
  "# HELP postfix_delivery_attempts_total Delivery attempts by result" \
  "# TYPE postfix_delivery_attempts_total counter"
first_attempt=$(grep_count 'delay=.*delays=0/' "$LOG_FILE")
# grep -c exits non-zero on zero matches or an unreadable file.
if ! retry_attempt=$(grep -c 'status=deferred.*will be retried' "$LOG_FILE" 2>/dev/null); then
  retry_attempt=0
fi
printf '%s\n' \
  "postfix_delivery_attempts_total{type=\"first\",hostname=\"${HOSTNAME}\"} ${first_attempt}" \
  "postfix_delivery_attempts_total{type=\"retry\",hostname=\"${HOSTNAME}\"} ${retry_attempt}"
|
||||
|
||||
# DSN status codes breakdown: one sample per tracked "dsn=<code>" value.
printf '%s\n' \
  "# HELP postfix_dsn_total Delivery Status Notification codes" \
  "# TYPE postfix_dsn_total counter"
for dsn in \
  "2.0.0" "4.7.1" \
  "5.1.1" "5.1.2" "5.2.1" "5.2.2" "5.4.1" "5.7.1"; do
  count=$(grep_count "dsn=${dsn}" "$LOG_FILE")
  printf '%s\n' "postfix_dsn_total{code=\"${dsn}\",hostname=\"${HOSTNAME}\"} ${count}"
done
|
||||
|
||||
# Delay breakdown by phase: every "delays=a/b/c/d" field is split on "/" and
# the four columns are summed; awk prints "<phase> <sum>" pairs which the
# shell loop turns into one sample per phase.
printf '%s\n' \
  "# HELP postfix_delay_phase_seconds_total Delay time by phase" \
  "# TYPE postfix_delay_phase_seconds_total counter"
grep -oP 'delays=\K[\d.]+/[\d.]+/[\d.]+/[\d.]+' "$LOG_FILE" 2>/dev/null |
  awk -F'/' '
    { p1 += $1; p2 += $2; p3 += $3; p4 += $4 }
    END {
      print "before_qmgr " p1+0
      print "in_qmgr " p2+0
      print "conn_setup " p3+0
      print "transmission " p4+0
    }' |
  while read -r phase total; do
    printf '%s\n' "postfix_delay_phase_seconds_total{phase=\"${phase}\",hostname=\"${HOSTNAME}\"} ${total}"
  done
|
||||
|
||||
# RBL rejections (per blocklist): log lines mentioning each blocklist name.
# The name is passed to grep_count as-is, so its dots act as regex wildcards
# if the helper uses a regex match.
printf '%s\n' \
  "# HELP postfix_rbl_reject_total Rejections by RBL" \
  "# TYPE postfix_rbl_reject_total counter"
for rbl in \
  "zen.spamhaus.org" \
  "bl.spamcop.net" \
  "b.barracudacentral.org" \
  "dnsbl.sorbs.net"; do
  count=$(grep_count "${rbl}" "$LOG_FILE")
  printf '%s\n' "postfix_rbl_reject_total{rbl=\"${rbl}\",hostname=\"${HOSTNAME}\"} ${count}"
done
|
||||
|
||||
# Invalid HELO/EHLO attempts: lines containing "Helo command rejected".
printf '%s\n' \
  "# HELP postfix_helo_invalid_total Invalid HELO/EHLO attempts" \
  "# TYPE postfix_helo_invalid_total counter"
helo_invalid=$(grep_count 'Helo command rejected' "$LOG_FILE")
printf '%s\n' "postfix_helo_invalid_total{hostname=\"${HOSTNAME}\"} ${helo_invalid}"

# Anvil rate limiting: anvil lines mentioning the connection, message and
# recipient rate, one sample per kind.
printf '%s\n' \
  "# HELP postfix_rate_limited_total Anvil rate limit events" \
  "# TYPE postfix_rate_limited_total counter"
rate_conn=$(grep_count 'anvil.*connection rate' "$LOG_FILE")
rate_msg=$(grep_count 'anvil.*message rate' "$LOG_FILE")
rate_rcpt=$(grep_count 'anvil.*recipient rate' "$LOG_FILE")
printf '%s\n' \
  "postfix_rate_limited_total{type=\"connection\",hostname=\"${HOSTNAME}\"} ${rate_conn}" \
  "postfix_rate_limited_total{type=\"message\",hostname=\"${HOSTNAME}\"} ${rate_msg}" \
  "postfix_rate_limited_total{type=\"recipient\",hostname=\"${HOSTNAME}\"} ${rate_rcpt}"
|
||||
|
||||
# Milter/content filter rejections: lines containing "milter-reject".
printf '%s\n' \
  "# HELP postfix_milter_reject_total Milter rejection events" \
  "# TYPE postfix_milter_reject_total counter"
milter_reject=$(grep_count 'milter-reject' "$LOG_FILE")
printf '%s\n' "postfix_milter_reject_total{hostname=\"${HOSTNAME}\"} ${milter_reject}"

# Header/body checks rejections: lines tagged "header_checks:" / "body_checks:".
printf '%s\n' \
  "# HELP postfix_header_checks_reject_total Header/body check rejections" \
  "# TYPE postfix_header_checks_reject_total counter"
header_reject=$(grep_count 'header_checks:' "$LOG_FILE")
body_reject=$(grep_count 'body_checks:' "$LOG_FILE")
printf '%s\n' \
  "postfix_header_checks_reject_total{type=\"header\",hostname=\"${HOSTNAME}\"} ${header_reject}" \
  "postfix_header_checks_reject_total{type=\"body\",hostname=\"${HOSTNAME}\"} ${body_reject}"

# Policy daemon deferrals/rejections: lines where "policy" is followed by
# DEFER or REJECT.
printf '%s\n' \
  "# HELP postfix_policyd_total Policy daemon events" \
  "# TYPE postfix_policyd_total counter"
policyd_defer=$(grep_count 'policy.*DEFER' "$LOG_FILE")
policyd_reject=$(grep_count 'policy.*REJECT' "$LOG_FILE")
printf '%s\n' \
  "postfix_policyd_total{action=\"defer\",hostname=\"${HOSTNAME}\"} ${policyd_defer}" \
  "postfix_policyd_total{action=\"reject\",hostname=\"${HOSTNAME}\"} ${policyd_reject}"
|
||||
|
||||
# DKIM signing/verification (if OpenDKIM is used): signatures added plus
# dkim=pass / dkim=fail results.
# NOTE(review): the "signed" sample uses an action= label while pass/fail use
# result= - confirm this mixed label set is intended.
printf '%s\n' \
  "# HELP postfix_dkim_total DKIM signing/verification results" \
  "# TYPE postfix_dkim_total counter"
dkim_signed=$(grep_count 'DKIM-Signature field added' "$LOG_FILE")
dkim_pass=$(grep_count 'dkim=pass' "$LOG_FILE")
dkim_fail=$(grep_count 'dkim=fail' "$LOG_FILE")
printf '%s\n' \
  "postfix_dkim_total{action=\"signed\",hostname=\"${HOSTNAME}\"} ${dkim_signed}" \
  "postfix_dkim_total{result=\"pass\",hostname=\"${HOSTNAME}\"} ${dkim_pass}" \
  "postfix_dkim_total{result=\"fail\",hostname=\"${HOSTNAME}\"} ${dkim_fail}"
|
||||
|
||||
# SPF results: one sample per SPF verdict, matching either "spf=<verdict>"
# or "SPF: <verdict>".
# NOTE(review): "-i" is handed to grep_count as its first argument; this only
# works if the helper forwards options to grep - confirm against its definition.
printf '%s\n' \
  "# HELP postfix_spf_total SPF check results" \
  "# TYPE postfix_spf_total counter"
for result in \
  pass fail softfail neutral none \
  permerror temperror; do
  count=$(grep_count -i "spf=${result}\|SPF: ${result}" "$LOG_FILE")
  printf '%s\n' "postfix_spf_total{result=\"${result}\",hostname=\"${HOSTNAME}\"} ${count}"
done
|
||||
|
||||
# DMARC results (if OpenDMARC is used).
# Expected log shape: "opendmarc[PID]: QUEUEID: domain.com pass/fail/none",
# with the verdict as the last word on the line.
printf '%s\n' \
  "# HELP postfix_dmarc_total DMARC check results" \
  "# TYPE postfix_dmarc_total counter"
for result in pass fail none; do
  # grep -c exits non-zero on zero matches or an unreadable file.
  if ! count=$(grep -cE "opendmarc\[.*\]: [A-F0-9]+: [^ ]+ ${result}$" "$LOG_FILE" 2>/dev/null); then
    count=0
  fi
  printf '%s\n' "postfix_dmarc_total{result=\"${result}\",hostname=\"${HOSTNAME}\"} ${count}"
done
|
||||
|
||||
# Hourly volume (traffic patterns): number of "status=sent" lines logged in
# each hour (00-23) of the current day.
#
# The line prefix is matched against the traditional syslog timestamp
# ("Mar  5 14:..."), whose day-of-month is space-padded. The prefix is
# therefore built with %e; %d would give "Mar 05" and never match on the
# 1st-9th of a month.
echo "# HELP postfix_hourly_volume Messages processed per hour"
echo "# TYPE postfix_hourly_volume gauge"
current_date=$(date '+%b %e')
for hour in $(seq -w 0 23); do
  # Select this hour's lines first, then count the delivered ones.
  # grep -c prints 0 but exits non-zero when nothing matches, hence "|| count=0".
  count=$(grep "^${current_date} ${hour}:" "$LOG_FILE" 2>/dev/null | grep -c 'status=sent') || count=0
  echo "postfix_hourly_volume{hour=\"${hour}\",hostname=\"${HOSTNAME}\"} ${count}"
done
|
||||
|
||||
# Recent throughput (last 5/15/60 minutes): number of "status=sent" lines
# whose text sorts at or after the cut-off timestamp.
#
# The cut-off uses %e (space-padded day) so it lines up with the traditional
# syslog timestamp at the start of each log line; %d ("05") would not compare
# correctly against "Mar  5 ..." lines.
# NOTE(review): this is a plain string comparison, so it is only meaningful
# while the window stays inside one month of the log - confirm acceptable.
echo "# HELP postfix_messages_recent Messages sent in recent time windows"
echo "# TYPE postfix_messages_recent gauge"
for mins in 5 15 60; do
  # `date -d` is a GNU extension; without it the window is reported as 0.
  since=$(date -d "${mins} minutes ago" '+%b %e %H:%M' 2>/dev/null) || since=""
  if [[ -n "$since" ]]; then
    count=$(awk -v since="$since" '$0 >= since && /status=sent/' "$LOG_FILE" 2>/dev/null | wc -l)
  else
    count=0
  fi
  echo "postfix_messages_recent{window=\"${mins}m\",hostname=\"${HOSTNAME}\"} ${count}"
done
|
||||
|
||||
# Active SMTP sessions estimate: number of processes named exactly "smtp"
# (reported as outbound) and "smtpd" (reported as inbound).
printf '%s\n' \
  "# HELP postfix_smtp_sessions_active Estimated active SMTP sessions" \
  "# TYPE postfix_smtp_sessions_active gauge"
# pgrep exits non-zero when no process matches; report 0 then.
if ! smtp_procs=$(pgrep -c -x smtp 2>/dev/null); then
  smtp_procs=0
fi
if ! smtpd_procs=$(pgrep -c -x smtpd 2>/dev/null); then
  smtpd_procs=0
fi
printf '%s\n' \
  "postfix_smtp_sessions_active{type=\"outbound\",hostname=\"${HOSTNAME}\"} ${smtp_procs}" \
  "postfix_smtp_sessions_active{type=\"inbound\",hostname=\"${HOSTNAME}\"} ${smtpd_procs}"
|
||||
|
||||
# Qmgr active recipients.
# NOTE(review): the value is the number of newline characters in the
# concatenated files of the active queue, not a parsed recipient count -
# confirm this approximation is intended.
printf '%s\n' \
  "# HELP postfix_qmgr_recipients Active recipients in queue manager" \
  "# TYPE postfix_qmgr_recipients gauge"
if ! active_recipients=$(find "${QUEUE_DIR}/active" -type f -exec cat {} \; 2>/dev/null | wc -l); then
  active_recipients=0
fi
printf '%s\n' "postfix_qmgr_recipients{hostname=\"${HOSTNAME}\"} ${active_recipients}"
|
||||
|
||||
# Estimated queue disk usage: total bytes (du -sb) of each queue directory.
echo "# HELP postfix_queue_size_bytes Total size of queue files in bytes"
echo "# TYPE postfix_queue_size_bytes gauge"
for queue in incoming active deferred hold; do
  # Without pipefail the pipeline's status is cut's, so a failing du (missing
  # or unreadable directory) is not caught by "||" and leaves size empty.
  # Default it explicitly so the sample always carries a numeric value.
  size=$(du -sb "${QUEUE_DIR}/${queue}" 2>/dev/null | cut -f1) || size=0
  size=${size:-0}
  echo "postfix_queue_size_bytes{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${size}"
done
|
||||
|
||||
# Warnings and fatal errors: log lines tagged "warning:", "fatal:", "panic:".
printf '%s\n' \
  "# HELP postfix_log_events_total Log events by severity" \
  "# TYPE postfix_log_events_total counter"
warnings=$(grep_count 'warning:' "$LOG_FILE")
fatals=$(grep_count 'fatal:' "$LOG_FILE")
panics=$(grep_count 'panic:' "$LOG_FILE")
printf '%s\n' \
  "postfix_log_events_total{level=\"warning\",hostname=\"${HOSTNAME}\"} ${warnings}" \
  "postfix_log_events_total{level=\"fatal\",hostname=\"${HOSTNAME}\"} ${fatals}" \
  "postfix_log_events_total{level=\"panic\",hostname=\"${HOSTNAME}\"} ${panics}"

# SMTP response classes, derived from the delivery status rather than the
# numeric reply: sent -> 2xx, deferred -> 4xx, bounced -> 5xx.
printf '%s\n' \
  "# HELP postfix_smtp_response_total SMTP response codes" \
  "# TYPE postfix_smtp_response_total counter"
smtp_2xx=$(grep_count 'status=sent' "$LOG_FILE")
smtp_4xx=$(grep_count 'status=deferred' "$LOG_FILE")
smtp_5xx=$(grep_count 'status=bounced' "$LOG_FILE")
printf '%s\n' \
  "postfix_smtp_response_total{code=\"2xx\",hostname=\"${HOSTNAME}\"} ${smtp_2xx}" \
  "postfix_smtp_response_total{code=\"4xx\",hostname=\"${HOSTNAME}\"} ${smtp_4xx}" \
  "postfix_smtp_response_total{code=\"5xx\",hostname=\"${HOSTNAME}\"} ${smtp_5xx}"
|
||||
|
||||
# Specific SMTP error codes. A line is counted for a code when it matches any
# of these shapes:
#   - "said: 550" / "said:550"
#   - "(550 "
#   - "host ...[...]... 550 "
#   - "smtp" followed anywhere later by the code and a non-digit
# NOTE(review): the last alternative is very broad (e.g. it can match a
# process id such as "smtpd[550]:") - confirm the over-count is acceptable.
printf '%s\n' \
  "# HELP postfix_smtp_error_code_total Specific SMTP error codes" \
  "# TYPE postfix_smtp_error_code_total counter"
for code in \
  421 450 451 452 \
  500 501 502 503 504 \
  550 551 552 553 554; do
  # grep -c exits non-zero on zero matches or an unreadable file.
  if ! count=$(grep -cE "(said: ${code}|said:${code}|\(${code} |host .*\[.*\].*${code} |smtp.*${code}[^0-9])" "$LOG_FILE" 2>/dev/null); then
    count=0
  fi
  printf '%s\n' "postfix_smtp_error_code_total{code=\"${code}\",hostname=\"${HOSTNAME}\"} ${count}"
done
|
||||
|
||||
# TLS cipher suites (top 10 by occurrence).
# Requires smtpd_tls_loglevel=1 and smtp_tls_loglevel=1 in main.cf.
# Cipher names are taken from both "with cipher <NAME>" and "cipher=<NAME>";
# e.g. "TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)".
printf '%s\n' \
  "# HELP postfix_tls_cipher_total TLS cipher suite usage" \
  "# TYPE postfix_tls_cipher_total counter"
cipher_output=$(
  {
    grep -oP 'with cipher \K[A-Za-z0-9_-]+' "$LOG_FILE" 2>/dev/null
    grep -oP 'cipher=\K[A-Za-z0-9_-]+' "$LOG_FILE" 2>/dev/null
  } |
    sort |
    uniq -c |
    sort -rn |
    head -n 10
)
if [[ -z "$cipher_output" ]]; then
  # Nothing logged: still emit one sample so the series exists.
  printf '%s\n' "postfix_tls_cipher_total{cipher=\"unknown\",hostname=\"${HOSTNAME}\"} 0"
else
  # Each line of cipher_output is "<count> <cipher>" as produced by uniq -c.
  printf '%s\n' "$cipher_output" | while read -r count cipher; do
    [[ -n "$cipher" ]] && printf '%s\n' "postfix_tls_cipher_total{cipher=\"${cipher}\",hostname=\"${HOSTNAME}\"} ${count}"
  done
fi
|
||||
|
||||
# TLS certificate expiry: seconds until the notAfter date of the first
# certificate file that exists among the candidate paths below. The value is
# negative for a certificate that has already expired and 0 when no
# certificate is found, openssl is missing, or the date cannot be determined.
echo "# HELP postfix_cert_expiry_seconds Seconds until TLS certificate expires"
echo "# TYPE postfix_cert_expiry_seconds gauge"
CERT_FILE=""
for cert in "/etc/ssl/certs/postfix.pem" \
  "/home/user-data/ssl/ssl_certificate.pem" \
  "/etc/letsencrypt/live/$(hostname)/fullchain.pem" \
  "/etc/letsencrypt/live/$(hostname -f)/fullchain.pem" \
  "/etc/ssl/certs/ssl-cert-snakeoil.pem"; do
  if [[ -f "$cert" ]]; then
    CERT_FILE="$cert"
    break
  fi
done
cert_seconds=0
if [[ -n "$CERT_FILE" ]] && command -v openssl &>/dev/null; then
  # openssl prints "notAfter=<date>"; keep only the date part.
  expiry=$(openssl x509 -enddate -noout -in "$CERT_FILE" 2>/dev/null | cut -d= -f2)
  if [[ -n "$expiry" ]]; then
    expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null) || expiry_epoch=0
    # Only compute the difference when the date was parsed; otherwise the
    # 0 fallback would be reported as "expired decades ago" (0 - now).
    if [[ "$expiry_epoch" -gt 0 ]]; then
      now=$(date +%s)
      cert_seconds=$((expiry_epoch - now))
    fi
  fi
fi
echo "postfix_cert_expiry_seconds{hostname=\"${HOSTNAME}\"} ${cert_seconds}"
|
||||
|
||||
# LMTP delivery metrics (Postfix side): "postfix/lmtp[PID]: ..." lines
# grouped by delivery status.
printf '%s\n' \
  "# HELP postfix_lmtp_delivery_total LMTP delivery stats" \
  "# TYPE postfix_lmtp_delivery_total counter"
lmtp_sent=$(grep_count 'postfix/lmtp\[.*status=sent' "$LOG_FILE")
lmtp_deferred=$(grep_count 'postfix/lmtp\[.*status=deferred' "$LOG_FILE")
lmtp_bounced=$(grep_count 'postfix/lmtp\[.*status=bounced' "$LOG_FILE")
printf '%s\n' \
  "postfix_lmtp_delivery_total{status=\"sent\",hostname=\"${HOSTNAME}\"} ${lmtp_sent}" \
  "postfix_lmtp_delivery_total{status=\"deferred\",hostname=\"${HOSTNAME}\"} ${lmtp_deferred}" \
  "postfix_lmtp_delivery_total{status=\"bounced\",hostname=\"${HOSTNAME}\"} ${lmtp_bounced}"

# LMTP connection events.
# NOTE(review): the "connect" pattern is a substring match, so it also counts
# lines containing "disconnect" - confirm whether that overlap is intended.
printf '%s\n' \
  "# HELP postfix_lmtp_connections_total LMTP connection events" \
  "# TYPE postfix_lmtp_connections_total counter"
lmtp_connect=$(grep_count 'postfix/lmtp\[.*connect' "$LOG_FILE")
lmtp_disconnect=$(grep_count 'postfix/lmtp\[.*disconnect' "$LOG_FILE")
lmtp_timeout=$(grep_count 'postfix/lmtp\[.*timeout' "$LOG_FILE")
lmtp_refused=$(grep_count 'postfix/lmtp\[.*Connection refused' "$LOG_FILE")
printf '%s\n' \
  "postfix_lmtp_connections_total{type=\"connect\",hostname=\"${HOSTNAME}\"} ${lmtp_connect}" \
  "postfix_lmtp_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${lmtp_disconnect}" \
  "postfix_lmtp_connections_total{type=\"timeout\",hostname=\"${HOSTNAME}\"} ${lmtp_timeout}" \
  "postfix_lmtp_connections_total{type=\"refused\",hostname=\"${HOSTNAME}\"} ${lmtp_refused}"

# LMTP delivery delay: average (two decimals) and maximum of the "delay="
# values found on postfix/lmtp lines.
printf '%s\n' \
  "# HELP postfix_lmtp_delay_seconds LMTP delivery delay stats" \
  "# TYPE postfix_lmtp_delay_seconds gauge"
lmtp_avg_delay=$(
  grep 'postfix/lmtp\[' "$LOG_FILE" 2>/dev/null |
    grep -oP 'delay=\K[\d.]+' |
    awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}'
)
lmtp_max_delay=$(
  grep 'postfix/lmtp\[' "$LOG_FILE" 2>/dev/null |
    grep -oP 'delay=\K[\d.]+' |
    sort -rn |
    head -n 1
)
printf '%s\n' \
  "postfix_lmtp_delay_seconds{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${lmtp_avg_delay}" \
  "postfix_lmtp_delay_seconds{stat=\"max\",hostname=\"${HOSTNAME}\"} ${lmtp_max_delay:-0}"
|
||||
|
||||
# Dovecot LMTP/LDA delivery stats. The first candidate log that exists and
# mentions "dovecot" is used; when none qualifies the whole Dovecot section
# is omitted from the output.
DOVECOT_LOG=""
for log in \
  "/var/log/dovecot.log" \
  "/var/log/mail.log" \
  "/var/log/syslog"; do
  if [[ -f "$log" ]] && grep -q 'dovecot' "$log" 2>/dev/null; then
    DOVECOT_LOG="$log"
    break
  fi
done
if [[ -n "$DOVECOT_LOG" ]]; then
  # Local deliveries ("saved mail") split by delivery agent.
  printf '%s\n' \
    "# HELP postfix_dovecot_delivery_total Dovecot local delivery stats" \
    "# TYPE postfix_dovecot_delivery_total counter"
  lmtp_delivered=$(grep_count 'lmtp.*saved mail' "$DOVECOT_LOG")
  lda_delivered=$(grep_count 'lda.*saved mail' "$DOVECOT_LOG")
  printf '%s\n' \
    "postfix_dovecot_delivery_total{type=\"lmtp\",hostname=\"${HOSTNAME}\"} ${lmtp_delivered}" \
    "postfix_dovecot_delivery_total{type=\"lda\",hostname=\"${HOSTNAME}\"} ${lda_delivered}"

  # Sieve actions mentioned in the log.
  printf '%s\n' \
    "# HELP postfix_dovecot_sieve_total Dovecot sieve filter actions" \
    "# TYPE postfix_dovecot_sieve_total counter"
  sieve_fileinto=$(grep_count 'sieve.*fileinto' "$DOVECOT_LOG")
  sieve_discard=$(grep_count 'sieve.*discard' "$DOVECOT_LOG")
  sieve_redirect=$(grep_count 'sieve.*redirect' "$DOVECOT_LOG")
  printf '%s\n' \
    "postfix_dovecot_sieve_total{action=\"fileinto\",hostname=\"${HOSTNAME}\"} ${sieve_fileinto}" \
    "postfix_dovecot_sieve_total{action=\"discard\",hostname=\"${HOSTNAME}\"} ${sieve_discard}" \
    "postfix_dovecot_sieve_total{action=\"redirect\",hostname=\"${HOSTNAME}\"} ${sieve_redirect}"

  # Authentication outcomes.
  printf '%s\n' \
    "# HELP postfix_dovecot_auth_total Dovecot authentication attempts" \
    "# TYPE postfix_dovecot_auth_total counter"
  auth_success=$(grep_count 'auth.*successful' "$DOVECOT_LOG")
  auth_fail=$(grep_count 'auth.*failed' "$DOVECOT_LOG")
  printf '%s\n' \
    "postfix_dovecot_auth_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${auth_success}" \
    "postfix_dovecot_auth_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${auth_fail}"

  # IMAP logins and disconnects.
  printf '%s\n' \
    "# HELP postfix_dovecot_imap_connections_total Dovecot IMAP connections" \
    "# TYPE postfix_dovecot_imap_connections_total counter"
  imap_login=$(grep_count 'imap-login:.*Login' "$DOVECOT_LOG")
  imap_disconnect=$(grep_count 'imap.*Disconnected' "$DOVECOT_LOG")
  printf '%s\n' \
    "postfix_dovecot_imap_connections_total{type=\"login\",hostname=\"${HOSTNAME}\"} ${imap_login}" \
    "postfix_dovecot_imap_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${imap_disconnect}"

  # POP3 logins and disconnects.
  printf '%s\n' \
    "# HELP postfix_dovecot_pop3_connections_total Dovecot POP3 connections" \
    "# TYPE postfix_dovecot_pop3_connections_total counter"
  pop3_login=$(grep_count 'pop3-login:.*Login' "$DOVECOT_LOG")
  pop3_disconnect=$(grep_count 'pop3.*Disconnected' "$DOVECOT_LOG")
  printf '%s\n' \
    "postfix_dovecot_pop3_connections_total{type=\"login\",hostname=\"${HOSTNAME}\"} ${pop3_login}" \
    "postfix_dovecot_pop3_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${pop3_disconnect}"
fi
|
||||
|
||||
# SpamAssassin metrics (supports spamd, spampd, and amavis).
SPAM_LOG="/var/log/mail.log"

# Detect which spam daemon is in use by looking for its marker in the log.
# Candidates are probed in priority order (spampd first as it is the more
# specific name); SPAM_DAEMON stays empty when none is found.
SPAM_DAEMON=""
for sa_candidate in spampd spamd amavis; do
  case "$sa_candidate" in
    spamd) sa_marker='spamd\[' ;; # require the "[pid]" bracket for plain spamd
    *) sa_marker="$sa_candidate" ;;
  esac
  if grep -q "$sa_marker" "$SPAM_LOG" 2>/dev/null; then
    SPAM_DAEMON="$sa_candidate"
    break
  fi
done
|
||||
|
||||
if [[ -n "$SPAM_DAEMON" ]]; then
|
||||
# Spam/ham verdict counts; the log phrases depend on the detected daemon.
printf '%s\n' \
  "# HELP postfix_spamassassin_total SpamAssassin scan results" \
  "# TYPE postfix_spamassassin_total counter"

case "$SPAM_DAEMON" in
  spampd)
    # spampd format: "clean message <...> (SCORE/THRESHOLD)" or
    # "identified spam <...> (SCORE/THRESHOLD)"
    spam_identified=$(grep_count 'spampd.*identified spam' "$SPAM_LOG")
    ham_clean=$(grep_count 'spampd.*clean message' "$SPAM_LOG")
    ;;
  amavis)
    spam_identified=$(grep_count 'amavis.*Blocked SPAM' "$SPAM_LOG")
    ham_clean=$(grep_count 'amavis.*Passed CLEAN' "$SPAM_LOG")
    ;;
  *)
    spam_identified=$(grep_count 'spamd.*identified spam' "$SPAM_LOG")
    ham_clean=$(grep_count 'spamd.*clean message' "$SPAM_LOG")
    ;;
esac
printf '%s\n' \
  "postfix_spamassassin_total{result=\"spam\",hostname=\"${HOSTNAME}\"} ${spam_identified}" \
  "postfix_spamassassin_total{result=\"ham\",hostname=\"${HOSTNAME}\"} ${ham_clean}"
|
||||
|
||||
# SpamAssassin score statistics: bucket distribution, average and maximum.
#
# The per-daemon score regex is chosen once and the log is scanned a single
# time; all statistics are then derived from the cached list of scores
# instead of re-reading the log for every bucket.
#   spampd: "(SCORE/THRESHOLD)" such as (-0.30/5.00) or (15.2/5.0)
#   amavis: "Hits: SCORE"
#   spamd : "score=SCORE" (this pattern does not capture a leading minus, so
#           the negative bucket stays 0 for spamd)
case "$SPAM_DAEMON" in
  spampd) score_re='spampd.*\(\K-?[\d.]+(?=/)' ;;
  amavis) score_re='amavis.*Hits: \K-?[\d.]+' ;;
  *) score_re='spamd.*score=\K[\d.]+' ;;
esac
# grep exits non-zero when nothing matches; treat that as "no scores".
spam_scores=$(grep -oP "$score_re" "$SPAM_LOG" 2>/dev/null) || spam_scores=""

echo "# HELP postfix_spamassassin_score_total SpamAssassin score distribution"
echo "# TYPE postfix_spamassassin_score_total counter"
# "NF" skips the single empty line printf emits when there are no scores.
score_neg=$(printf '%s\n' "$spam_scores" | awk 'NF && $1 < 0 {count++} END {print count+0}')
score_0_5=$(printf '%s\n' "$spam_scores" | awk 'NF && $1 >= 0 && $1 < 5 {count++} END {print count+0}')
score_5_10=$(printf '%s\n' "$spam_scores" | awk 'NF && $1 >= 5 && $1 < 10 {count++} END {print count+0}')
score_10_plus=$(printf '%s\n' "$spam_scores" | awk 'NF && $1 >= 10 {count++} END {print count+0}')
echo "postfix_spamassassin_score_total{bucket=\"negative\",hostname=\"${HOSTNAME}\"} ${score_neg:-0}"
echo "postfix_spamassassin_score_total{bucket=\"0-5\",hostname=\"${HOSTNAME}\"} ${score_0_5}"
echo "postfix_spamassassin_score_total{bucket=\"5-10\",hostname=\"${HOSTNAME}\"} ${score_5_10}"
echo "postfix_spamassassin_score_total{bucket=\"10+\",hostname=\"${HOSTNAME}\"} ${score_10_plus}"

echo "# HELP postfix_spamassassin_score_avg Average SpamAssassin score"
echo "# TYPE postfix_spamassassin_score_avg gauge"
avg_score=$(printf '%s\n' "$spam_scores" | awk 'NF {sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
echo "postfix_spamassassin_score_avg{hostname=\"${HOSTNAME}\"} ${avg_score}"

echo "# HELP postfix_spamassassin_score_max Maximum SpamAssassin score seen"
echo "# TYPE postfix_spamassassin_score_max gauge"
# Empty when there are no scores; the sample then falls back to 0.
max_score=$(printf '%s\n' "$spam_scores" | sort -rn | head -n 1)
echo "postfix_spamassassin_score_max{hostname=\"${HOSTNAME}\"} ${max_score:-0}"
|
||||
|
||||
# Messages scanned total: sum of the spam and ham verdict counts.
printf '%s\n' \
  "# HELP postfix_spamassassin_scanned_total Total messages scanned" \
  "# TYPE postfix_spamassassin_scanned_total counter"
scanned_total=$((spam_identified + ham_clean))
printf '%s\n' "postfix_spamassassin_scanned_total{hostname=\"${HOSTNAME}\"} ${scanned_total}"

# Scan time: average (two decimals) and maximum of the logged durations.
printf '%s\n' \
  "# HELP postfix_spamassassin_scan_time_seconds SpamAssassin scan time stats" \
  "# TYPE postfix_spamassassin_scan_time_seconds gauge"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
  # spampd format: "in 2.15s"
  scan_time_re='spampd.* in \K[\d.]+(?=s)'
else
  # other daemons: "in <N> seconds"
  scan_time_re="${SPAM_DAEMON}.* in \K[\d.]+(?= seconds)"
fi
avg_time=$(
  grep -oP "$scan_time_re" "$SPAM_LOG" 2>/dev/null |
    awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}'
)
max_time=$(
  grep -oP "$scan_time_re" "$SPAM_LOG" 2>/dev/null |
    sort -rn |
    head -n 1
)
printf '%s\n' \
  "postfix_spamassassin_scan_time_seconds{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_time:-0}" \
  "postfix_spamassassin_scan_time_seconds{stat=\"max\",hostname=\"${HOSTNAME}\"} ${max_time:-0}"

# spampd-specific: message size stats and the configured spam threshold.
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
  printf '%s\n' \
    "# HELP postfix_spamassassin_message_size_bytes SpamAssassin processed message sizes" \
    "# TYPE postfix_spamassassin_message_size_bytes gauge"
  avg_size=$(
    grep -oP 'spampd.*, \K\d+(?= bytes)' "$SPAM_LOG" 2>/dev/null |
      awk '{sum+=$1; count++} END {if(count>0) printf "%.0f", sum/count; else print 0}'
  )
  max_size=$(
    grep -oP 'spampd.*, \K\d+(?= bytes)' "$SPAM_LOG" 2>/dev/null |
      sort -rn |
      head -n 1
  )
  printf '%s\n' \
    "postfix_spamassassin_message_size_bytes{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_size:-0}" \
    "postfix_spamassassin_message_size_bytes{stat=\"max\",hostname=\"${HOSTNAME}\"} ${max_size:-0}"

  # Threshold is the number after the "/" in the first "(SCORE/THRESHOLD)"
  # found; 5 is reported when no such entry exists.
  printf '%s\n' \
    "# HELP postfix_spamassassin_threshold SpamAssassin spam threshold" \
    "# TYPE postfix_spamassassin_threshold gauge"
  threshold=$(
    grep -oP 'spampd.*/-?\K[\d.]+(?=\))' "$SPAM_LOG" 2>/dev/null |
      head -n 1
  )
  printf '%s\n' "postfix_spamassassin_threshold{hostname=\"${HOSTNAME}\"} ${threshold:-5}"
fi
|
||||
|
||||
# SpamAssassin rules: the 15 most frequently triggered rule names.
# NOTE: spampd (used by Mail-in-a-Box) does NOT log individual rules to
# mail.log; rules are only available with standalone spamd with verbose
# logging or a separate log file. The first candidate log that exists and
# contains "tests=" is used; otherwise this metric is omitted.
SA_RULES_LOG=""
for log in "/var/log/spamassassin.log" "/var/log/spamd.log" "$SPAM_LOG"; do
  if [[ -f "$log" ]] && grep -q 'tests=' "$log" 2>/dev/null; then
    SA_RULES_LOG="$log"
    break
  fi
done
if [[ -n "$SA_RULES_LOG" ]]; then
  echo "# HELP postfix_spamassassin_rules_total Top SpamAssassin rules triggered"
  echo "# TYPE postfix_spamassassin_rules_total counter"
  # Capture the WHOLE comma-separated list after "tests=" (up to whitespace
  # or "]") and split it afterwards. Excluding "," from the capture would
  # stop at the first rule and count only one rule per log line.
  grep -oP 'tests=\K[^\]\s]+' "$SA_RULES_LOG" 2>/dev/null |
    tr ',' '\n' |
    tr -d ' ' |
    sort |
    uniq -c |
    sort -rn |
    head -15 |
    while read -r count rule; do
      # Skip empty names produced by stray or trailing commas.
      if [[ -n "$rule" ]]; then
        echo "postfix_spamassassin_rules_total{rule=\"${rule}\",hostname=\"${HOSTNAME}\"} ${count}"
      fi
    done
fi
|
||||
|
||||
# Daemon status: 1 when any process command line matches the detected
# daemon name, otherwise 0.
printf '%s\n' \
  "# HELP postfix_spamassassin_up SpamAssassin daemon status" \
  "# TYPE postfix_spamassassin_up gauge"
if ! pgrep -f "${SPAM_DAEMON}" &>/dev/null; then
  printf '%s\n' "postfix_spamassassin_up{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} 0"
else
  printf '%s\n' "postfix_spamassassin_up{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} 1"
fi

# Number of processes matching the daemon name (0 when pgrep finds none).
printf '%s\n' \
  "# HELP postfix_spamassassin_processes Number of spam daemon processes" \
  "# TYPE postfix_spamassassin_processes gauge"
if ! spam_procs=$(pgrep -c -f "${SPAM_DAEMON}" 2>/dev/null); then
  spam_procs=0
fi
printf '%s\n' "postfix_spamassassin_processes{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} ${spam_procs}"
|
||||
fi
|
||||
|
||||
# Greylisting stats (postgrey): deferred, passed-after-retry and whitelisted
# decisions.
printf '%s\n' \
  "# HELP postfix_greylist_total Greylisting events" \
  "# TYPE postfix_greylist_total counter"
greylist_defer=$(grep_count 'action=greylist' "$LOG_FILE")
greylist_pass=$(grep_count 'action=pass.*reason=triplet' "$LOG_FILE")
greylist_whitelist=$(grep_count 'action=pass.*reason=client whitelist\|action=pass, reason=client AWL' "$LOG_FILE")
printf '%s\n' \
  "postfix_greylist_total{action=\"defer\",hostname=\"${HOSTNAME}\"} ${greylist_defer}" \
  "postfix_greylist_total{action=\"pass\",hostname=\"${HOSTNAME}\"} ${greylist_pass}" \
  "postfix_greylist_total{action=\"whitelist\",hostname=\"${HOSTNAME}\"} ${greylist_whitelist}"

# Same events broken down by the logged reason= value.
printf '%s\n' \
  "# HELP postfix_greylist_reason_total Greylisting by reason" \
  "# TYPE postfix_greylist_reason_total counter"
grey_new=$(grep_count 'reason=new' "$LOG_FILE")
grey_early=$(grep_count 'reason=early-retry' "$LOG_FILE")
grey_triplet=$(grep_count 'reason=triplet found' "$LOG_FILE")
printf '%s\n' \
  "postfix_greylist_reason_total{reason=\"new\",hostname=\"${HOSTNAME}\"} ${grey_new}" \
  "postfix_greylist_reason_total{reason=\"early_retry\",hostname=\"${HOSTNAME}\"} ${grey_early}" \
  "postfix_greylist_reason_total{reason=\"triplet_found\",hostname=\"${HOSTNAME}\"} ${grey_triplet}"
|
||||
|
||||
# Greylist delay statistics: average and maximum of the integer "delay="
# values logged by postgrey.
echo "# HELP postfix_greylist_delay_seconds Greylist delay statistics"
echo "# TYPE postfix_greylist_delay_seconds gauge"
# Both statistics are restricted to postgrey lines. An unrestricted
# 'delay=' match would also pick up the delay= field of every Postfix
# delivery line and skew the average away from the greylisting delay.
# Zero delays are excluded from the average.
avg_delay=$(grep -oP 'postgrey.*delay=\K\d+' "$LOG_FILE" 2>/dev/null | grep -v '^0$' | awk '{sum+=$1; count++} END {if(count>0) printf "%.0f", sum/count; else print 0}')
max_delay=$(grep -oP 'postgrey.*delay=\K\d+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
echo "postfix_greylist_delay_seconds{type=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_delay:-0}"
echo "postfix_greylist_delay_seconds{type=\"max\",hostname=\"${HOSTNAME}\"} ${max_delay:-0}"
|
||||
|
||||
# Unique greylisted clients: distinct client_address= values on
# action=greylist lines.
printf '%s\n' \
  "# HELP postfix_greylist_clients_total Unique greylisted client IPs" \
  "# TYPE postfix_greylist_clients_total gauge"
grey_clients=$(
  grep 'action=greylist' "$LOG_FILE" 2>/dev/null |
    grep -oP 'client_address=\K[^,]+' |
    sort -u |
    wc -l
)
printf '%s\n' "postfix_greylist_clients_total{hostname=\"${HOSTNAME}\"} ${grey_clients:-0}"

# Top 10 greylisted sender domains: the part after "@" of each sender=
# value, ranked by number of occurrences.
printf '%s\n' \
  "# HELP postfix_greylist_top_senders Top greylisted sender domains" \
  "# TYPE postfix_greylist_top_senders counter"
grep 'action=greylist' "$LOG_FILE" 2>/dev/null |
  grep -oP 'sender=\K[^,]+' |
  sed 's/.*@//' |
  sort |
  uniq -c |
  sort -rn |
  head -n 10 |
  while read -r count domain; do
    [[ -n "$domain" ]] && printf '%s\n' "postfix_greylist_top_senders{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
  done
|
||||
|
||||
# Cleanup daemon stats: lines carrying "message-id=", used as the number of
# messages entering the system.
printf '%s\n' \
  "# HELP postfix_cleanup_total Messages processed by cleanup daemon" \
  "# TYPE postfix_cleanup_total counter"
cleanup_count=$(grep_count 'message-id=' "$LOG_FILE")
printf '%s\n' "postfix_cleanup_total{hostname=\"${HOSTNAME}\"} ${cleanup_count}"

# Virtual mailbox errors: unknown-mailbox / unknown-virtual-user lines.
printf '%s\n' \
  "# HELP postfix_virtual_errors_total Virtual mailbox lookup errors" \
  "# TYPE postfix_virtual_errors_total counter"
virtual_not_found=$(grep_count 'mailbox not found\|User unknown in virtual' "$LOG_FILE")
printf '%s\n' "postfix_virtual_errors_total{hostname=\"${HOSTNAME}\"} ${virtual_not_found}"

# Address verification outcomes.
# NOTE(review): "success" also counts every line containing "cache hit",
# whatever component logged it - confirm that is intended.
printf '%s\n' \
  "# HELP postfix_address_verify_total Address verification events" \
  "# TYPE postfix_address_verify_total counter"
verify_fail=$(grep_count 'address verification failed' "$LOG_FILE")
verify_success=$(grep_count 'address verification succeeded\|cache hit' "$LOG_FILE")
printf '%s\n' \
  "postfix_address_verify_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${verify_fail}" \
  "postfix_address_verify_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${verify_success}"
|
||||
|
||||
# Postfix master process uptime, approximated as the age of the master pid
# file (current time minus its modification time). Reports 0 when the pid
# file is missing or its mtime cannot be read.
printf '%s\n' \
  "# HELP postfix_master_uptime_seconds Postfix master process uptime" \
  "# TYPE postfix_master_uptime_seconds gauge"
MASTER_PID_FILE="/var/spool/postfix/pid/master.pid"
uptime_seconds=0
if [[ -f "$MASTER_PID_FILE" ]]; then
  if ! master_start=$(stat -c %Y "$MASTER_PID_FILE" 2>/dev/null); then
    master_start=0
  fi
  if [[ $master_start -gt 0 ]]; then
    uptime_seconds=$(($(date +%s) - master_start))
  fi
fi
printf '%s\n' "postfix_master_uptime_seconds{hostname=\"${HOSTNAME}\"} ${uptime_seconds}"
|
||||
|
||||
# DNS lookup failures
|
||||
echo "# HELP postfix_dns_errors_total DNS lookup errors"
|
||||
echo "# TYPE postfix_dns_errors_total counter"
|
||||
dns_not_found=$(grep_count 'Host not found\|Name service error\|Host or domain name not found' "$LOG_FILE")
|
||||
dns_timeout=$(grep_count 'DNS lookup.*timeout\|name server.*timeout' "$LOG_FILE")
|
||||
dns_servfail=$(grep_count 'SERVFAIL\|server failure' "$LOG_FILE")
|
||||
echo "postfix_dns_errors_total{type=\"not_found\",hostname=\"${HOSTNAME}\"} ${dns_not_found}"
|
||||
echo "postfix_dns_errors_total{type=\"timeout\",hostname=\"${HOSTNAME}\"} ${dns_timeout}"
|
||||
echo "postfix_dns_errors_total{type=\"servfail\",hostname=\"${HOSTNAME}\"} ${dns_servfail}"
|
||||
|
||||
# STARTTLS usage - count TLS connections vs total SMTP connections
|
||||
# "used" = successful TLS connections (inbound + outbound)
|
||||
# "total" = total SMTP connections for ratio calculation
|
||||
echo "# HELP postfix_starttls_total STARTTLS connection counts"
|
||||
echo "# TYPE postfix_starttls_total counter"
|
||||
starttls_inbound=$(grep_count 'TLS connection established from' "$LOG_FILE")
|
||||
starttls_outbound=$(grep_count 'TLS connection established to' "$LOG_FILE")
|
||||
echo "postfix_starttls_total{type=\"inbound\",hostname=\"${HOSTNAME}\"} ${starttls_inbound}"
|
||||
echo "postfix_starttls_total{type=\"outbound\",hostname=\"${HOSTNAME}\"} ${starttls_outbound}"
|
||||
|
||||
# Sender/recipient access rejections
|
||||
echo "# HELP postfix_access_reject_total Sender/recipient access rejections"
|
||||
echo "# TYPE postfix_access_reject_total counter"
|
||||
sender_reject=$(grep_count 'Sender address rejected' "$LOG_FILE")
|
||||
recipient_reject=$(grep_count 'Recipient address rejected' "$LOG_FILE")
|
||||
client_reject=$(grep_count 'Client host rejected' "$LOG_FILE")
|
||||
echo "postfix_access_reject_total{type=\"sender\",hostname=\"${HOSTNAME}\"} ${sender_reject}"
|
||||
echo "postfix_access_reject_total{type=\"recipient\",hostname=\"${HOSTNAME}\"} ${recipient_reject}"
|
||||
echo "postfix_access_reject_total{type=\"client\",hostname=\"${HOSTNAME}\"} ${client_reject}"
|
||||
|
||||
# Queue filesystem usage
|
||||
echo "# HELP postfix_queue_filesystem_usage_percent Queue filesystem usage percentage"
|
||||
echo "# TYPE postfix_queue_filesystem_usage_percent gauge"
|
||||
queue_usage=$(df "${QUEUE_DIR}" 2>/dev/null | awk 'NR==2 {gsub(/%/,""); print $5}') || queue_usage=0
|
||||
echo "postfix_queue_filesystem_usage_percent{hostname=\"${HOSTNAME}\"} ${queue_usage:-0}"
|
||||
|
||||
# Postfix file descriptor count (for master process)
|
||||
echo "# HELP postfix_file_descriptors Open file descriptors by postfix"
|
||||
echo "# TYPE postfix_file_descriptors gauge"
|
||||
if [[ -f "$MASTER_PID_FILE" ]]; then
|
||||
master_pid=$(tr -d '[:space:]' < "$MASTER_PID_FILE" 2>/dev/null)
|
||||
if [[ -n "$master_pid" ]] && [[ -d "/proc/${master_pid}/fd" ]]; then
|
||||
fd_count=$(find "/proc/${master_pid}/fd" -maxdepth 1 2>/dev/null | wc -l)
|
||||
else
|
||||
fd_count=0
|
||||
fi
|
||||
else
|
||||
fd_count=0
|
||||
fi
|
||||
echo "postfix_file_descriptors{hostname=\"${HOSTNAME}\"} ${fd_count}"
|
||||
|
||||
# Script execution time
|
||||
# Dovecot IMAP/POP3 login metrics
|
||||
echo "# HELP dovecot_logins_total Successful logins by protocol"
|
||||
echo "# TYPE dovecot_logins_total counter"
|
||||
imap_logins=$(grep_count 'imap-login: Info: Login:' "$LOG_FILE")
|
||||
pop3_logins=$(grep_count 'pop3-login: Info: Login:' "$LOG_FILE")
|
||||
echo "dovecot_logins_total{protocol=\"imap\",hostname=\"${HOSTNAME}\"} ${imap_logins}"
|
||||
echo "dovecot_logins_total{protocol=\"pop3\",hostname=\"${HOSTNAME}\"} ${pop3_logins}"
|
||||
|
||||
echo "# HELP dovecot_login_auth_method_total Logins by authentication method"
|
||||
echo "# TYPE dovecot_login_auth_method_total counter"
|
||||
for method in PLAIN LOGIN CRAM-MD5 DIGEST-MD5; do
|
||||
count=$(grep_count "Login:.*method=${method}" "$LOG_FILE")
|
||||
echo "dovecot_login_auth_method_total{method=\"${method}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
echo "# HELP dovecot_login_tls_total Logins with/without TLS"
|
||||
echo "# TYPE dovecot_login_tls_total counter"
|
||||
tls_logins=$(grep -c 'Login:.*TLS' "$LOG_FILE" 2>/dev/null) || tls_logins=0
|
||||
notls_logins=$(grep 'Login:' "$LOG_FILE" 2>/dev/null | grep -cv 'TLS') || notls_logins=0
|
||||
echo "dovecot_login_tls_total{tls=\"yes\",hostname=\"${HOSTNAME}\"} ${tls_logins}"
|
||||
echo "dovecot_login_tls_total{tls=\"no\",hostname=\"${HOSTNAME}\"} ${notls_logins}"
|
||||
|
||||
echo "# HELP dovecot_login_failed_total Failed login attempts"
|
||||
echo "# TYPE dovecot_login_failed_total counter"
|
||||
imap_failed=$(grep_count 'imap-login: Info: Aborted login\|imap-login:.*auth failed' "$LOG_FILE")
|
||||
pop3_failed=$(grep_count 'pop3-login: Info: Aborted login\|pop3-login:.*auth failed' "$LOG_FILE")
|
||||
echo "dovecot_login_failed_total{protocol=\"imap\",hostname=\"${HOSTNAME}\"} ${imap_failed}"
|
||||
echo "dovecot_login_failed_total{protocol=\"pop3\",hostname=\"${HOSTNAME}\"} ${pop3_failed}"
|
||||
|
||||
echo "# HELP dovecot_login_user_total Logins per user (top 20)"
|
||||
echo "# TYPE dovecot_login_user_total counter"
|
||||
grep -oP 'Login: user=<\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count user; do
|
||||
echo "dovecot_login_user_total{user=\"${user}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
echo "# HELP dovecot_login_client_ip_total Logins per client IP (top 20)"
|
||||
echo "# TYPE dovecot_login_client_ip_total counter"
|
||||
grep -oP 'Login:.*rip=\K[^,]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count ip; do
|
||||
echo "dovecot_login_client_ip_total{client_ip=\"${ip}\",hostname=\"${HOSTNAME}\"} ${count}"
|
||||
done
|
||||
|
||||
local END_TIME
|
||||
END_TIME=$(date +%s.%N)
|
||||
local DURATION
|
||||
DURATION=$(echo "$END_TIME - $START_TIME" | bc)
|
||||
echo "# HELP postfix_collector_duration_seconds Time taken to collect metrics"
|
||||
echo "# TYPE postfix_collector_duration_seconds gauge"
|
||||
echo "postfix_collector_duration_seconds{hostname=\"${HOSTNAME}\"} ${DURATION}"
|
||||
|
||||
echo "# HELP postfix_collector_last_run_timestamp Unix timestamp of last collection"
|
||||
echo "# TYPE postfix_collector_last_run_timestamp gauge"
|
||||
echo "postfix_collector_last_run_timestamp{hostname=\"${HOSTNAME}\"} $(date +%s)"
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# HTTP SERVER MODE
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Serve metrics over HTTP using netcat, one connection at a time.
# "GET /metrics" returns the output of generate_metrics; any other request
# returns a small HTML index page.
# Globals:   HTTP_PORT (read)
# Outputs:   startup and error messages on stderr
# Returns:   never returns normally; exits 1 if nc or the FIFO is unavailable
#######################################
run_http_server() {
  echo "Starting Postfix metrics exporter on port $HTTP_PORT..." >&2

  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  # The handler must read what nc receives AND nc must send what the handler
  # writes. A plain "handler | nc" pipeline only wires the second direction,
  # so the handler would read the request line from the script's own stdin.
  # A FIFO carries the response back to nc and closes the loop.
  local fifo_dir fifo
  if ! fifo_dir=$(mktemp -d); then
    echo "ERROR: cannot create temporary directory for HTTP mode" >&2
    exit 1
  fi
  fifo="${fifo_dir}/response"
  if ! mkfifo "$fifo"; then
    rm -rf -- "$fifo_dir"
    echo "ERROR: cannot create FIFO for HTTP mode" >&2
    exit 1
  fi
  # The path is expanded now on purpose so cleanup does not depend on the
  # local variable still being in scope when the trap runs.
  # shellcheck disable=SC2064
  trap "rm -rf -- '${fifo_dir}'" EXIT

  local request header
  while true; do
    nc -l -p "$HTTP_PORT" -q 1 < "$fifo" 2>/dev/null | {
      request=""
      IFS= read -r request
      # Drain the remaining request headers (up to the blank line) so nc is
      # not left writing into a pipe nobody reads.
      while IFS= read -r header && [[ -n "${header%$'\r'}" ]]; do
        :
      done

      if [[ "$request" =~ ^GET\ /metrics ]]; then
        echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
        generate_metrics
      else
        echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
        cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Postfix Metrics Exporter</title></head>
<body>
<h1>Postfix Prometheus Exporter</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>Available Metrics</h2>
<ul>
<li>Queue sizes and ages</li>
<li>Message counts by status</li>
<li>TLS connection stats</li>
<li>SASL authentication</li>
<li>Bounce reasons</li>
<li>SpamAssassin scores</li>
<li>Dovecot delivery stats</li>
</ul>
</body>
</html>
EOF
      fi
    } > "$fifo"
  done
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN EXECUTION
|
||||
# ============================================================================
|
||||
|
||||
#######################################
# Entry point: parse arguments, then run in one of three modes.
#   HTTP_MODE=true   -> serve metrics over HTTP (run_http_server)
#   OUTPUT_FILE set  -> write metrics to that file via temp file + rename
#   otherwise        -> print metrics to stdout
# Globals:   HTTP_MODE, OUTPUT_FILE (read; expected to be set by parse_args)
# Arguments: the script's command-line arguments
# Returns:   exits 1 when the metrics file cannot be produced
#######################################
main() {
  parse_args "$@"

  if [ "$HTTP_MODE" = true ]; then
    run_http_server
  elif [ -n "$OUTPUT_FILE" ]; then
    # Textfile collector mode: write atomically using a temp file.
    local output_dir
    output_dir="$(dirname "$OUTPUT_FILE")"
    if ! mkdir -p -- "$output_dir"; then
      echo "ERROR: Cannot create output directory $output_dir" >&2
      exit 1
    fi

    # Temp file lives in the SAME directory so the final rename stays on one
    # filesystem and is therefore atomic.
    local temp_file
    if ! temp_file=$(mktemp "${output_dir}/.postfix_metrics.XXXXXX"); then
      echo "ERROR: Cannot create temporary file in $output_dir" >&2
      exit 1
    fi

    if ! generate_metrics > "$temp_file" 2>/dev/null; then
      rm -f -- "$temp_file"
      echo "ERROR: Failed to generate metrics" >&2
      exit 1
    fi

    # Validate: a near-empty file means collection went wrong; keep the
    # previous output file instead of replacing it.
    local file_lines
    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)

    if [ "$file_lines" -lt 10 ]; then
      rm -f -- "$temp_file"
      echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
      exit 1
    fi

    # mktemp creates the file 0600; make it world-readable before the move.
    chmod 644 "$temp_file"

    # Atomic rename - no gap where the file is missing.
    if ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
      rm -f -- "$temp_file"
      echo "ERROR: Cannot move metrics into place at $OUTPUT_FILE" >&2
      exit 1
    fi

    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
  else
    # Default: output to stdout
    generate_metrics
  fi
}
|
||||
|
||||
# Execute main function with all script arguments
|
||||
# Runs last so that every function above is defined before dispatch begins.
main "$@"
|
||||
@@ -0,0 +1,535 @@
|
||||
#!/bin/bash
|
||||
|
||||
################################################
|
||||
#### Salt Key Manager ####
|
||||
#### Automate salt-key operations ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### Version: 1.00-030526 ####
|
||||
################################################
|
||||
|
||||
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# Name the script was invoked as; used in help text and error messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Built-in defaults.
readonly DEFAULT_STALE_DAYS=30 \
  DEFAULT_CACHE_DIR="/var/cache/salt/master/minions"

# Debug output is enabled when DEBUG is non-empty in the environment.
: "${DEBUG:=}"

# Runtime state; filled in by command-line parsing.
ACTION="" TARGET_MINION="" EXPORT_PATH="" BULK_FILE=""
STALE_DAYS=$DEFAULT_STALE_DAYS
AUTO_YES=false
USE_COLOR=true

# Color escape sequences; empty until setup_colors enables them.
C_GREEN="" C_YELLOW="" C_RED="" C_CYAN="" C_RESET=""
|
||||
|
||||
# Report a failed command on stderr and terminate with its exit status.
# Arguments: $1 - exit status of the failed command
#            $2 - line number where it failed
handle_error() {
  local status=$1 failed_line=$2
  printf 'Error: %s failed at line %s with exit code %s\n' \
    "$SCRIPT_NAME" "$failed_line" "$status" >&2
  exit "$status"
}

# Route failing commands through handle_error.
trap 'handle_error $? $LINENO' ERR
|
||||
|
||||
# Print a "[DEBUG]"-prefixed message on stderr, but only when DEBUG is
# non-empty. Always returns 0 so it is safe as a function's last command.
debug_echo() {
  [[ -z "$DEBUG" ]] && return 0
  printf '%s\n' "[DEBUG] $*" >&2
}
|
||||
|
||||
# Print an informational message on stdout.
log_info() {
  printf '%s\n' "[INFO] $*"
}
|
||||
|
||||
# Print a warning on stderr.
log_warn() {
  printf '%s\n' "[WARN] $*" >&2
}
|
||||
|
||||
# Print an error message on stderr.
log_error() {
  printf '%s\n' "[ERROR] $*" >&2
}
|
||||
|
||||
# Populate the C_* color variables with ANSI escape sequences.
# Colors stay empty when USE_COLOR is not "true" or stdout is not a terminal.
setup_colors() {
  if [[ "$USE_COLOR" != true ]] || ! [[ -t 1 ]]; then
    return 0
  fi

  C_RED='\033[0;31m'
  C_GREEN='\033[0;32m'
  C_YELLOW='\033[0;33m'
  C_CYAN='\033[0;36m'
  C_RESET='\033[0m'
}
|
||||
|
||||
# Print usage, the available actions and options, and examples on stdout.
# Globals: SCRIPT_NAME, DEFAULT_STALE_DAYS (read)
show_help() {
  cat <<HELP_TEXT
Usage: ${SCRIPT_NAME} [ACTION] [OPTIONS]

Manage Salt minion keys — accept, reject, delete, verify, rotate, and
clean up stale keys.

ACTIONS:
--list List all keys by status with counts
--verify Show pending keys with fingerprints for verification
--accept-all Accept all pending keys
--accept MINION Accept a specific minion key
--reject MINION Reject a specific minion key
--delete MINION Delete a specific minion key
--rotate MINION Rotate a minion key (delete, re-accept on reconnect)
--cleanup-stale [DAYS] Delete keys for minions not seen in DAYS days (default: ${DEFAULT_STALE_DAYS})
--export PATH Export all accepted key fingerprints to a file
--bulk-accept FILE Accept minions listed in a file (one per line)

OPTIONS:
--yes Skip confirmation prompts
--no-color Disable colored output
--help, -h Show this help message

ENVIRONMENT VARIABLES:
DEBUG Enable debug output

EXAMPLES:
# List all keys with status
sudo ${SCRIPT_NAME} --list

# Show pending keys for verification
sudo ${SCRIPT_NAME} --verify

# Accept all pending keys
sudo ${SCRIPT_NAME} --accept-all --yes

# Accept a specific minion
sudo ${SCRIPT_NAME} --accept web01

# Clean up minions not seen in 60 days
sudo ${SCRIPT_NAME} --cleanup-stale 60

# Export fingerprints for auditing
sudo ${SCRIPT_NAME} --export /tmp/salt-keys.txt

# Bulk accept from a file
sudo ${SCRIPT_NAME} --bulk-accept /tmp/new-minions.txt --yes
HELP_TEXT
}
|
||||
|
||||
#######################################
# Count the minion keys in a given state.
# Arguments: $1 - state passed to "salt-key --list" (accepted, unaccepted,
#                 denied, rejected)
# Outputs:   exactly one integer on stdout (0 when there are no keys or
#            salt-key fails)
# Returns:   0 always
#######################################
count_keys() {
  local status="$1"
  # Skip blank lines and the capitalised "<State> Keys:" section headers that
  # salt-key prints; awk always emits a single number, so there is no
  # duplicate "0" when nothing matches.
  salt-key --list "$status" 2>/dev/null |
    awk 'NF && $0 !~ /^(Accepted|Unaccepted|Denied|Rejected) Keys:$/ { n++ }
         END { print n + 0 }' || true
}
|
||||
|
||||
# Print one "<Title> Keys:" section with the minion ids in a given state.
# Nothing is printed when the count is zero.
# Arguments: $1 - state for "salt-key --list"
#            $2 - header word salt-key prints for that state
#            $3 - title shown to the user
#            $4 - color escape for the title
#            $5 - number of keys in that state
_do_list_section() {
  local state="$1" header="$2" title="$3" color="$4" total="$5"
  if ((total > 0)); then
    printf '%b%s Keys:%b\n' "$color" "$title" "$C_RESET"
    salt-key --list "$state" 2>/dev/null | grep -v "^${header} Keys:" | sed 's/^/ /'
    echo ""
  fi
}

# Show a per-state key count summary followed by the keys in each
# non-empty state.
# Globals: C_GREEN, C_YELLOW, C_RED, C_RESET (read)
do_list() {
  echo "Salt Key Status"
  echo "==============="
  echo ""

  local n_accepted n_pending n_denied n_rejected
  n_accepted=$(count_keys "accepted")
  n_pending=$(count_keys "unaccepted")
  n_denied=$(count_keys "denied")
  n_rejected=$(count_keys "rejected")

  printf ' %b%s:%b %d\n' "$C_GREEN" "Accepted" "$C_RESET" "$n_accepted"
  printf ' %b%s:%b %d\n' "$C_YELLOW" "Pending" "$C_RESET" "$n_pending"
  printf ' %b%s:%b %d\n' "$C_RED" "Denied" "$C_RESET" "$n_denied"
  printf ' %b%s:%b %d\n' "$C_RED" "Rejected" "$C_RESET" "$n_rejected"
  echo ""

  _do_list_section accepted Accepted Accepted "$C_GREEN" "$n_accepted"
  _do_list_section unaccepted Unaccepted Pending "$C_YELLOW" "$n_pending"
  _do_list_section denied Denied Denied "$C_RED" "$n_denied"
  _do_list_section rejected Rejected Rejected "$C_RED" "$n_rejected"
}
|
||||
|
||||
# Show the master's fingerprint followed by every pending minion key and its
# fingerprint, so an operator can compare them with the minions' own values.
# Globals: C_CYAN, C_YELLOW, C_RESET (read)
do_verify() {
  local pending_ids
  pending_ids=$(salt-key --list unaccepted 2>/dev/null | grep -v "^Unaccepted Keys:$" | grep -v "^$")

  if [[ -z "$pending_ids" ]]; then
    log_info "No pending keys to verify"
    return 0
  fi

  echo "Master Fingerprint:"
  printf ' %b' "$C_CYAN"
  salt-key -F master 2>/dev/null | grep -A1 "master.pub" | tail -1 | tr -d ' '
  printf '%b\n\n' "$C_RESET"

  echo "Pending Keys with Fingerprints:"
  echo ""

  local entry minion_id fp
  while IFS= read -r entry; do
    if [[ -z "$entry" ]]; then
      continue
    fi
    # Drop all whitespace from the id.
    minion_id=${entry//[[:space:]]/}
    fp=$(salt-key -f "$minion_id" 2>/dev/null | grep -v "^Unaccepted Keys:" | awk '{print $2}' | head -1)
    printf ' %b%-30s%b %s\n' "$C_YELLOW" "$minion_id" "$C_RESET" "${fp:-unknown}"
  done <<< "$pending_ids"

  echo ""
  log_info "Verify each fingerprint matches the minion's local fingerprint:"
  log_info " (on minion) salt-call --local key.finger"
}
|
||||
|
||||
#######################################
# Accept every pending minion key, after confirmation unless AUTO_YES is true.
# Globals:   AUTO_YES (read)
# Returns:   0 when keys were accepted, nothing was pending, or the user
#            declined; 1 when salt-key fails
#######################################
do_accept_all() {
  local pending confirm=""
  pending=$(count_keys "unaccepted")

  if ((pending == 0)); then
    log_info "No pending keys to accept"
    return 0
  fi

  log_info "Accepting $pending pending key(s)..."

  if [[ "$AUTO_YES" != true ]]; then
    echo "Accept all $pending pending keys? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi

  # Only claim success when salt-key actually succeeded.
  if ! salt-key -A -y 2>/dev/null; then
    log_error "Failed to accept pending keys"
    return 1
  fi
  log_info "All pending keys accepted"
}
|
||||
|
||||
#######################################
# Accept one minion's key. Unless AUTO_YES is true, the key's fingerprint is
# shown and the user must confirm.
# Globals:   AUTO_YES (read)
# Arguments: $1 - minion id
# Returns:   0 on success or when the user declines; 1 when salt-key fails
#######################################
do_accept() {
  local minion="$1"
  local confirm="" fingerprint
  log_info "Accepting key for: $minion"

  if [[ "$AUTO_YES" != true ]]; then
    fingerprint=$(salt-key -f "$minion" 2>/dev/null | grep -v "^Unaccepted Keys:" | awk '{print $2}' | head -1)
    echo "Fingerprint: ${fingerprint:-unknown}"
    echo "Accept key for $minion? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi

  # Only claim success when salt-key actually succeeded.
  if ! salt-key -a "$minion" -y 2>/dev/null; then
    log_error "Failed to accept key for $minion"
    return 1
  fi
  log_info "Key accepted for $minion"
}
|
||||
|
||||
#######################################
# Reject one minion's key, after confirmation unless AUTO_YES is true.
# Globals:   AUTO_YES (read)
# Arguments: $1 - minion id
# Returns:   0 on success or when the user declines; 1 when salt-key fails
#######################################
do_reject() {
  local minion="$1"
  local confirm=""
  log_info "Rejecting key for: $minion"

  if [[ "$AUTO_YES" != true ]]; then
    echo "Reject key for $minion? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi

  # Only claim success when salt-key actually succeeded.
  if ! salt-key -r "$minion" -y 2>/dev/null; then
    log_error "Failed to reject key for $minion"
    return 1
  fi
  log_info "Key rejected for $minion"
}
|
||||
|
||||
#######################################
# Delete one minion's key, after confirmation unless AUTO_YES is true.
# Globals:   AUTO_YES (read)
# Arguments: $1 - minion id
# Returns:   0 on success or when the user declines; 1 when salt-key fails
#######################################
do_delete() {
  local minion="$1"
  local confirm=""
  log_info "Deleting key for: $minion"

  if [[ "$AUTO_YES" != true ]]; then
    echo "Delete key for $minion? This cannot be undone. [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi

  # Only claim success when salt-key actually succeeded.
  if ! salt-key -d "$minion" -y 2>/dev/null; then
    log_error "Failed to delete key for $minion"
    return 1
  fi
  log_info "Key deleted for $minion"
}
|
||||
|
||||
#######################################
# Rotate a minion key: delete the current key on the master so a new one can
# be accepted once the minion reconnects.
# Globals:   AUTO_YES (read)
# Arguments: $1 - minion id
# Returns:   0 on success or when the user declines; 1 when salt-key fails
#######################################
do_rotate() {
  local minion="$1"
  local confirm=""
  log_info "Rotating key for: $minion"
  log_info "This will delete the current key — the minion must reconnect to get a new key accepted"

  if [[ "$AUTO_YES" != true ]]; then
    echo "Rotate key for $minion? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi

  # Do not print follow-up instructions when the delete itself failed.
  if ! salt-key -d "$minion" -y 2>/dev/null; then
    log_error "Failed to delete key for $minion"
    return 1
  fi
  log_info "Key deleted for $minion — accept the new key when the minion reconnects"
  log_info "On the minion, restart salt-minion: systemctl restart salt-minion"
}
|
||||
|
||||
#######################################
# Delete keys for minions whose cache directory has not been modified within
# the given number of days.
# Globals:   DEFAULT_CACHE_DIR, AUTO_YES, C_RED, C_RESET (read)
# Arguments: $1 - age threshold in days
# Returns:   0 when nothing is stale, the user declines, or every delete
#            succeeds; 1 when the cache directory is missing, the cutoff
#            cannot be computed, or any delete fails
#######################################
do_cleanup_stale() {
  local days="$1"
  log_info "Finding minions not seen in $days days..."

  if [[ ! -d "$DEFAULT_CACHE_DIR" ]]; then
    log_error "Minion cache directory not found: $DEFAULT_CACHE_DIR"
    return 1
  fi

  # Try GNU date first, then the BSD form. Refuse to continue without a
  # numeric cutoff, since the comparison below would otherwise be an
  # arithmetic error.
  local cutoff now
  cutoff=$(date -d "-${days} days" +%s 2>/dev/null) ||
    cutoff=$(date -v-"${days}"d +%s 2>/dev/null) ||
    cutoff=""
  if [[ ! "$cutoff" =~ ^[0-9]+$ ]]; then
    log_error "Cannot compute cutoff date for $days days"
    return 1
  fi
  now=$(date +%s)

  local stale_minions=()
  local minion_dir minion_name last_modified days_ago
  while IFS= read -r minion_dir; do
    minion_name=$(basename "$minion_dir")
    # GNU stat first, then BSD stat; skip entries that cannot be inspected.
    last_modified=$(stat -c %Y "$minion_dir" 2>/dev/null) ||
      last_modified=$(stat -f %m "$minion_dir" 2>/dev/null) ||
      continue

    if ((last_modified < cutoff)); then
      days_ago=$(((now - last_modified) / 86400))
      stale_minions+=("$minion_name")
      printf ' %b%-30s%b (last seen %d days ago)\n' "$C_RED" "$minion_name" "$C_RESET" "$days_ago"
    fi
  done < <(find "$DEFAULT_CACHE_DIR" -maxdepth 1 -mindepth 1 -type d 2>/dev/null)

  if [[ ${#stale_minions[@]} -eq 0 ]]; then
    log_info "No stale minions found"
    return 0
  fi

  echo ""
  log_info "Found ${#stale_minions[@]} stale minion(s)"

  local confirm=""
  if [[ "$AUTO_YES" != true ]]; then
    echo "Delete keys for all ${#stale_minions[@]} stale minions? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi

  local minion failed=0
  for minion in "${stale_minions[@]}"; do
    if salt-key -d "$minion" -y 2>/dev/null; then
      log_info "Deleted key: $minion"
    else
      log_error "Failed to delete key: $minion"
      failed=$((failed + 1))
    fi
  done

  if ((failed > 0)); then
    log_error "Stale key cleanup finished with $failed failure(s)"
    return 1
  fi
  log_info "Stale key cleanup complete"
}
|
||||
|
||||
do_export() {
|
||||
local output_path="$1"
|
||||
log_info "Exporting accepted key fingerprints to $output_path..."
|
||||
|
||||
{
|
||||
echo "# Salt Key Fingerprint Export"
|
||||
echo "# Generated: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
|
||||
echo "# Master: $(hostname -f 2>/dev/null || hostname)"
|
||||
echo "#"
|
||||
echo "# Format: minion_id fingerprint"
|
||||
echo ""
|
||||
salt-key -F accepted 2>/dev/null | grep -v "^Accepted Keys:" | while IFS= read -r line; do
|
||||
[[ -z "$line" ]] && continue
|
||||
echo "$line"
|
||||
done
|
||||
} > "$output_path"
|
||||
|
||||
local count
|
||||
count=$(grep -cv "^#\|^$" "$output_path" 2>/dev/null) || count=0
|
||||
log_info "Exported $count key fingerprint(s) to $output_path"
|
||||
}
|
||||
|
||||
#######################################
# Accept every minion listed in a file, one id per line. Blank lines and
# lines starting with "#" are ignored; anything after the first ":" on a
# line is ignored, so the fingerprint export format can be fed back in.
# Arguments: $1 - path of the input file
# Returns:   0 when every listed key was accepted; 1 when the file is
#            missing or any accept fails
#######################################
do_bulk_accept() {
  local input_file="$1"

  if [[ ! -f "$input_file" ]]; then
    log_error "File not found: $input_file"
    return 1
  fi

  local count=0 failed=0
  local line minion_id

  # "|| [[ -n $line ]]" keeps the final line when the file has no trailing
  # newline; read returns non-zero there but still fills in $line.
  while IFS= read -r line || [[ -n "$line" ]]; do
    minion_id="${line%%:*}"
    minion_id=${minion_id//[[:space:]]/}
    # Skip blank/whitespace-only lines and comments (also indented ones).
    [[ -z "$minion_id" || "$minion_id" == \#* ]] && continue

    if salt-key -a "$minion_id" -y 2>/dev/null; then
      log_info "Accepted: $minion_id"
      count=$((count + 1))
    else
      log_error "Failed to accept: $minion_id"
      failed=$((failed + 1))
    fi
  done < "$input_file"

  log_info "Bulk accept complete: $count accepted, $failed failed"
  if ((failed > 0)); then
    return 1
  fi
  return 0
}
|
||||
|
||||
#######################################
# Parse command-line arguments into the runtime globals.
# Globals:   ACTION, TARGET_MINION, STALE_DAYS, EXPORT_PATH, BULK_FILE,
#            AUTO_YES, USE_COLOR (written)
# Arguments: the script's command-line arguments
# Returns:   exits 0 after --help; exits 1 on an unknown option or when an
#            option that needs a value is the last argument
#######################################
parse_arguments() {
  while [[ $# -gt 0 ]]; do
    case $1 in
      --list | --verify | --accept-all)
        ACTION="${1#--}"
        shift
        ;;
      --accept | --reject | --delete | --rotate)
        # Without this check "shift 2" fails when the option is the last
        # argument, nothing is shifted, and the loop never terminates.
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a minion name"
          exit 1
        fi
        ACTION="${1#--}"
        TARGET_MINION="$2"
        shift 2
        ;;
      --cleanup-stale)
        ACTION="cleanup-stale"
        # DAYS is optional: consume the next argument only if it is numeric.
        if [[ -n "${2:-}" && "$2" =~ ^[0-9]+$ ]]; then
          STALE_DAYS="$2"
          shift 2
        else
          shift
        fi
        ;;
      --export)
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires an output path"
          exit 1
        fi
        ACTION="export"
        EXPORT_PATH="$2"
        shift 2
        ;;
      --bulk-accept)
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires an input file"
          exit 1
        fi
        ACTION="bulk-accept"
        BULK_FILE="$2"
        shift 2
        ;;
      --yes)
        AUTO_YES=true
        shift
        ;;
      --no-color)
        USE_COLOR=false
        shift
        ;;
      --help | -h)
        show_help
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        show_help >&2
        exit 1
        ;;
    esac
  done
}
|
||||
|
||||
# Check preconditions before any action runs; exits 1 with a message on the
# first one that fails: root privileges, an action chosen, salt-key present,
# and the value the chosen action needs.
# Globals: ACTION, TARGET_MINION, EXPORT_PATH, BULK_FILE (read)
validate_requirements() {
  if ((EUID != 0)); then
    log_error "This script must be run as root (use sudo)"
    exit 1
  fi

  if [[ -z "$ACTION" ]]; then
    log_error "An action is required"
    show_help >&2
    exit 1
  fi

  if ! command -v salt-key >/dev/null 2>&1; then
    log_error "salt-key not found — is salt-master installed?"
    exit 1
  fi

  case "$ACTION" in
    accept | reject | delete | rotate)
      if [[ -z "$TARGET_MINION" ]]; then
        log_error "Minion name is required for --$ACTION"
        exit 1
      fi
      ;;
    export)
      if [[ -z "$EXPORT_PATH" ]]; then
        log_error "Output path is required for --export"
        exit 1
      fi
      ;;
    bulk-accept)
      if [[ -z "$BULK_FILE" ]]; then
        log_error "Input file is required for --bulk-accept"
        exit 1
      fi
      ;;
  esac
}
|
||||
|
||||
# Entry point: parse arguments, validate preconditions, set up colors, then
# dispatch to the handler for the selected action.
# Arguments: the script's command-line arguments
main() {
  parse_arguments "$@"
  validate_requirements
  setup_colors

  case "$ACTION" in
    list)
      do_list
      ;;
    verify)
      do_verify
      ;;
    accept-all)
      do_accept_all
      ;;
    accept)
      do_accept "$TARGET_MINION"
      ;;
    reject)
      do_reject "$TARGET_MINION"
      ;;
    delete)
      do_delete "$TARGET_MINION"
      ;;
    rotate)
      do_rotate "$TARGET_MINION"
      ;;
    cleanup-stale)
      do_cleanup_stale "$STALE_DAYS"
      ;;
    export)
      do_export "$EXPORT_PATH"
      ;;
    bulk-accept)
      do_bulk_accept "$BULK_FILE"
      ;;
  esac

  debug_echo "Script completed successfully"
}
|
||||
|
||||
# Run main only when this file is executed directly; when it is sourced,
# BASH_SOURCE[0] differs from $0 and nothing is run.
if [[ "${0}" == "${BASH_SOURCE[0]}" ]]; then
  main "$@"
fi
|
||||
Executable
+1314
File diff suppressed because it is too large
Load Diff
+509
@@ -0,0 +1,509 @@
|
||||
#!/bin/bash
|
||||
|
||||
################################################
|
||||
#### Salt Master/Minion Setup Automation ####
|
||||
#### Install and configure SaltStack ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### Version: 1.00-030526 ####
|
||||
################################################
|
||||
|
||||
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# Name the script was invoked as; used in help text and error messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Built-in defaults.
readonly DEFAULT_SALT_VERSION="latest" \
  DEFAULT_FILE_ROOTS="/srv/salt" \
  DEFAULT_PILLAR_ROOTS="/srv/pillar" \
  DEFAULT_MASTER_INTERFACE="0.0.0.0" \
  DEFAULT_MASTER_PORT_PUB=4505 \
  DEFAULT_MASTER_PORT_RET=4506

# Settings the environment may override; defaults apply when unset or empty.
: "${SALT_VERSION:=$DEFAULT_SALT_VERSION}"
: "${FILE_ROOTS:=$DEFAULT_FILE_ROOTS}"
: "${PILLAR_ROOTS:=$DEFAULT_PILLAR_ROOTS}"
: "${DEBUG:=}"

# Runtime state; filled in by argument parsing and OS detection.
MODE="" MASTER_IP="" MINION_ID=""
AUTO_ACCEPT=false
AUTO_YES=false
PKG_MANAGER="" OS_FAMILY="" OS_VERSION=""
|
||||
|
||||
# Report a failed command on stderr and terminate with its exit status.
# Arguments: $1 - exit status of the failed command
#            $2 - line number where it failed
handle_error() {
  local status=$1 failed_line=$2
  printf 'Error: %s failed at line %s with exit code %s\n' \
    "$SCRIPT_NAME" "$failed_line" "$status" >&2
  exit "$status"
}

# Route failing commands through handle_error.
trap 'handle_error $? $LINENO' ERR
|
||||
|
||||
# Print a "[DEBUG]"-prefixed message on stderr, but only when DEBUG is
# non-empty. Always returns 0 so it is safe as a function's last command.
debug_echo() {
  [[ -z "$DEBUG" ]] && return 0
  printf '%s\n' "[DEBUG] $*" >&2
}
|
||||
|
||||
# Print an informational message on stdout.
log_info() {
  printf '%s\n' "[INFO] $*"
}
|
||||
|
||||
# Print a warning on stderr.
log_warn() {
  printf '%s\n' "[WARN] $*" >&2
}
|
||||
|
||||
# Print an error message on stderr.
log_error() {
  printf '%s\n' "[ERROR] $*" >&2
}
|
||||
|
||||
# Print usage, options, environment variables and examples on stdout.
# Globals: SCRIPT_NAME, DEFAULT_SALT_VERSION, DEFAULT_FILE_ROOTS,
#          DEFAULT_PILLAR_ROOTS (read)
show_help() {
  cat <<HELP_TEXT
Usage: ${SCRIPT_NAME} [OPTIONS]

Automate Salt master and/or minion installation and configuration.

Supports Ubuntu/Debian and RHEL/AlmaLinux. Adds the Salt Project repository,
installs packages, configures services, creates directory structure, and
opens firewall ports.

OPTIONS:
--mode master|minion|both What to install (required)
--master-ip ADDRESS Salt master IP or hostname (required for minion/both)
--minion-id NAME Custom minion ID (default: system hostname)
--auto-accept Enable auto_accept on master (NOT for production)
--salt-version VERSION Pin Salt version (default: latest)
--yes Skip confirmation prompts
--help, -h Show this help message

ENVIRONMENT VARIABLES:
SALT_VERSION Salt version to install (default: ${DEFAULT_SALT_VERSION})
FILE_ROOTS Master file_roots path (default: ${DEFAULT_FILE_ROOTS})
PILLAR_ROOTS Master pillar_roots path (default: ${DEFAULT_PILLAR_ROOTS})
DEBUG Enable debug output

EXAMPLES:
# Install salt-master
sudo ${SCRIPT_NAME} --mode master --yes

# Install salt-minion pointing to master
sudo ${SCRIPT_NAME} --mode minion --master-ip 10.0.0.1

# Install both on the same node
sudo ${SCRIPT_NAME} --mode both --master-ip localhost --yes

# Install with custom minion ID
sudo ${SCRIPT_NAME} --mode minion --master-ip salt.example.com --minion-id web01

# Install specific Salt version
sudo ${SCRIPT_NAME} --mode master --salt-version 3006 --yes
HELP_TEXT
}
|
||||
|
||||
# Identify the distribution from /etc/os-release and choose the package
# manager. Sourcing happens in the current shell, so the os-release
# variables (ID, VERSION_ID, VERSION_CODENAME, ...) stay set afterwards.
# Globals: OS_VERSION, OS_FAMILY, PKG_MANAGER (written)
# Returns: exits 1 when os-release is missing or the distribution is not
#          supported
detect_os() {
  if [[ ! -f /etc/os-release ]]; then
    log_error "Cannot detect OS — /etc/os-release not found"
    exit 1
  fi

  # shellcheck disable=SC1091
  source /etc/os-release
  OS_VERSION="$VERSION_ID"

  case "$ID" in
    ubuntu | debian)
      OS_FAMILY="debian"
      PKG_MANAGER="apt"
      ;;
    rhel | centos | rocky | almalinux | ol | fedora)
      OS_FAMILY="rhel"
      # Prefer dnf where available; fall back to yum.
      PKG_MANAGER="yum"
      if command -v dnf >/dev/null 2>&1; then
        PKG_MANAGER="dnf"
      fi
      ;;
    *)
      log_error "Unsupported OS: $ID"
      exit 1
      ;;
  esac

  debug_echo "Detected OS: $OS_FAMILY ($PKG_MANAGER) version $OS_VERSION"
}
|
||||
|
||||
# Print the CPU count reported by nproc, or 2 when nproc fails or is
# unavailable.
get_cpu_count() {
  if ! nproc 2>/dev/null; then
    echo 2
  fi
}
|
||||
|
||||
#######################################
# Add the Salt Project apt repository and its signing key.
# Globals:   OS_VERSION, SALT_VERSION (read); ID and VERSION_CODENAME (read;
#            os-release variables, expected to be set by detect_os)
# Returns:   0 on success; 1 when prerequisites, the key download, or the
#            sources entry fail
#
# NOTE(review): the Salt Project has announced a move of its package
# repositories away from repo.saltproject.io — confirm these URLs (and the
# path layout used for "latest") against the current install guide.
#######################################
add_salt_repo_debian() {
  log_info "Adding Salt Project repository (Debian/Ubuntu)..."

  apt-get update -qq
  if ! apt-get install -y -qq curl gnupg2 >/dev/null; then
    log_error "Failed to install prerequisites (curl, gnupg2)"
    return 1
  fi

  # Use the distribution id from os-release rather than assuming Ubuntu, so
  # Debian hosts get the Debian tree. Falls back to "ubuntu" when ID is unset.
  local distro="${ID:-ubuntu}"
  local base_url="https://repo.saltproject.io/salt/py3/${distro}/${OS_VERSION}/amd64"

  local keyring="/etc/apt/keyrings/salt-archive-keyring.gpg"
  mkdir -p /etc/apt/keyrings
  # Stop here on a failed download: a sources entry pointing at a missing
  # key would break every later "apt-get update".
  if ! curl -fsSL "${base_url}/SALT-PROJECT-GPG-PUBKEY-2023.gpg" -o "$keyring"; then
    log_error "Failed to download Salt repository signing key"
    return 1
  fi

  local repo_url="$base_url"
  if [[ "$SALT_VERSION" != "latest" ]]; then
    repo_url="${repo_url}/${SALT_VERSION}"
  fi

  if ! echo "deb [signed-by=${keyring}] ${repo_url} ${VERSION_CODENAME} main" \
    > /etc/apt/sources.list.d/salt.list; then
    log_error "Failed to write /etc/apt/sources.list.d/salt.list"
    return 1
  fi

  apt-get update -qq
  log_info "Salt repository added"
}
|
||||
|
||||
# Write /etc/yum.repos.d/salt.repo for the Salt Project repository and
# expire the package manager's metadata cache.
# Globals: OS_VERSION, SALT_VERSION, PKG_MANAGER (read)
add_salt_repo_rhel() {
  log_info "Adding Salt Project repository (RHEL)..."

  # Only the major release number is used in the repository path.
  local rel="${OS_VERSION%%.*}"
  local base="https://repo.saltproject.io/salt/py3/redhat/${rel}/x86_64"
  local url="$base"
  if [[ "$SALT_VERSION" != "latest" ]]; then
    url="${base}/${SALT_VERSION}"
  fi

  printf '%s\n' \
    "[salt]" \
    "name=Salt Project for RHEL ${rel}" \
    "baseurl=${url}" \
    "enabled=1" \
    "gpgcheck=1" \
    "gpgkey=${base}/SALT-PROJECT-GPG-PUBKEY-2023.pub" \
    > /etc/yum.repos.d/salt.repo

  "$PKG_MANAGER" clean expire-cache -q
  log_info "Salt repository added"
}
|
||||
|
||||
#######################################
# Install the salt-master package with the detected package manager.
# Globals:   PKG_MANAGER (read)
# Returns:   0 on success; 1 when installation fails or the package manager
#            is not one of apt, dnf, yum
#######################################
install_master() {
  log_info "Installing salt-master..."
  case "$PKG_MANAGER" in
    apt)
      if ! apt-get install -y -qq salt-master >/dev/null; then
        log_error "Failed to install salt-master"
        return 1
      fi
      ;;
    dnf | yum)
      if ! "$PKG_MANAGER" install -y -q salt-master; then
        log_error "Failed to install salt-master"
        return 1
      fi
      ;;
    *)
      # Without this arm an unknown manager would fall through and be
      # reported as a successful install.
      log_error "Unsupported package manager: ${PKG_MANAGER:-<unset>}"
      return 1
      ;;
  esac
  log_info "salt-master installed"
}
|
||||
|
||||
#######################################
# Install the salt-minion package with the detected package manager.
# Globals:   PKG_MANAGER (read)
# Returns:   0 on success; 1 when installation fails or the package manager
#            is not one of apt, dnf, yum
#######################################
install_minion() {
  log_info "Installing salt-minion..."
  case "$PKG_MANAGER" in
    apt)
      if ! apt-get install -y -qq salt-minion >/dev/null; then
        log_error "Failed to install salt-minion"
        return 1
      fi
      ;;
    dnf | yum)
      if ! "$PKG_MANAGER" install -y -q salt-minion; then
        log_error "Failed to install salt-minion"
        return 1
      fi
      ;;
    *)
      # Without this arm an unknown manager would fall through and be
      # reported as a successful install.
      log_error "Unsupported package manager: ${PKG_MANAGER:-<unset>}"
      return 1
      ;;
  esac
  log_info "salt-minion installed"
}
|
||||
|
||||
#######################################
# Write /etc/salt/master, backing up any existing file first.
# Globals:   DEFAULT_MASTER_INTERFACE, FILE_ROOTS, PILLAR_ROOTS,
#            AUTO_ACCEPT (read)
# Returns:   0 on success; 1 when the configuration file cannot be written
#######################################
configure_master() {
  log_info "Configuring salt-master..."

  local worker_threads
  worker_threads=$(get_cpu_count)

  if [[ -f /etc/salt/master ]]; then
    cp /etc/salt/master /etc/salt/master.bak."$(date +%Y%m%d%H%M%S)"
    log_info "Backed up existing /etc/salt/master"
  fi

  # file_roots and pillar_roots map an environment name to a list of
  # directories, so "base:" and its list items must be nested; the
  # indentation in the heredoc below is significant YAML.
  if ! {
    cat << MASTEREOF
##### Salt Master Configuration #####
##### Managed by salt-setup.sh #####

interface: ${DEFAULT_MASTER_INTERFACE}

file_roots:
  base:
    - ${FILE_ROOTS}

pillar_roots:
  base:
    - ${PILLAR_ROOTS}

worker_threads: ${worker_threads}
timeout: 30
state_events: True
presence_events: True
MASTEREOF
    echo ""
    if [[ "$AUTO_ACCEPT" == true ]]; then
      echo "# WARNING: NOT recommended for production"
      echo "auto_accept: True"
    else
      echo "auto_accept: False"
    fi
  } > /etc/salt/master; then
    log_error "Failed to write /etc/salt/master"
    return 1
  fi

  if [[ "$AUTO_ACCEPT" == true ]]; then
    log_warn "auto_accept enabled — NOT recommended for production"
  fi

  log_info "Master configuration written to /etc/salt/master"
}
|
||||
|
||||
configure_minion() {
    # Writes /etc/salt/minion pointing at MASTER_IP, backing up any existing
    # file first. The minion id is MINION_ID when set, otherwise the output of
    # `hostname -f`, falling back to plain `hostname`.
    log_info "Configuring salt-minion..."

    local node_id
    node_id="${MINION_ID:-$(hostname -f 2>/dev/null || hostname)}"

    if [[ -f /etc/salt/minion ]]; then
        local stamp
        stamp="$(date +%Y%m%d%H%M%S)"
        cp /etc/salt/minion /etc/salt/minion.bak."${stamp}"
        log_info "Backed up existing /etc/salt/minion"
    fi

    {
        echo "##### Salt Minion Configuration #####"
        echo "##### Managed by salt-setup.sh #####"
        echo ""
        echo "master: ${MASTER_IP}"
        echo "id: ${node_id}"
        echo ""
        echo "# grains:"
        echo "# role: webserver"
        echo "# environment: production"
    } > /etc/salt/minion

    log_info "Minion configured (id: ${node_id}, master: ${MASTER_IP})"
}
|
||||
|
||||
create_directory_structure() {
    # Creates FILE_ROOTS and PILLAR_ROOTS and seeds each with a starter
    # top.sls. An existing top.sls is never overwritten.
    # Globals read: FILE_ROOTS, PILLAR_ROOTS.
    log_info "Creating Salt directory structure..."

    mkdir -p "${FILE_ROOTS}" "${PILLAR_ROOTS}"

    # The delimiters are quoted so '*' and '#' are written literally. The
    # indentation is significant YAML: the '*' target must nest under
    # "base:" and its (empty) list must nest under '*'.
    if [[ ! -f "${FILE_ROOTS}/top.sls" ]]; then
        cat > "${FILE_ROOTS}/top.sls" << 'TOPEOF'
base:
  '*':
    []
    # - common
    # - packages
TOPEOF
        log_info "Created ${FILE_ROOTS}/top.sls"
    fi

    if [[ ! -f "${PILLAR_ROOTS}/top.sls" ]]; then
        cat > "${PILLAR_ROOTS}/top.sls" << 'PTOPEOF'
base:
  '*':
    []
    # - common
PTOPEOF
        log_info "Created ${PILLAR_ROOTS}/top.sls"
    fi
}
|
||||
|
||||
open_firewall_ports() {
    # Opens the two Salt master TCP ports in ufw or firewalld, whichever is
    # installed, and only when that firewall is active. With neither present
    # it logs a warning asking for the ports to be opened manually.
    # Globals read: DEFAULT_MASTER_PORT_PUB, DEFAULT_MASTER_PORT_RET.
    log_info "Configuring firewall for Salt master ports..."

    local port
    local -a ports=("${DEFAULT_MASTER_PORT_PUB}" "${DEFAULT_MASTER_PORT_RET}")

    if command -v ufw >/dev/null 2>&1; then
        # Capture the status instead of piping into `grep -q`: with pipefail,
        # grep exiting early can make the pipeline fail and hide an active
        # firewall.
        local ufw_state
        ufw_state="$(ufw status || true)"
        if [[ "$ufw_state" == *"Status: active"* ]]; then
            for port in "${ports[@]}"; do
                ufw allow "${port}/tcp" >/dev/null
            done
            log_info "Opened ports ${DEFAULT_MASTER_PORT_PUB}/${DEFAULT_MASTER_PORT_RET} in ufw"
        else
            debug_echo "ufw not active — skipping"
        fi
    elif command -v firewall-cmd >/dev/null 2>&1; then
        if firewall-cmd --state >/dev/null 2>&1; then
            for port in "${ports[@]}"; do
                firewall-cmd --permanent --add-port="${port}/tcp" >/dev/null
            done
            # Permanent rules only take effect after a reload.
            firewall-cmd --reload >/dev/null
            log_info "Opened ports ${DEFAULT_MASTER_PORT_PUB}/${DEFAULT_MASTER_PORT_RET} in firewalld"
        else
            debug_echo "firewalld not running — skipping"
        fi
    else
        log_warn "No supported firewall detected — manually open ports ${DEFAULT_MASTER_PORT_PUB} and ${DEFAULT_MASTER_PORT_RET}"
    fi
}
|
||||
|
||||
start_service() {
    # Enables and restarts a systemd unit, then verifies that it is active.
    # Arguments: $1 - unit name (e.g. salt-master).
    # Returns:   0 when the unit is active afterwards; 1 otherwise, after
    #            logging an error and printing `systemctl status`.
    local service="$1"
    log_info "Enabling and starting ${service}..."
    systemctl enable "$service" >/dev/null 2>&1

    # Do not let a failed restart abort the script under errexit before the
    # diagnostics below are shown; the is-active check decides the outcome.
    systemctl restart "$service" || true

    if systemctl is-active "$service" >/dev/null 2>&1; then
        log_info "${service} is running"
    else
        log_error "${service} failed to start"
        # `systemctl status` exits non-zero for a unit that is not running;
        # ignore that so the function always reaches its own `return 1`.
        systemctl status "$service" --no-pager || true
        return 1
    fi
}
|
||||
|
||||
show_summary() {
    # Prints the post-install summary: a master section for MODE master/both
    # and a minion section for MODE minion/both, each with suggested next
    # steps.
    # Globals read: MODE, FILE_ROOTS, PILLAR_ROOTS, DEFAULT_MASTER_PORT_PUB,
    #               DEFAULT_MASTER_PORT_RET, MASTER_IP, MINION_ID, AUTO_ACCEPT.
    local rule="============================================"

    printf '%s\n' "" "$rule" " Salt Setup Complete" "$rule"

    case "$MODE" in
        master|both)
            printf '%s\n' \
                "" \
                " Master:" \
                " Config: /etc/salt/master" \
                " File roots: ${FILE_ROOTS}" \
                " Pillar roots: ${PILLAR_ROOTS}" \
                " Ports: ${DEFAULT_MASTER_PORT_PUB}, ${DEFAULT_MASTER_PORT_RET}" \
                "" \
                " Master fingerprint:"
            # Show the master.pub line and the line after it from salt-key's
            # output, or a placeholder when nothing matches.
            salt-key -F master 2>/dev/null | grep -A1 "master.pub" || echo " (not yet generated — restart may be needed)"
            printf '%s\n' \
                "" \
                " Next steps:" \
                " salt-key -L # List pending keys" \
                " salt-key -a <minion_id> # Accept a minion key" \
                " salt '*' test.ping # Test connectivity"
            ;;
    esac

    case "$MODE" in
        minion|both)
            local node_id
            node_id="${MINION_ID:-$(hostname -f 2>/dev/null || hostname)}"
            printf '%s\n' \
                "" \
                " Minion:" \
                " Config: /etc/salt/minion" \
                " Master: ${MASTER_IP}" \
                " Minion ID: ${node_id}" \
                "" \
                " Next steps:" \
                " salt-call test.ping # Test master connectivity"
            # The key has to be accepted by hand unless auto-accept is on.
            if [[ "$AUTO_ACCEPT" != true ]]; then
                printf '%s\n' " (on master) salt-key -a ${node_id}"
            fi
            ;;
    esac

    printf '%s\n' "" "$rule"
}
|
||||
|
||||
parse_arguments() {
    # Parses the command line into the script's option globals.
    # Globals written: MODE, MASTER_IP, MINION_ID, AUTO_ACCEPT, SALT_VERSION,
    #                  AUTO_YES.
    # Exits 1 on an unknown option, an invalid --mode value, or a value
    # option given without its value; exits 0 after --help.
    while [[ $# -gt 0 ]]; do
        # Options that consume a value need a second word. Without this
        # check, "$2" is unset and `shift 2` fails without shifting, so the
        # loop either dies on an unbound variable or never terminates.
        case "$1" in
            --mode|--master-ip|--minion-id|--salt-version)
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a value"
                    show_help >&2
                    exit 1
                fi
                ;;
        esac

        case "$1" in
            --mode)
                MODE="$2"
                if [[ "$MODE" != "master" && "$MODE" != "minion" && "$MODE" != "both" ]]; then
                    log_error "Mode must be 'master', 'minion', or 'both'"
                    exit 1
                fi
                shift 2
                ;;
            --master-ip)
                MASTER_IP="$2"
                shift 2
                ;;
            --minion-id)
                MINION_ID="$2"
                shift 2
                ;;
            --auto-accept)
                AUTO_ACCEPT=true
                shift
                ;;
            --salt-version)
                SALT_VERSION="$2"
                shift 2
                ;;
            --yes)
                AUTO_YES=true
                shift
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                show_help >&2
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
validate_requirements() {
    # Pre-flight checks before anything is installed: the caller must be
    # root, --mode must have been given, and minion/both modes need a master
    # address. Ends by calling detect_os.
    # Exits 1 on the first failed check.
    if (( EUID != 0 )); then
        log_error "This script must be run as root (use sudo)"
        exit 1
    fi

    if [[ -z "$MODE" ]]; then
        log_error "--mode is required (master, minion, or both)"
        show_help >&2
        exit 1
    fi

    case "$MODE" in
        minion|both)
            if [[ -z "$MASTER_IP" ]]; then
                log_error "--master-ip is required for minion/both modes"
                exit 1
            fi
            ;;
    esac

    detect_os
}
|
||||
|
||||
main() {
    # Entry point: parse and validate the command line, print the plan,
    # confirm unless --yes was given, add the Salt repository for the
    # detected OS family, then install/configure/start the requested roles
    # and print the summary.
    parse_arguments "$@"
    validate_requirements

    local rule="============================================"
    printf '%s\n' \
        "$rule" \
        " Salt Setup" \
        " Mode: $MODE" \
        " OS: $OS_FAMILY ($PKG_MANAGER)"
    if [[ -n "$MASTER_IP" ]]; then
        printf '%s\n' " Master: $MASTER_IP"
    fi
    printf '%s\n' "$rule" ""

    # Interactive confirmation; skipped when AUTO_YES is true.
    if [[ "$AUTO_YES" != true ]]; then
        printf '%s\n' "Press Enter to continue, or Ctrl+C to abort..."
        read -r
    fi

    case "$OS_FAMILY" in
        debian) add_salt_repo_debian ;;
        rhel) add_salt_repo_rhel ;;
    esac

    case "$MODE" in
        master|both)
            install_master
            configure_master
            create_directory_structure
            open_firewall_ports
            start_service salt-master
            ;;
    esac

    case "$MODE" in
        minion|both)
            install_minion
            configure_minion
            start_service salt-minion
            ;;
    esac

    show_summary

    debug_echo "Script completed successfully"
}
|
||||
|
||||
# Run main only when this file is executed directly; do nothing when sourced.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"
|
||||
+513
@@ -0,0 +1,513 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Monitors Salt Minion service status and exports metrics for Prometheus windows_exporter.
|
||||
|
||||
.DESCRIPTION
|
||||
This script checks the status of the Salt Minion service and creates Prometheus-formatted metrics.
|
||||
The metrics are written to a text file that can be consumed by the windows_exporter.
|
||||
It can also create a scheduled task to run periodically.
|
||||
|
||||
.PARAMETER ValidateNotNullOrEmpty
|
||||
Switch to validate that the MetricsFilePath parameter is not null or empty.
|
||||
|
||||
.PARAMETER ValidateScript
|
||||
Validate that the MetricsFilePath parameter is a valid Windows path.
|
||||
|
||||
.PARAMETER MetricsFilePath
|
||||
The path where the Prometheus metrics file will be written.
|
||||
|
||||
.PARAMETER InstallScheduledTask
|
||||
Switch to create a scheduled task for periodic monitoring.
|
||||
|
||||
.PARAMETER TaskIntervalMinutes
|
||||
The interval in minutes for the scheduled task. Default is 15 minutes.
|
||||
|
||||
.PARAMETER TimeoutSeconds
Timeout in seconds for service status checks (1-300). Default is 30 seconds.
|
||||
|
||||
.PARAMETER SaltMasterPort
|
||||
The port number for the Salt Master. Default is 4505.
|
||||
|
||||
.PARAMETER DryRun
|
||||
Switch to output metrics to console instead of writing to file.
|
||||
|
||||
.PARAMETER Verbose
|
||||
Switch to enable verbose debug output for troubleshooting.
|
||||
|
||||
.PARAMETER Quiet
|
||||
Switch to suppress non-error output (useful for scheduled tasks).
|
||||
|
||||
.PARAMETER NoCron
|
||||
Switch to skip scheduled task installation.
|
||||
|
||||
.PARAMETER Version
|
||||
Switch to display script version and exit.
|
||||
|
||||
.NOTES
|
||||
Version: 3.3.0-20250915
|
||||
Author: Phil Connor, contact@mylinux.work
|
||||
License: MIT
|
||||
Created: 2025-01-24 loosely based on my salt_status.sh used with the linux servers.
|
||||
#>
|
||||
|
||||
param(
    # Destination of the Prometheus textfile. When supplied it must be a
    # drive-rooted path (e.g. C:\...) whose parent directory already exists.
    [ValidateNotNullOrEmpty()]
    [ValidateScript({
        $parentDir = Split-Path $_ -Parent
        if ($parentDir -and -not (Test-Path $parentDir)) {
            throw "Directory does not exist: $parentDir"
        }
        if ($_ -notmatch '^[A-Za-z]:\\') {
            throw "Invalid file path format"
        }
        $true
    })]
    [string]$MetricsFilePath = "$env:ProgramFiles\windows_exporter\textfile_inputs\salt_status.prom",

    # Register a scheduled task that re-runs this script periodically.
    [switch]$InstallScheduledTask,

    # Repetition interval of the scheduled task, 1 to 1440 minutes.
    [ValidateRange(1, 1440)]
    [int]$TaskIntervalMinutes = 15,

    # Timeout for service status checks, 1 to 300 seconds.
    [ValidateRange(1, 300)]
    [int]$TimeoutSeconds = 30,

    # Salt master port looked for in the netstat output.
    [int]$SaltMasterPort = 4505,

    # Print the metrics to the console instead of writing the file.
    [switch]$DryRun,

    # Enable verbose debug output.
    [switch]$Verbose,

    # Suppress non-error output.
    [switch]$Quiet,

    # Skip scheduled task installation.
    [switch]$NoCron,

    # Show the version and exit.
    [switch]$Version
)
|
||||
|
||||
# -Version: print the script identification and stop before doing any work.
if ($Version) {
    foreach ($bannerLine in @(
            "Salt Status Monitor PowerShell Script",
            "Version: 3.3.0-20250915",
            "Author: Phil Connor pconnor@ara.com")) {
        Write-Host $bannerLine
    }
    exit 0
}

# Stream preferences follow the -Verbose / -Quiet switches. -Quiet is applied
# last, so it wins when both switches are given.
if ($Verbose) {
    $VerbosePreference = $InformationPreference = 'Continue'
}
if ($Quiet) {
    $VerbosePreference = $InformationPreference = $WarningPreference = 'SilentlyContinue'
}
|
||||
|
||||
# Logging functions
|
||||
function Write-VerboseLog {
    # Writes a cyan, timestamped "[VERBOSE]" line to the host, but only when
    # the script-level -Verbose switch is set.
    param([string]$Message)

    if (-not $Verbose) {
        return
    }
    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    Write-Host "[VERBOSE] $stamp $Message" -ForegroundColor Cyan
}
|
||||
|
||||
function Write-InfoLog {
    # Writes a green, timestamped "[INFO]" line to the host unless the
    # script-level -Quiet switch is set.
    param([string]$Message)

    if ($Quiet) {
        return
    }
    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    Write-Host "[INFO] $stamp $Message" -ForegroundColor Green
}
|
||||
|
||||
# Optionally register a scheduled task that re-runs this script every
# $TaskIntervalMinutes minutes as SYSTEM. Skipped when -NoCron is given or
# when a task with the same name already exists.
if ($InstallScheduledTask -and -not $NoCron) {
    $taskName = "SaltMinionStatusCheck"
    $existingTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue

    if (-not $existingTask) {
        # Forward the effective settings to the scheduled runs. Without
        # these arguments the task would fall back to the script defaults and
        # ignore a custom metrics path, timeout or port given at install time.
        $taskArguments = @(
            '-NoProfile'
            '-ExecutionPolicy Bypass'
            "-File `"$($MyInvocation.MyCommand.Path)`""
            "-MetricsFilePath `"$MetricsFilePath`""
            "-TimeoutSeconds $TimeoutSeconds"
            "-SaltMasterPort $SaltMasterPort"
        ) -join ' '
        $taskAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument $taskArguments

        # Defensive re-check; the parameter's ValidateRange normally
        # guarantees a positive value already.
        if (-not $TaskIntervalMinutes -or $TaskIntervalMinutes -le 0) {
            throw "TaskIntervalMinutes must be a positive integer"
        }

        # First run one minute from now, then repeat at the requested
        # interval for 365 days.
        $taskTrigger = New-ScheduledTaskTrigger -Once -At (Get-Date).AddMinutes(1) -RepetitionInterval (New-TimeSpan -Minutes $TaskIntervalMinutes) -RepetitionDuration (New-TimeSpan -Days 365)
        $taskPrincipal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest

        try {
            Write-InfoLog "Creating scheduled task: $taskName"
            Register-ScheduledTask -TaskName $taskName -Action $taskAction -Trigger $taskTrigger -Principal $taskPrincipal -Description "Monitors Salt Minion status every $TaskIntervalMinutes minutes"

            # Verify the task was created
            $createdTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
            if (-not $createdTask) {
                throw "Failed to verify scheduled task creation"
            }
            Write-InfoLog "Successfully created scheduled task: $taskName"
        } catch {
            Write-Error "Failed to create auto-start task: $($_.Exception.Message)"
            throw
        }
    } else {
        Write-InfoLog "Scheduled task $taskName already exists. Skipping creation."
    }
}
|
||||
|
||||
# Function to check if required commands are available
|
||||
function Test-CommandAvailability {
    # Returns $true when Get-Command can resolve $Command; otherwise writes a
    # warning and returns $false.
    param([string]$Command)

    try {
        $null = Get-Command $Command -ErrorAction Stop
        $true
    } catch {
        Write-Warning "Required command '$Command' is not available"
        $false
    }
}
|
||||
|
||||
# Function to check if the salt-master is connected
|
||||
function Test-Port4505Connection {
    # Looks for $SaltMasterPort (script parameter) in the output of
    # `netstat -an`. Returns $true when any row has an address ending in
    # ":<port>", $false otherwise. The match does not look at the connection
    # state column.
    try {
        $portCheck = netstat -an 2>$null | Select-String "\s+[^:]+:$SaltMasterPort\s+"

        if ($null -ne $portCheck) {
            Write-VerboseLog "Port $SaltMasterPort is in use and has active connections"
            return $true
        } else {
            Write-VerboseLog "No active connections found on port $SaltMasterPort"
            return $false
        }
    } catch [System.Management.Automation.ActionPreferenceStopException] {
        # Deliberately silent, but still return a boolean: this branch used
        # to fall through and return nothing at all.
        return $false
    } catch {
        # Log any other unexpected error and report "not connected".
        Write-Warning "Failed to check port $SaltMasterPort : $($_.Exception.Message)"
        return $false
    }
}
|
||||
|
||||
# Function to check if the salt-master responds to ping
|
||||
function Test-SaltPing {
    # Runs `salt-call test.ping --local` in a background job, bounded by
    # $TimeoutSeconds, and returns $true only when the output contains
    # "local: True" or consists solely of "True". The job is always stopped
    # (if still running) and removed before returning.
    # NOTE(review): with --local the call presumably never contacts the
    # master, so this may only prove the local minion works - confirm intent.
    param(
        [int]$TimeoutSeconds = $TimeoutSeconds
    )

    if (-not (Test-CommandAvailability "salt-call")) {
        Write-Warning "Salt-call command not found"
        return $false
    }

    $pingJob = $null
    try {
        $pingJob = Start-Job -ScriptBlock { salt-call test.ping --local 2>$null } -ErrorAction Stop

        # Wait-Job returns nothing when the timeout elapses first.
        $finished = Wait-Job -Job $pingJob -Timeout $TimeoutSeconds
        if (-not $finished) {
            Write-Warning "Salt-call test.ping timed out after $TimeoutSeconds seconds"
            return $false
        }

        $reply = Receive-Job -Job $pingJob -ErrorAction SilentlyContinue
        if ($null -eq $reply) {
            Write-Host "No response from salt-call test.ping"
            return $false
        }

        # Collapse multi-line output to one string so -match yields a boolean.
        if ($reply -is [array]) {
            $reply = $reply -join "`n"
        }

        $pingOk = ($reply -match "local:\s*True") -or ($reply -match "^\s*True\s*$")
        if (-not $pingOk) {
            Write-VerboseLog "Salt-call test.ping failed or returned unexpected output: $reply"
            return $false
        }
        Write-VerboseLog "Salt-call test.ping returned True"
        return $true
    } catch {
        Write-Warning "Salt-Call failed: $($_.Exception.Message)"
        return $false
    } finally {
        if ($null -ne $pingJob) {
            try {
                if ($pingJob.State -eq 'Running') {
                    Stop-Job -Job $pingJob -Force -ErrorAction SilentlyContinue
                }
            } finally {
                Remove-Job -Job $pingJob -Force -ErrorAction SilentlyContinue
            }
        }
    }
}
|
||||
|
||||
# Function to check if prometheus named metrics are sanitized or not
|
||||
function Test-PrometheusMetricName {
    # Returns $true when $MetricName matches [a-zA-Z_:][a-zA-Z0-9_:]* in
    # full, $false otherwise (including for an empty name).
    param([string]$MetricName)

    return ($MetricName -match '^[a-zA-Z_:][a-zA-Z0-9_:]*$')
}
|
||||
|
||||
# Function to format and add a metric to the metrics array
|
||||
function Add-PrometheusMetric {
    # Appends the three exposition lines for one metric ("# HELP", "# TYPE"
    # and the sample) to the array referenced by $MetricsArray. A name that
    # fails Test-PrometheusMetricName is skipped with a warning.
    param(
        [string]$Name,
        [string]$Help,
        [string]$Type,
        [object]$Value,
        [ref]$MetricsArray
    )

    if (Test-PrometheusMetricName $Name) {
        foreach ($entry in "# HELP $Name $Help", "# TYPE $Name $Type", "$Name $Value") {
            $MetricsArray.Value += $entry
        }
    } else {
        Write-Warning "Invalid metric name: $Name"
    }
}
|
||||
|
||||
# Function to check Windows service status
|
||||
function Test-SaltMinionService {
    # Reports the state of the "salt-minion" Windows service as a number:
    # 1 = running, 0 = present but not running (or the check threw),
    # 2 = service not found.
    try {
        $svc = Get-Service -Name "salt-minion" -ErrorAction SilentlyContinue
        if ($null -eq $svc) {
            Write-Warning "Salt-minion service not found"
            return 2
        }

        if ($svc.Status -ne 'Running') {
            return 0
        }
        return 1
    } catch {
        Write-Warning "Failed to check salt-minion service status: $($_.Exception.Message)"
        return 0
    }
}
|
||||
|
||||
# Function to get Salt version
|
||||
function Get-SaltVersion {
    # Returns the first "<digits>.<digits>" found in the output of
    # `salt-call --version` as a string, or "0" when salt-call is missing,
    # prints no version, or the call fails.
    if (-not (Test-CommandAvailability "salt-call")) {
        return "0"
    }

    try {
        # Join the output into one string first. With multi-line output,
        # -match on the resulting array filters elements instead of returning
        # a boolean and does not populate $matches.
        $versionText = (& salt-call --version 2>$null | Out-String)
        $found = [regex]::Match($versionText, '(\d+\.\d+)')
        if ($found.Success) {
            return $found.Groups[1].Value
        }
        return "0"
    } catch {
        Write-Warning "Failed to get Salt version: $($_.Exception.Message)"
        return "0"
    }
}
|
||||
|
||||
# Function to get Salt-minion memory usage
|
||||
function Get-SaltMemoryUsage {
    # Returns the summed WorkingSet64 of all processes named "salt-minion",
    # or 0 when there are none or the lookup throws.
    try {
        $minionProcs = Get-Process -Name "salt-minion" -ErrorAction SilentlyContinue
        if ($null -eq $minionProcs) {
            return 0
        }

        $bytesInUse = 0
        $minionProcs | ForEach-Object { $bytesInUse += $_.WorkingSet64 }
        return $bytesInUse
    } catch {
        Write-Warning "Failed to get salt-minion memory usage: $($_.Exception.Message)"
        return 0
    }
}
|
||||
|
||||
# Function to get last successful communication timestamp
|
||||
function Get-LastCommunicationTimestamp {
    # Returns the current Unix time (whole seconds, UTC) when Test-SaltPing
    # succeeds, and 0 when salt-call is missing, the ping fails, or an error
    # is thrown.
    if (-not (Test-CommandAvailability "salt-call")) {
        return 0
    }

    try {
        $pingResult = Test-SaltPing
        if ($pingResult) {
            # Ask .NET for the epoch seconds directly. The previous form
            # parsed the text of `Get-Date -UFormat %s` with the current
            # culture and narrowed the result to a 32-bit [int].
            return [DateTimeOffset]::UtcNow.ToUnixTimeSeconds()
        }
        return 0
    } catch {
        Write-Warning "Failed to get last communication timestamp: $($_.Exception.Message)"
        return 0
    }
}
|
||||
|
||||
# Function to count recent Salt errors in Windows Event Log
|
||||
function Get-SaltErrorCount {
    # Counts salt-minion errors. First choice: error-level Application event
    # log entries from the last 24 hours. When that query fails or finds
    # nothing, falls back to counting "[ERROR]" lines in the last 1000 lines
    # of the minion log file. Returns 0 when neither source is available.
    try {
        $since = (Get-Date).AddHours(-24)
        # ProviderName is the documented -FilterHashtable key for the event
        # source. -ErrorAction Stop turns "no matching events" into a
        # terminating error so the log-file fallback below is reached; with
        # SilentlyContinue the catch block was effectively dead code.
        $errorEvents = @(Get-WinEvent -FilterHashtable @{
                LogName      = 'Application'
                ProviderName = 'salt-minion'
                Level        = 2 # Error level
                StartTime    = $since
            } -ErrorAction Stop)

        return $errorEvents.Count
    } catch {
        # Fallback: read the salt log file if it exists
        $logPath = "${env:ProgramData}\Salt Project\Salt\var\log\salt\minion"
        if (Test-Path $logPath) {
            try {
                $logContent = Get-Content $logPath -Tail 1000 -ErrorAction SilentlyContinue
                # @() keeps .Count meaningful for zero or one matching line.
                $errorLines = @($logContent | Where-Object { $_ -match "\[ERROR\]" })
                return $errorLines.Count
            } catch {
                return 0
            }
        }
        return 0
    }
}
|
||||
|
||||
# Function to export Prometheus metrics
|
||||
function Export-PrometheusMetrics {
    # Runs every check and returns the Prometheus exposition lines as an
    # array of strings. Each check is isolated in its own try/catch: a
    # failure is recorded and the metric is still emitted with value 0.
    # The last two metrics describe this run itself (recorded problem count
    # and runtime).
    $startedAt = Get-Date
    $lines = @()
    $problems = @()

    try {
        # Connection status (netstat lookup of the master port)
        $connHelp = "Shows if Salt-Minion is connected to Salt-Master."
        try {
            if (Test-CommandAvailability "netstat") {
                $connState = if (Test-Port4505Connection) { 1 } else { 0 }
            } else {
                $problems += "netstat command not found"
                $connState = 2
            }
            Add-PrometheusMetric -Name "minion_connection_status" -Help $connHelp -Type "gauge" -Value $connState -MetricsArray ([ref]$lines)
        } catch {
            $problems += "Port 4505 check failed: $($_.Exception.Message)"
            Add-PrometheusMetric -Name "minion_connection_status" -Help $connHelp -Type "gauge" -Value 0 -MetricsArray ([ref]$lines)
        }

        # Salt ping
        $pingHelp = "Shows if Salt-Minion is able to ping Salt-Master."
        try {
            if (Test-CommandAvailability "salt-call") {
                $pingState = if (Test-SaltPing) { 1 } else { 0 }
            } else {
                $problems += "salt-call command not found"
                $pingState = 2
            }
            Add-PrometheusMetric -Name "minion_ping_status" -Help $pingHelp -Type "gauge" -Value $pingState -MetricsArray ([ref]$lines)
        } catch {
            $problems += "Salt ping check failed: $($_.Exception.Message)"
            Add-PrometheusMetric -Name "minion_ping_status" -Help $pingHelp -Type "gauge" -Value 0 -MetricsArray ([ref]$lines)
        }

        # Windows service state
        $serviceHelp = "Shows if Salt-Minion service is active."
        try {
            $serviceState = Test-SaltMinionService
            Add-PrometheusMetric -Name "minion_service_status" -Help $serviceHelp -Type "gauge" -Value $serviceState -MetricsArray ([ref]$lines)
        } catch {
            $problems += "Service status check failed: $($_.Exception.Message)"
            Add-PrometheusMetric -Name "minion_service_status" -Help $serviceHelp -Type "gauge" -Value 0 -MetricsArray ([ref]$lines)
        }

        # Last successful communication
        $lastCommHelp = "Timestamp of last successful communication with Salt-Master."
        try {
            $lastSeen = Get-LastCommunicationTimestamp
            Add-PrometheusMetric -Name "minion_last_communication_timestamp" -Help $lastCommHelp -Type "gauge" -Value $lastSeen -MetricsArray ([ref]$lines)
        } catch {
            $problems += "Last communication check failed: $($_.Exception.Message)"
            Add-PrometheusMetric -Name "minion_last_communication_timestamp" -Help $lastCommHelp -Type "gauge" -Value 0 -MetricsArray ([ref]$lines)
        }

        # Salt version
        $versionHelp = "Salt-Minion version number."
        try {
            $saltVersion = Get-SaltVersion
            Add-PrometheusMetric -Name "minion_version" -Help $versionHelp -Type "gauge" -Value $saltVersion -MetricsArray ([ref]$lines)
        } catch {
            $problems += "Version check failed: $($_.Exception.Message)"
            Add-PrometheusMetric -Name "minion_version" -Help $versionHelp -Type "gauge" -Value 0 -MetricsArray ([ref]$lines)
        }

        # Memory usage
        $memoryHelp = "Salt-Minion process memory usage in bytes."
        try {
            $memoryBytes = Get-SaltMemoryUsage
            Add-PrometheusMetric -Name "minion_memory_usage_bytes" -Help $memoryHelp -Type "gauge" -Value $memoryBytes -MetricsArray ([ref]$lines)
        } catch {
            $problems += "Memory usage check failed: $($_.Exception.Message)"
            Add-PrometheusMetric -Name "minion_memory_usage_bytes" -Help $memoryHelp -Type "gauge" -Value 0 -MetricsArray ([ref]$lines)
        }

        # Error count
        $errorCountHelp = "Number of error entries in Salt-Minion log file."
        try {
            $recentErrors = Get-SaltErrorCount
            Add-PrometheusMetric -Name "minion_error_count" -Help $errorCountHelp -Type "counter" -Value $recentErrors -MetricsArray ([ref]$lines)
        } catch {
            $problems += "Error count check failed: $($_.Exception.Message)"
            Add-PrometheusMetric -Name "minion_error_count" -Help $errorCountHelp -Type "counter" -Value 0 -MetricsArray ([ref]$lines)
        }

        # Self-monitoring: problems recorded during this run
        Add-PrometheusMetric -Name "windows_salt_script_errors_total" -Help "Total number of errors during script execution" -Type "counter" -Value $problems.Count -MetricsArray ([ref]$lines)

        # Self-monitoring: wall-clock runtime up to this point
        $elapsed = (Get-Date) - $startedAt
        Add-PrometheusMetric -Name "windows_salt_script_runtime_seconds" -Help "Total script execution time in seconds" -Type "gauge" -Value $elapsed.TotalSeconds -MetricsArray ([ref]$lines)

    } finally {
        # Summarise recorded problems whether or not the block above finished.
        if ($problems.Count -gt 0) {
            Write-Warning "Script completed with $($problems.Count) errors"
        }
    }

    return $lines
}
|
||||
|
||||
|
||||
# Output metrics to console or file
|
||||
# Collect the metrics, then either print them (-DryRun) or write them to
# $MetricsFilePath. A write that fails with an IOException is retried up to
# three times, 100 ms apart; any other failure ends the script with exit
# code 1.
try {
    # Export metrics as an array of strings
    $exportedMetrics = Export-PrometheusMetrics
    if ($null -eq $exportedMetrics) {
        throw "Export-PrometheusMetrics returned null"
    }

    if ($DryRun) {
        # Dry run mode: output to console
        Write-Host "=== DRY RUN MODE - Metrics that would be written to $MetricsFilePath ===" -ForegroundColor Yellow
        $exportedMetrics | ForEach-Object { Write-Host $_ }
        Write-Host "=== END DRY RUN OUTPUT ===" -ForegroundColor Yellow
    } else {
        # Normal mode: write to file with retry mechanism
        $retryCount = 0
        $maxRetries = 3
        do {
            try {
                # -ErrorAction Stop makes a failed write a terminating error.
                # Without it the catch below never runs, so the retry is dead
                # code and a failed write still ends with exit code 0.
                $exportedMetrics | Out-File -FilePath $MetricsFilePath -Encoding UTF8 -Force -ErrorAction Stop
                break
            } catch [System.IO.IOException] {
                $retryCount++
                if ($retryCount -ge $maxRetries) {
                    throw
                }
                # Wait 100ms before retrying
                Start-Sleep -Milliseconds 100
            }
        } while ($retryCount -lt $maxRetries)
    }
} catch {
    Write-Error "Failed to export metrics: $($_.Exception.Message)"
    exit 1
}
|
||||
|
||||
# Uncomment the following line to write metrics to the console
|
||||
# $exportedMetrics = Export-PrometheusMetrics
|
||||
|
||||
Executable
+409
@@ -0,0 +1,409 @@
|
||||
#!/bin/bash
|
||||
|
||||
#####################################################
|
||||
### ###
|
||||
### Description: Expose metrics from salt-minion. ###
|
||||
### ###
|
||||
### Phil Connor, contact@mylinux.work ###
|
||||
### License: MIT ###
|
||||
### Version 2.28.0.20250915 ###
|
||||
### ###
|
||||
#####################################################
|
||||
|
||||
# Exit on any error, treat unset variables as errors, and fail pipes on first failure
|
||||
set -euo pipefail
|
||||
|
||||
# Option defaults; the argument loop below overwrites them from the command line
DRY_RUN=false
VERBOSE=false
QUIET=false
NO_CRON=false
# Set by --timeout. Initialised here (keeping any value from the environment)
# so the variable can be read safely under `set -u` when --timeout is absent.
TIMEOUT_OVERRIDE="${TIMEOUT_OVERRIDE:-}"
SCRIPT_VERSION="2.28.0.20250915"
|
||||
|
||||
# Print the script banner (name, version, author) on stdout
show_version() {
    printf '%s\n' \
        "Salt Status Monitor Bash Script" \
        "Version: $SCRIPT_VERSION" \
        "Author: Phil Connor pconnor@ara.com"
}
|
||||
|
||||
# Print the usage summary and the list of supported options on stdout
show_help() {
    cat <<EOF
Usage: $0 [OPTIONS]
Monitor Salt minion status and export Prometheus metrics

Options:
 --dry-run Output metrics to console instead of file
 --verbose Enable verbose debug output
 --quiet Suppress non-error output
 --no-cron Skip cron job installation
 --timeout N Override timeout seconds (default: varies by operation)
 --version Show version and exit
 --help Show this help message
EOF
}
|
||||
|
||||
# Logging functions
|
||||
# Print a timestamped debug message when --verbose is active.
# Arguments: $1 - message text
# Outputs:   the message on stderr. stdout is kept free because callers such
#            as check_salt_connection run inside $(...), where anything
#            printed to stdout would become part of the metric value.
# Returns:   always 0, so a disabled logger cannot trip `set -e` in a caller.
log_verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] [VERBOSE] $1" >&2
    fi
    return 0
}
|
||||
|
||||
# Print a timestamped informational message unless --quiet is active.
# Arguments: $1 - message text
# Outputs:   the message on stderr; stdout is where main prints the
#            Prometheus metrics, so log text must not be mixed into it.
# Returns:   always 0. Returning the status of a failed [[ ]] test would abort
#            callers under `set -e` whenever --quiet is given.
log_info() {
    if [[ "$QUIET" == "false" ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1" >&2
    fi
    return 0
}
|
||||
|
||||
# Translate command-line options into the flag variables.
# Unknown options, or --timeout without a numeric value, end the script with
# exit status 1; --version and --help print their text and exit 0.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --verbose|-v)
            VERBOSE=true
            shift
            ;;
        --quiet|-q)
            QUIET=true
            shift
            ;;
        --no-cron)
            NO_CRON=true
            shift
            ;;
        --timeout)
            # ${2:-} keeps `set -u` from aborting with "unbound variable" when
            # --timeout is the last argument; the regex already rejects empty
            if [[ "${2:-}" =~ ^[0-9]+$ ]]; then
                TIMEOUT_OVERRIDE="$2"
                shift 2
            else
                echo "Error: --timeout requires a numeric value" >&2
                exit 1
            fi
            ;;
        --version)
            show_version
            exit 0
            ;;
        -h|--help)
            show_help
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            echo "Use --help for usage information" >&2
            exit 1
            ;;
    esac
done
|
||||
|
||||
# Get absolute path to this script for cron job installation.
# Assigned before being marked readonly so that a readlink failure is not
# hidden behind the exit status of the `readonly` builtin.
SCRIPT_PATH="$(readlink -f "$0")"
readonly SCRIPT_PATH

# Configuration with defaults - can be overridden by environment variables
readonly CRONTAB_USER="${CRONTAB_USER:-root}"                              # User to install cron job under
readonly NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}"  # Directory where Prometheus metrics are stored
readonly PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}"                  # User that owns the metrics directory
readonly LOCK_DIR="${LOCK_DIR:-/var/run}"                                  # Directory for lock files to prevent concurrent runs
readonly UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/10 * * * *}"                # Cron schedule - every 10 minutes by default
readonly SALT_MASTER_PORT=4505                                             # Salt master communication port

# Status codes used in Prometheus metrics
readonly STATUS_SUCCESS=1    # Service is working correctly
readonly STATUS_FAILURE=0    # Service has failed or is not responding
readonly STATUS_NOT_FOUND=2  # Service/command not found on system

# Validate that critical settings are non-empty
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
    echo "ERROR: Required environment variables not set" >&2
    exit 1
fi
|
||||
|
||||
# Report a fatal problem on stderr and terminate the script.
# Arguments: $1 - message text
#            $2 - exit status (optional, defaults to 1)
handle_error() {
    local message="$1"
    local exit_code="${2:-1}"

    printf 'ERROR: %s\n' "$message" >&2
    exit "$exit_code"
}
|
||||
|
||||
# Write one log line of the form "[YYYY-MM-DD HH:MM:SS] [LEVEL] message" to stdout.
# Arguments: $1 - level label
#            $2 - message text
log() {
    local level="$1" message="$2"
    local stamp

    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] [%s] %s\n' "$stamp" "$level" "$message"
}
|
||||
|
||||
# Locate an executable by name.
# Arguments: $1   - command name
#            $2.. - fallback directories, searched in order when the command
#                   is not resolvable through PATH
# Outputs:   the resolved path on stdout
# Returns:   0 when found; otherwise calls handle_error, which exits
find_command() {
    local cmd="$1"
    shift
    local resolved dir   # loop variable kept local so it cannot leak to callers

    # First try PATH (single lookup, result reused)
    if resolved=$(command -v "$cmd" 2>/dev/null); then
        echo "$resolved"
        return 0
    fi

    # If not in PATH, check the fallback directories
    for dir in "$@"; do
        # -f excludes directories, which would also pass a bare -x test
        if [[ -f "$dir/$cmd" && -x "$dir/$cmd" ]]; then
            echo "$dir/$cmd"
            return 0
        fi
    done

    # Command not found anywhere
    handle_error "Could not find '$cmd' executable"
}
|
||||
|
||||
# Install a cron job that runs this script periodically.
# Globals:  CRONTAB_USER, SCRIPT_PATH, UPDATE_INTERVAL, NODE_EXPORTER_DIR (read)
# Returns:  0 when the job already exists or was installed; calls handle_error
#           (which exits) when the crontab cannot be written
install_cron_job() {
    local current_crontab temp_cron

    # Read the crontab of the same user it is written back to; listing the
    # invoking user's crontab instead could replace CRONTAB_USER's entries
    # with somebody else's. A missing crontab is treated as empty.
    current_crontab=$(crontab -u "$CRONTAB_USER" -l 2>/dev/null) || current_crontab=""

    # Already installed? -F matches the path literally, and searching a
    # captured string avoids a grep -q/SIGPIPE failure under `pipefail`.
    if grep -qF -- "$SCRIPT_PATH" <<<"$current_crontab"; then
        return 0
    fi

    temp_cron=$(mktemp) || handle_error "Failed to create temporary crontab file"

    # Existing entries first, then the new job
    {
        if [[ -n "$current_crontab" ]]; then
            printf '%s\n' "$current_crontab"
        fi
        # NOTE(review): 2>&1 also sends stderr into the .prom file - confirm this is intended
        echo "$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/salt_status.prom 2>&1"
    } > "$temp_cron"

    if ! crontab -u "$CRONTAB_USER" "$temp_cron"; then
        rm -f "$temp_cron"
        handle_error "Failed to install cron job"
    fi

    rm -f "$temp_cron"
    log_info "Cron job installed successfully"
    return 0
}
|
||||
|
||||
# Take an exclusive, non-blocking lock so only one instance runs at a time.
# The lock lives on a fixed file, $LOCK_DIR/salt_status.lock, held open on
# file descriptor 9. A per-run mktemp name cannot work here: every instance
# would lock its own private file and never contend with the others.
# Globals:  LOCK_DIR (read), lockfile (written)
# Returns:  0 with the lock held; calls handle_error (which exits) when the
#           lock directory is missing, the file cannot be opened, or another
#           instance already holds the lock
setup_lock() {
    [[ -d "$LOCK_DIR" ]] || handle_error "Lock directory does not exist: $LOCK_DIR"

    # Clean up stale per-run lock files (mktemp-style salt_status.XXXXXX names)
    # older than 60 minutes
    find "$LOCK_DIR" -maxdepth 1 -name 'salt_status.??????' -type f -mmin +60 -delete 2>/dev/null || true

    lockfile="$LOCK_DIR/salt_status.lock"

    # Open the lock file on file descriptor 9 and attempt to lock it
    exec 9>"$lockfile" || handle_error "Failed to create lock file"
    flock -n 9 || handle_error "Script is already running"

    # Closing the descriptor releases the lock. The file itself is left in
    # place: unlinking it would let a concurrent instance lock a different
    # inode under the same name.
    trap 'exec 9>&-' EXIT
    # A signal must end the script (the EXIT trap then releases the lock)
    # rather than let it continue without the lock.
    trap 'exit 130' INT
    trap 'exit 143' TERM
}
|
||||
|
||||
# Make sure the Node Exporter directory exists.
# When it is missing and the script runs as root, the directory is created and
# handed to PROMETHEUS_USER; afterwards it must be writable.
# Globals:  NODE_EXPORTER_DIR, PROMETHEUS_USER (read)
# Returns:  0 on success; calls handle_error (which exits) when the directory
#           cannot be created or is not writable
setup_directories() {
    # Return early if directory already exists
    [[ -d "$NODE_EXPORTER_DIR" ]] && return 0

    # Create directory if running as root
    if [[ "$(id -u)" == "0" ]]; then
        mkdir -p "$NODE_EXPORTER_DIR" || handle_error "Failed to create $NODE_EXPORTER_DIR"
        # Set ownership to prometheus user, ignore errors if user doesn't exist
        chown "$PROMETHEUS_USER:" "$NODE_EXPORTER_DIR" 2>/dev/null || true
    fi

    # Verify the directory is writable. Written as an `if` with an explicit
    # return: a trailing `[[ ! -w ... ]] && handle_error` leaves status 1 when
    # the directory IS writable, which aborts the caller under `set -e`.
    if [[ ! -w "$NODE_EXPORTER_DIR" ]]; then
        handle_error "$NODE_EXPORTER_DIR is not writable"
    fi
    return 0
}
|
||||
|
||||
# Check whether a TCP connection involving the Salt master port is present.
# Globals:  SALT_MASTER_PORT, STATUS_SUCCESS, STATUS_FAILURE, STATUS_NOT_FOUND (read)
# Outputs:  exactly one status code on stdout (callers capture it via $(...));
#           all diagnostics go to stderr
# Returns:  0
check_salt_connection() {
    local ss_path ss_output

    # Report NOT_FOUND when ss is unavailable, like the other check_* helpers
    if ! ss_path=$(find_command ss /bin /usr/bin /usr/sbin 2>/dev/null); then
        echo "$STATUS_NOT_FOUND"
        return 0
    fi

    # Verbose output is forced to stderr so it cannot end up in the captured value
    log_verbose "Checking for Salt connection on port $SALT_MASTER_PORT" >&2 || true

    # -n numeric ports, -t TCP sockets. The listing is captured first: piping
    # straight into `grep -q` can kill ss with SIGPIPE, which `pipefail` would
    # then report as "no connection".
    ss_output=$("$ss_path" -nt) || ss_output=""

    if grep -q "\b$SALT_MASTER_PORT\b" <<<"$ss_output"; then
        log_verbose "Found active connection on port $SALT_MASTER_PORT" >&2 || true
        echo "$STATUS_SUCCESS"
    else
        log_verbose "No active connection found on port $SALT_MASTER_PORT" >&2 || true
        echo "$STATUS_FAILURE"
    fi
    return 0
}
|
||||
|
||||
# Run `salt-call test.ping` and report whether its output contains "True".
# Outputs: STATUS_NOT_FOUND when salt-call cannot be located, STATUS_SUCCESS
#          when the ping output contains the word True, STATUS_FAILURE otherwise
check_salt_ping() {
    local salt_call_bin
    local outcome="$STATUS_FAILURE"

    salt_call_bin=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null) || {
        echo "$STATUS_NOT_FOUND"
        return
    }

    if "$salt_call_bin" test.ping 2>/dev/null | grep -q '\bTrue\b'; then
        outcome="$STATUS_SUCCESS"
    fi
    echo "$outcome"
}
|
||||
|
||||
# Ask systemctl whether the salt-minion unit is active.
# Outputs: STATUS_NOT_FOUND when systemctl cannot be located, STATUS_SUCCESS
#          when `systemctl is-active salt-minion` succeeds, STATUS_FAILURE otherwise
check_salt_service() {
    local systemctl_bin

    systemctl_bin=$(find_command systemctl /bin /usr/bin /sbin /usr/sbin 2>/dev/null) || {
        echo "$STATUS_NOT_FOUND"
        return
    }

    if ! "$systemctl_bin" is-active salt-minion &>/dev/null; then
        echo "$STATUS_FAILURE"
        return
    fi
    echo "$STATUS_SUCCESS"
}
|
||||
|
||||
# Print the current Unix timestamp when `salt-call test.ping` answers True.
# Outputs: the timestamp from `date +%s` on success; "0" when salt-call is
#          missing or the ping output does not contain the word True
check_salt_last_communication() {
    local salt_call_bin
    local stamp="0"

    if salt_call_bin=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null) &&
        "$salt_call_bin" test.ping 2>/dev/null | grep -q '\bTrue\b'; then
        stamp=$(date +%s)
    fi
    echo "$stamp"
}
|
||||
|
||||
# Report the salt-call version as "<major>.<minor>".
# Outputs: the first number of the form N.N found in `salt-call --version`
#          (for example 3006.1); "0" when salt-call is missing or no such
#          number is printed
# Returns: 0
get_salt_version() {
    local salt_call_path version

    # Try to find salt-call command, report 0 if missing
    if ! salt_call_path=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null); then
        echo "0"
        return 0
    fi

    # A single awk pass replaces `grep -o | head -1`: grep exits 1 when nothing
    # matches, which fails the whole pipeline under `pipefail`.
    version=$("$salt_call_path" --version 2>/dev/null |
        awk 'match($0, /[0-9]+\.[0-9]+/) { print substr($0, RSTART, RLENGTH); exit }') || true
    echo "${version:-0}"
}
|
||||
|
||||
# Sum the resident memory of all processes whose command name starts with
# "salt-minion".
# Outputs: the total in bytes (ps reports RSS in KB; the sum is multiplied by
#          1024); "0" when ps is missing or no such process exists
# Returns: 0
get_salt_memory_usage() {
    local ps_path memory_kb

    # Find ps command
    if ! ps_path=$(find_command ps /bin /usr/bin 2>/dev/null); then
        echo "0"
        return 0
    fi

    # One awk pass filters and sums. A separate grep stage exits 1 when no
    # salt-minion process exists, failing the pipeline under `pipefail`.
    memory_kb=$("$ps_path" -eo comm,rss |
        awk '$1 ~ /^salt-minion/ { sum += $2 } END { print sum + 0 }' 2>/dev/null) || memory_kb=0
    [[ "$memory_kb" =~ ^[0-9]+$ ]] || memory_kb=0

    echo "$((memory_kb * 1024))"
}
|
||||
|
||||
# Count the lines containing "[ERROR]" in the salt-minion log.
# The whole file is counted; no time window is applied.
# Arguments: $1 - log file path (optional, defaults to /var/log/salt/minion)
# Outputs:   the number of matching lines; "0" when the file is not readable
# Returns:   0
count_salt_errors() {
    local log_file="${1:-/var/log/salt/minion}"
    local error_count

    # Return 0 if log file doesn't exist or isn't readable
    if [[ ! -r "$log_file" ]]; then
        echo "0"
        return 0
    fi

    # grep -c prints the count but exits 1 when that count is zero; `|| true`
    # keeps the assignment from failing under `set -e`. -F matches the
    # bracketed tag literally.
    error_count=$(grep -cF -- "[ERROR]" "$log_file" 2>/dev/null) || true
    echo "${error_count:-0}"
}
|
||||
|
||||
# Emit one metric as three lines: "# HELP", "# TYPE" and the sample itself.
# Arguments: $1 - metric name
#            $2 - value
#            $3 - help text
#            $4 - metric type
output_metric() {
    local metric_name="$1" metric_value="$2" help_text="$3" metric_type="$4"

    printf '# HELP %s %s\n# TYPE %s %s\n%s %s\n' \
        "$metric_name" "$help_text" \
        "$metric_name" "$metric_type" \
        "$metric_name" "$metric_value"
}
|
||||
|
||||
# Entry point: prepare the environment (unless in dry-run mode), collect every
# Salt status value, then print all metrics on stdout.
main() {
    local conn_state ping_state svc_state last_contact minion_version rss_bytes log_errors

    case "$DRY_RUN" in
        false)
            # Real run: take the lock, make sure the output directory exists,
            # then handle cron installation
            setup_lock
            setup_directories

            # The cron job is installed only when the script file exists and
            # --no-cron was not given
            if [[ "$NO_CRON" == "false" && -f "$SCRIPT_PATH" ]]; then
                install_cron_job
            elif [[ "$NO_CRON" == "true" ]]; then
                log_info "Skipping cron job installation (--no-cron specified)"
            fi
            ;;
        *)
            # Dry run: no setup, only a banner on stderr
            echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/salt_status.prom ===" >&2
            ;;
    esac

    # Gather all values first, so nothing is printed if a check aborts the run
    conn_state=$(check_salt_connection)
    ping_state=$(check_salt_ping)
    svc_state=$(check_salt_service)
    last_contact=$(check_salt_last_communication)
    minion_version=$(get_salt_version)
    rss_bytes=$(get_salt_memory_usage)
    log_errors=$(count_salt_errors)

    # Print the metrics
    output_metric "minion_connection_status" "$conn_state" \
        "Shows if Salt-Minion is connected to Salt-Master." "gauge"
    output_metric "minion_ping_status" "$ping_state" \
        "Shows if Salt-Minion is able to ping Salt-Master." "gauge"
    output_metric "minion_service_status" "$svc_state" \
        "Shows if Salt-Minion service is active." "gauge"
    output_metric "minion_last_communication_timestamp" "$last_contact" \
        "Timestamp of last successful communication with Salt-Master." "gauge"
    output_metric "minion_version" "$minion_version" \
        "Salt-Minion version number." "gauge"
    output_metric "minion_memory_usage_bytes" "$rss_bytes" \
        "Salt-Minion process memory usage in bytes." "gauge"
    output_metric "minion_error_count" "$log_errors" \
        "Number of error entries in Salt-Minion log file." "counter"

    # Closing banner for dry runs, again on stderr
    [[ "$DRY_RUN" != "true" ]] || echo "=== END DRY RUN OUTPUT ===" >&2
}
|
||||
|
||||
# Execute main function with all script arguments
|
||||
main "$@"
|
||||
@@ -0,0 +1,210 @@
|
||||
#!/bin/bash
|
||||
|
||||
#############################################################
|
||||
#### iperf3 Server Setup ####
|
||||
#### Install and configure iperf3 as a systemd service ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### License: MIT ####
|
||||
#### Version: 1.0 ####
|
||||
#### ####
|
||||
#### Usage: sudo ./setup-iperf3-server.sh [OPTIONS] ####
|
||||
#############################################################
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Default configuration; parse_args may override these three
LISTEN_PORT=9182
HARDENED=false
UNINSTALL=false

# Fixed identifiers of the systemd unit; nothing reassigns them, so they are
# marked readonly
readonly SERVICE_NAME="iperf3-server"
readonly SERVICE_FILE="/etc/systemd/system/${SERVICE_NAME}.service"
|
||||
|
||||
# Print the usage text on stdout and end the script with status 0
show_help() {
    printf '%s\n' \
        'Usage: sudo ./setup-iperf3-server.sh [OPTIONS]' \
        '' \
        'Install and configure iperf3 as a systemd service.' \
        '' \
        'Options:' \
        '--port PORT Set the iperf3 listen port (default: 9182)' \
        '--hardened Use the hardened service file with IP restrictions' \
        'and security settings (private networks only)' \
        '--uninstall Stop, disable, and remove the iperf3 service' \
        '--help Show this help message' \
        '' \
        'Examples:' \
        'sudo ./setup-iperf3-server.sh' \
        'sudo ./setup-iperf3-server.sh --port 5201' \
        'sudo ./setup-iperf3-server.sh --hardened' \
        'sudo ./setup-iperf3-server.sh --uninstall'
    exit 0
}
|
||||
|
||||
# Parse the command line into LISTEN_PORT, HARDENED and UNINSTALL.
# Arguments: the script's arguments ("$@")
# Outputs:   error messages on stderr
# Returns:   0 on success; exits 1 on an unknown option or an invalid --port.
#            --help prints the usage text and exits via show_help.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --port)
                if [[ -z "${2:-}" ]]; then
                    echo "ERROR: --port requires a value" >&2
                    exit 1
                fi
                # The port is written into a systemd unit file, so only a
                # plain number in the valid TCP range is accepted
                if [[ ! "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
                    echo "ERROR: --port must be a number between 1 and 65535 (got: $2)" >&2
                    exit 1
                fi
                LISTEN_PORT=$((10#$2))   # 10# forces base 10 for values like 08080
                shift 2
                ;;
            --hardened)
                HARDENED=true
                shift
                ;;
            --uninstall)
                UNINSTALL=true
                shift
                ;;
            --help)
                show_help
                ;;
            *)
                echo "ERROR: Unknown option: $1" >&2
                echo "Run with --help for usage information." >&2
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
# Ensure script is run as root.
# This check runs before the arguments are parsed, so --help is exempted here:
# printing the usage text needs no privileges.
_needs_root=true
for _arg in "$@"; do
    if [[ "$_arg" == "--help" ]]; then
        _needs_root=false
    fi
done
if [[ "$_needs_root" == true && $EUID -ne 0 ]]; then
    echo "ERROR: This script must be run as root (use sudo)." >&2
    exit 1
fi
unset _needs_root _arg
|
||||
|
||||
# Install the iperf3 package unless the binary is already available.
# The first package manager found among apt-get, dnf and yum is used; when
# none is present the script exits with status 1.
install_iperf3() {
    local candidate pkg_manager=""

    if command -v iperf3 >/dev/null 2>&1; then
        echo "iperf3 is already installed."
        return
    fi

    echo "Installing iperf3..."

    # Same priority order: apt-get, then dnf, then yum
    for candidate in apt-get dnf yum; do
        if command -v "$candidate" >/dev/null 2>&1; then
            pkg_manager="$candidate"
            break
        fi
    done

    case "$pkg_manager" in
        apt-get)
            apt-get update && apt-get install -y iperf3
            ;;
        dnf)
            dnf install -y iperf3
            ;;
        yum)
            yum install -y iperf3
            ;;
        *)
            echo "ERROR: Cannot install iperf3 automatically. Please install manually."
            exit 1
            ;;
    esac
}
|
||||
|
||||
# Print the standard systemd unit for the iperf3 server on stdout
_iperf3_unit_basic() {
    cat <<EOF
[Unit]
Description=iperf3 Network Performance Testing Server
After=network.target

[Service]
Type=simple
User=root
Group=root
ExecStart=/usr/bin/iperf3 -s -p ${LISTEN_PORT}
Restart=on-failure
RestartSec=5s

[Install]
WantedBy=multi-user.target
EOF
}

# Print the hardened systemd unit (sandboxing options plus an IP allow-list
# limited to localhost and private ranges) on stdout
_iperf3_unit_hardened() {
    cat <<EOF
[Unit]
Description=iperf3 Network Performance Testing Server
After=network.target
Wants=network.target

[Service]
Type=simple
User=root
Group=root
ExecStart=/usr/bin/iperf3 -s -p ${LISTEN_PORT}
ExecReload=/bin/kill -HUP \$MAINPID
KillMode=process
Restart=on-failure
RestartSec=5s

# Security settings
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/tmp
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictSUIDSGID=true

# Network settings — restrict to private networks
IPAddressDeny=any
IPAddressAllow=localhost
IPAddressAllow=192.168.0.0/16
IPAddressAllow=10.0.0.0/8
IPAddressAllow=172.16.0.0/12

[Install]
WantedBy=multi-user.target
EOF
}

# Write the unit file to SERVICE_FILE, enable and start the service, then
# print a short cheat sheet of follow-up commands.
# Globals: HARDENED, SERVICE_FILE, SERVICE_NAME, LISTEN_PORT (read)
install_service() {
    local host_ip

    echo "Installing systemd service..."

    if [[ "$HARDENED" != true ]]; then
        _iperf3_unit_basic > "$SERVICE_FILE"
    else
        echo "Using hardened service configuration (private networks only)."
        _iperf3_unit_hardened > "$SERVICE_FILE"
    fi

    chmod 644 "$SERVICE_FILE"

    echo "Enabling and starting service..."
    systemctl daemon-reload
    systemctl enable "${SERVICE_NAME}.service"
    systemctl start "${SERVICE_NAME}.service"

    printf '\n%s\n\n' "iperf3 server service installed and started!"
    # Status display is informational only; a failure here is ignored
    systemctl status "${SERVICE_NAME}.service" --no-pager || true

    # First address reported by `hostname -I`; empty when that is unavailable
    host_ip=$(hostname -I 2>/dev/null | awk '{print $1}') || true

    cat <<EOF

Service commands:
 Start: sudo systemctl start ${SERVICE_NAME}
 Stop: sudo systemctl stop ${SERVICE_NAME}
 Status: sudo systemctl status ${SERVICE_NAME}
 Logs: sudo journalctl -u ${SERVICE_NAME} -f

Test connection from another machine:
 iperf3 -c ${host_ip} -p ${LISTEN_PORT} -t 10

To customize settings, edit:
 ${SERVICE_FILE}
Then run: sudo systemctl daemon-reload && sudo systemctl restart ${SERVICE_NAME}
EOF
}
|
||||
|
||||
# Stop and disable the iperf3 service, delete its unit file and reload systemd.
# Failures of stop/disable are ignored.
uninstall_service() {
    local action

    echo "Removing iperf3 server service..."
    for action in stop disable; do
        systemctl "$action" "${SERVICE_NAME}" 2>/dev/null || true
    done
    rm -f "$SERVICE_FILE"
    systemctl daemon-reload
    echo "iperf3 server service removed."
}
|
||||
|
||||
# --- Main execution ---
# Parse the options, then either remove the service or install iperf3 and
# set the service up.

parse_args "$@"

if [[ "$UNINSTALL" != true ]]; then
    echo "Setting up iperf3 server service on port ${LISTEN_PORT}..."
    install_iperf3
    install_service
else
    uninstall_service
fi
|
||||
Executable
+637
@@ -0,0 +1,637 @@
|
||||
#!/bin/bash
|
||||
|
||||
#############################################################
|
||||
#### Speedtest Metrics Exporter ####
|
||||
#### Internet & LAN speed metrics for Prometheus ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### License: MIT ####
|
||||
#### Version: 2.1 ####
|
||||
#### ####
|
||||
#### Usage: ./speedtest-metrics.sh [OPTIONS] ####
|
||||
#############################################################
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
#########################
|
||||
### Output Mode ###
|
||||
#########################
|
||||
|
||||
# Output-mode defaults; parse_args adjusts them from the command line.
# With no flags the metrics go to stdout (no output file, no HTTP server).
HTTP_MODE=false
OUTPUT_FILE=""
TEXTFILE_DIR="/var/lib/node_exporter"
# HTTP port: SPEEDTEST_EXPORTER_PORT from the environment, else 9196
LISTEN_PORT="${SPEEDTEST_EXPORTER_PORT:-9196}"
|
||||
|
||||
#########################
|
||||
### Parse Arguments ###
|
||||
#########################
|
||||
|
||||
# Print the usage text (modes, options, environment variables, examples) on stdout
show_help() {
    printf '%s\n' \
        'Speedtest Metrics Exporter for Prometheus' \
        '' \
        "Usage: $0 [OPTIONS]" \
        '' \
        'MODES:' \
        '--textfile Write to node_exporter textfile collector' \
        "--http Run HTTP server on port $LISTEN_PORT" \
        '(no flag) Output to stdout (default)' \
        '' \
        'OPTIONS:' \
        '-p, --port PORT HTTP port (default: 9196)' \
        '-o, --output PATH Output file path' \
        '--help Show this help' \
        '' \
        'ENVIRONMENT VARIABLES:' \
        'SPEEDTEST_SERVERS Comma-separated Ookla server IDs or "auto" (default: auto)' \
        'IPERF_SERVER Local iperf3 server IP (default: 192.168.1.100)' \
        'IPERF_PORT iperf3 port (default: 9182)' \
        'SPEEDTEST_EXPORTER_PORT Same as --port' \
        '' \
        'EXAMPLES:' \
        "$0 # One-shot to stdout" \
        "$0 --textfile # Write to textfile collector" \
        "$0 --http --port 9196 # Run HTTP server" \
        "$0 -o /tmp/speedtest.prom # Write to custom file" \
        ''
}
|
||||
|
||||
handle_request() {
|
||||
local request
|
||||
read -r request || true
|
||||
|
||||
local path
|
||||
path=$(echo "$request" | awk '{print $2}')
|
||||
|
||||
case "$path" in
|
||||
/metrics)
|
||||
local metrics
|
||||
metrics=$(collect_metrics)
|
||||
local content_length=${#metrics}
|
||||
printf "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s" "$content_length" "$metrics"
|
||||
;;
|
||||
/)
|
||||
local body="<html><body><h1>Speedtest Metrics Exporter</h1><p><a href='/metrics'>Metrics</a></p></body></html>"
|
||||
local content_length=${#body}
|
||||
printf "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s" "$content_length" "$body"
|
||||
;;
|
||||
*)
|
||||
printf "HTTP/1.1 404 Not Found\r\nContent-Type: text/plain\r\nContent-Length: 9\r\nConnection: close\r\n\r\nNot Found"
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Parse the command line into OUTPUT_FILE, HTTP_MODE and LISTEN_PORT.
# Arguments: the script's arguments ("$@")
# Outputs:   error messages (and, for unknown options, the usage text) on stderr
# Returns:   0 on success; exits 1 on an unknown option, a missing option
#            value or an invalid port; --help and --handle-request exit 0
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/speedtest.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            --port|-p)
                # ${2:-} avoids an "unbound variable" abort under `set -u`
                # when the option is the last argument
                if [[ ! "${2:-}" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
                    echo "Error: $1 requires a port number between 1 and 65535" >&2
                    exit 1
                fi
                LISTEN_PORT="$2"
                HTTP_MODE=true
                shift 2
                ;;
            --output|-o)
                if [[ -z "${2:-}" ]]; then
                    echo "Error: $1 requires a file path" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            --handle-request)
                # NOTE(review): handle_request serves /metrics via
                # collect_metrics; in this file parse_args is invoked before
                # collect_metrics is defined - confirm /metrics works here.
                handle_request
                exit 0
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                show_help >&2
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
parse_args "$@"
|
||||
|
||||
#########################
|
||||
### Metrics Collection ###
|
||||
#########################
|
||||
|
||||
collect_metrics() {
|
||||
|
||||
# Configuration
|
||||
TEMP_FILE="/tmp/speedtest_$$"
|
||||
IPERF_SERVER="${IPERF_SERVER:-192.168.1.100}" # Set to your local iperf3 server IP
|
||||
IPERF_PORT="${IPERF_PORT:-9182}" # iperf3 port
|
||||
# Multiple speedtest servers - add/remove server IDs as needed
|
||||
# Common server IDs for major cities:
|
||||
# Dallas/DFW: 5029 (AT&T), 12190 (Spectrum), 26847 (Verizon)
|
||||
# New York: 3737 (Verizon), 11570 (Optimum), 17395 (Spectrum)
|
||||
SPEEDTEST_SERVERS="${SPEEDTEST_SERVERS:-auto}" # Comma-separated server IDs or "auto"
|
||||
|
||||
cleanup() {
|
||||
rm -f "$TEMP_FILE"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Record script start time
|
||||
SCRIPT_START_TIME=$(date +%s.%N)
|
||||
|
||||
# Internet Speed Test - Multiple Servers
|
||||
echo "# Running internet speedtest on multiple servers..." >&2
|
||||
|
||||
# Initialize arrays to store results for all servers
|
||||
declare -a SERVER_IDS=()
|
||||
declare -a PING_LATENCIES=()
|
||||
declare -a PING_JITTERS=()
|
||||
declare -a PING_LOWS=()
|
||||
declare -a PING_HIGHS=()
|
||||
declare -a DOWNLOAD_MBPS=()
|
||||
declare -a UPLOAD_MBPS=()
|
||||
declare -a PACKET_LOSSES=()
|
||||
declare -a EXTERNAL_IPS=()
|
||||
declare -a TEST_TIMESTAMPS=()
|
||||
declare -a SERVER_NAMES=()
|
||||
declare -a SERVER_LOCATIONS=()
|
||||
declare -a SERVER_COUNTRIES=()
|
||||
declare -a ISPS=()
|
||||
declare -a RESULT_URLS=()
|
||||
declare -a DOWNLOAD_SIZES=()
|
||||
declare -a UPLOAD_SIZES=()
|
||||
declare -a SUCCESSES=()
|
||||
|
||||
# Convert comma-separated servers to array
|
||||
IFS=',' read -ra SERVERS <<< "$SPEEDTEST_SERVERS"
|
||||
|
||||
# Test each server
|
||||
for server_id in "${SERVERS[@]}"; do
|
||||
server_id=$(echo "$server_id" | xargs) # Trim whitespace
|
||||
echo "# Testing server $server_id..." >&2
|
||||
|
||||
TEMP_SERVER_FILE="/tmp/speedtest_${server_id}_$$"
|
||||
|
||||
# Handle auto server selection vs specific server ID
|
||||
if [[ "$server_id" == "auto" ]]; then
|
||||
speedtest_cmd="speedtest --format=json"
|
||||
else
|
||||
speedtest_cmd="speedtest -s $server_id --format=json"
|
||||
fi
|
||||
|
||||
if $speedtest_cmd --accept-license --accept-gdpr > "$TEMP_SERVER_FILE" 2>/dev/null; then
|
||||
echo "# Server $server_id: SUCCESS" >&2
|
||||
|
||||
# Parse results for this server
|
||||
ping_latency=$(jq -r '.ping.latency // "0"' "$TEMP_SERVER_FILE")
|
||||
ping_jitter=$(jq -r '.ping.jitter // "0"' "$TEMP_SERVER_FILE")
|
||||
ping_low=$(jq -r '.ping.low // "0"' "$TEMP_SERVER_FILE")
|
||||
ping_high=$(jq -r '.ping.high // "0"' "$TEMP_SERVER_FILE")
|
||||
download_bandwidth=$(jq -r '.download.bandwidth // "0"' "$TEMP_SERVER_FILE")
|
||||
upload_bandwidth=$(jq -r '.upload.bandwidth // "0"' "$TEMP_SERVER_FILE")
|
||||
packet_loss=$(jq -r '.packetLoss // "0"' "$TEMP_SERVER_FILE")
|
||||
external_ip=$(jq -r '.interface.externalIp // "unknown"' "$TEMP_SERVER_FILE")
|
||||
|
||||
# Handle timestamp conversion
|
||||
test_timestamp_raw=$(jq -r '.timestamp // "0"' "$TEMP_SERVER_FILE")
|
||||
if [[ "$test_timestamp_raw" != "0" ]] && [[ "$test_timestamp_raw" != "unknown" ]]; then
|
||||
test_timestamp=$(date -d "$test_timestamp_raw" +%s 2>/dev/null || echo "0")
|
||||
else
|
||||
test_timestamp=0
|
||||
fi
|
||||
|
||||
server_name=$(jq -r '.server.name // "unknown"' "$TEMP_SERVER_FILE")
|
||||
server_location=$(jq -r '.server.location // "unknown"' "$TEMP_SERVER_FILE")
|
||||
server_country=$(jq -r '.server.country // "unknown"' "$TEMP_SERVER_FILE")
|
||||
isp=$(jq -r '.isp // "unknown"' "$TEMP_SERVER_FILE")
|
||||
result_url=$(jq -r '.result.url // "unknown"' "$TEMP_SERVER_FILE")
|
||||
download_size=$(jq -r '.download.bytes // "0"' "$TEMP_SERVER_FILE")
|
||||
upload_size=$(jq -r '.upload.bytes // "0"' "$TEMP_SERVER_FILE")
|
||||
|
||||
# Convert from bits to Mbps (fallback to awk if bc unavailable)
|
||||
download_mbps=$(echo "scale=2; $download_bandwidth / 125000" | bc -l 2>/dev/null || echo "$download_bandwidth" | awk '{printf "%.2f", $1/125000}')
|
||||
upload_mbps=$(echo "scale=2; $upload_bandwidth / 125000" | bc -l 2>/dev/null || echo "$upload_bandwidth" | awk '{printf "%.2f", $1/125000}')
|
||||
|
||||
success=1
|
||||
else
|
||||
echo "# Server $server_id: FAILED" >&2
|
||||
|
||||
# Set default values for failed test
|
||||
ping_latency=0; ping_jitter=0; ping_low=0; ping_high=0
|
||||
download_mbps=0; upload_mbps=0; packet_loss=0
|
||||
external_ip="unknown"; test_timestamp=0; server_name="unknown"
|
||||
server_location="unknown"; server_country="unknown"; isp="unknown"
|
||||
result_url="unknown"; download_size=0; upload_size=0
|
||||
success=0
|
||||
fi
|
||||
|
||||
# Store results in arrays
|
||||
SERVER_IDS+=("$server_id")
|
||||
PING_LATENCIES+=("$ping_latency")
|
||||
PING_JITTERS+=("$ping_jitter")
|
||||
PING_LOWS+=("$ping_low")
|
||||
PING_HIGHS+=("$ping_high")
|
||||
DOWNLOAD_MBPS+=("$download_mbps")
|
||||
UPLOAD_MBPS+=("$upload_mbps")
|
||||
PACKET_LOSSES+=("$packet_loss")
|
||||
EXTERNAL_IPS+=("$external_ip")
|
||||
TEST_TIMESTAMPS+=("$test_timestamp")
|
||||
SERVER_NAMES+=("$server_name")
|
||||
SERVER_LOCATIONS+=("$server_location")
|
||||
SERVER_COUNTRIES+=("$server_country")
|
||||
ISPS+=("$isp")
|
||||
RESULT_URLS+=("$result_url")
|
||||
DOWNLOAD_SIZES+=("$download_size")
|
||||
UPLOAD_SIZES+=("$upload_size")
|
||||
SUCCESSES+=("$success")
|
||||
|
||||
# Cleanup temp file
|
||||
rm -f "$TEMP_SERVER_FILE"
|
||||
done
|
||||
|
||||
# Local Network Speed Test (iperf3) - Enhanced with additional metrics
|
||||
echo "# Testing local network speed..." >&2
|
||||
if command -v iperf3 >/dev/null 2>&1; then
|
||||
# Test download from local server (we are client)
|
||||
if local_down=$(timeout 10 iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 5 -J 2>/dev/null); then
|
||||
local_download_mbps=$(echo "$local_down" | jq -r '.end.sum_received.bits_per_second // "0"' | awk '{printf "%.2f", $1/1000000}')
|
||||
local_download_bytes=$(echo "$local_down" | jq -r '.end.sum_received.bytes // "0"')
|
||||
local_download_retransmits=$(echo "$local_down" | jq -r '.end.sum_sent.retransmits // "0"')
|
||||
local_download_rtt=$(echo "$local_down" | jq -r '.end.streams[0].sender.mean_rtt // "0"' | awk '{printf "%.3f", $1/1000}') # Convert to ms
|
||||
local_download_rtt_var=$(echo "$local_down" | jq -r '.end.streams[0].sender.rtt_variance // "0"' | awk '{printf "%.3f", $1/1000}')
|
||||
local_download_cpu_local=$(echo "$local_down" | jq -r '.end.cpu_utilization_percent.host_total // "0"')
|
||||
local_download_cpu_remote=$(echo "$local_down" | jq -r '.end.cpu_utilization_percent.remote_total // "0"')
|
||||
local_download_congestion_window=$(echo "$local_down" | jq -r '.end.streams[0].sender.max_snd_cwnd // "0"')
|
||||
local_download_success=1
|
||||
else
|
||||
local_download_mbps=0; local_download_bytes=0; local_download_retransmits=0
|
||||
local_download_rtt=0; local_download_rtt_var=0; local_download_cpu_local=0
|
||||
local_download_cpu_remote=0; local_download_congestion_window=0; local_download_success=0
|
||||
fi
|
||||
|
||||
# Test upload to local server (we are client, reverse mode)
|
||||
if local_up=$(timeout 10 iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 5 -R -J 2>/dev/null); then
|
||||
local_upload_mbps=$(echo "$local_up" | jq -r '.end.sum_sent.bits_per_second // "0"' | awk '{printf "%.2f", $1/1000000}')
|
||||
local_upload_bytes=$(echo "$local_up" | jq -r '.end.sum_sent.bytes // "0"')
|
||||
local_upload_retransmits=$(echo "$local_up" | jq -r '.end.sum_received.retransmits // "0"')
|
||||
local_upload_rtt=$(echo "$local_up" | jq -r '.end.streams[0].receiver.mean_rtt // "0"' | awk '{printf "%.3f", $1/1000}')
|
||||
local_upload_rtt_var=$(echo "$local_up" | jq -r '.end.streams[0].receiver.rtt_variance // "0"' | awk '{printf "%.3f", $1/1000}')
|
||||
local_upload_cpu_local=$(echo "$local_up" | jq -r '.end.cpu_utilization_percent.host_total // "0"')
|
||||
local_upload_cpu_remote=$(echo "$local_up" | jq -r '.end.cpu_utilization_percent.remote_total // "0"')
|
||||
local_upload_congestion_window=$(echo "$local_up" | jq -r '.end.streams[0].receiver.max_snd_cwnd // "0"')
|
||||
local_upload_success=1
|
||||
else
|
||||
local_upload_mbps=0; local_upload_bytes=0; local_upload_retransmits=0
|
||||
local_upload_rtt=0; local_upload_rtt_var=0; local_upload_cpu_local=0
|
||||
local_upload_cpu_remote=0; local_upload_congestion_window=0; local_upload_success=0
|
||||
fi
|
||||
else
|
||||
echo "# iperf3 not installed, skipping local network test" >&2
|
||||
local_download_mbps=0; local_upload_mbps=0; local_download_bytes=0; local_upload_bytes=0
|
||||
local_download_retransmits=0; local_upload_retransmits=0; local_download_rtt=0; local_upload_rtt=0
|
||||
local_download_rtt_var=0; local_upload_rtt_var=0; local_download_cpu_local=0; local_upload_cpu_local=0
|
||||
local_download_cpu_remote=0; local_upload_cpu_remote=0; local_download_congestion_window=0; local_upload_congestion_window=0
|
||||
local_download_success=0; local_upload_success=0
|
||||
fi
|
||||
|
||||
# Calculate script runtime
|
||||
SCRIPT_END_TIME=$(date +%s.%N)
|
||||
SCRIPT_RUNTIME=$(echo "$SCRIPT_END_TIME - $SCRIPT_START_TIME" | bc -l 2>/dev/null || echo "$SCRIPT_END_TIME $SCRIPT_START_TIME" | awk '{printf "%.3f", $1-$2}')
|
||||
|
||||
# Output Prometheus metrics
|
||||
cat <<EOF
|
||||
# HELP internet_speedtest_latency Internet connection latency in milliseconds
|
||||
# TYPE internet_speedtest_latency gauge
|
||||
EOF
|
||||
|
||||
# Generate metrics for each server
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
isp="${ISPS[$i]}"
|
||||
external_ip="${EXTERNAL_IPS[$i]}"
|
||||
result_url="${RESULT_URLS[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_latency{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PING_LATENCIES[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_latency_low Internet connection minimum latency in milliseconds
|
||||
# TYPE internet_speedtest_latency_low gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_latency_low{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PING_LOWS[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_latency_high Internet connection maximum latency in milliseconds
|
||||
# TYPE internet_speedtest_latency_high gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_latency_high{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PING_HIGHS[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_jitter Internet connection jitter in milliseconds
|
||||
# TYPE internet_speedtest_jitter gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_jitter{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PING_JITTERS[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_download Internet download speed in Mbps
|
||||
# TYPE internet_speedtest_download gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_download{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${DOWNLOAD_MBPS[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_download_size Internet download test data size in bytes
|
||||
# TYPE internet_speedtest_download_size gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_download_size{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${DOWNLOAD_SIZES[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_upload Internet upload speed in Mbps
|
||||
# TYPE internet_speedtest_upload gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_upload{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${UPLOAD_MBPS[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_upload_size Internet upload test data size in bytes
|
||||
# TYPE internet_speedtest_upload_size gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_upload_size{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${UPLOAD_SIZES[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_packet_loss Internet connection packet loss percentage
|
||||
# TYPE internet_speedtest_packet_loss gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_packet_loss{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PACKET_LOSSES[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_timestamp Unix timestamp when test was performed
|
||||
# TYPE internet_speedtest_timestamp gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_timestamp{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${TEST_TIMESTAMPS[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_info Internet speedtest metadata
|
||||
# TYPE internet_speedtest_info gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
external_ip="${EXTERNAL_IPS[$i]}"
|
||||
isp="${ISPS[$i]}"
|
||||
result_url="${RESULT_URLS[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_info{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country",external_ip="$external_ip",isp="$isp",result_url="$result_url"} 1
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP internet_speedtest_success Whether internet speedtest succeeded (1=success, 0=failed)
|
||||
# TYPE internet_speedtest_success gauge
|
||||
EOF
|
||||
|
||||
for i in "${!SERVER_IDS[@]}"; do
|
||||
server_id="${SERVER_IDS[$i]}"
|
||||
server_name="${SERVER_NAMES[$i]}"
|
||||
server_location="${SERVER_LOCATIONS[$i]}"
|
||||
server_country="${SERVER_COUNTRIES[$i]}"
|
||||
|
||||
cat <<EOF
|
||||
internet_speedtest_success{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${SUCCESSES[$i]}
|
||||
EOF
|
||||
done
|
||||
|
||||
cat <<EOF
|
||||
|
||||
# HELP local_network_download Local network download speed in Mbps
|
||||
# TYPE local_network_download gauge
|
||||
local_network_download $local_download_mbps
|
||||
|
||||
# HELP local_network_upload Local network upload speed in Mbps
|
||||
# TYPE local_network_upload gauge
|
||||
local_network_upload $local_upload_mbps
|
||||
|
||||
# HELP local_network_download_bytes Total bytes downloaded in local network test
|
||||
# TYPE local_network_download_bytes gauge
|
||||
local_network_download_bytes $local_download_bytes
|
||||
|
||||
# HELP local_network_upload_bytes Total bytes uploaded in local network test
|
||||
# TYPE local_network_upload_bytes gauge
|
||||
local_network_upload_bytes $local_upload_bytes
|
||||
|
||||
# HELP local_network_download_retransmits TCP retransmissions during download test
|
||||
# TYPE local_network_download_retransmits gauge
|
||||
local_network_download_retransmits $local_download_retransmits
|
||||
|
||||
# HELP local_network_upload_retransmits TCP retransmissions during upload test
|
||||
# TYPE local_network_upload_retransmits gauge
|
||||
local_network_upload_retransmits $local_upload_retransmits
|
||||
|
||||
# HELP local_network_download_rtt Mean round-trip time during download test in milliseconds
|
||||
# TYPE local_network_download_rtt gauge
|
||||
local_network_download_rtt $local_download_rtt
|
||||
|
||||
# HELP local_network_upload_rtt Mean round-trip time during upload test in milliseconds
|
||||
# TYPE local_network_upload_rtt gauge
|
||||
local_network_upload_rtt $local_upload_rtt
|
||||
|
||||
# HELP local_network_download_rtt_variance RTT variance during download test in milliseconds
|
||||
# TYPE local_network_download_rtt_variance gauge
|
||||
local_network_download_rtt_variance $local_download_rtt_var
|
||||
|
||||
# HELP local_network_upload_rtt_variance RTT variance during upload test in milliseconds
|
||||
# TYPE local_network_upload_rtt_variance gauge
|
||||
local_network_upload_rtt_variance $local_upload_rtt_var
|
||||
|
||||
# HELP local_network_download_cpu_local Local CPU utilization during download test (percentage)
|
||||
# TYPE local_network_download_cpu_local gauge
|
||||
local_network_download_cpu_local $local_download_cpu_local
|
||||
|
||||
# HELP local_network_upload_cpu_local Local CPU utilization during upload test (percentage)
|
||||
# TYPE local_network_upload_cpu_local gauge
|
||||
local_network_upload_cpu_local $local_upload_cpu_local
|
||||
|
||||
# HELP local_network_download_cpu_remote Remote CPU utilization during download test (percentage)
|
||||
# TYPE local_network_download_cpu_remote gauge
|
||||
local_network_download_cpu_remote $local_download_cpu_remote
|
||||
|
||||
# HELP local_network_upload_cpu_remote Remote CPU utilization during upload test (percentage)
|
||||
# TYPE local_network_upload_cpu_remote gauge
|
||||
local_network_upload_cpu_remote $local_upload_cpu_remote
|
||||
|
||||
# HELP local_network_download_congestion_window Maximum TCP congestion window size during download test in bytes
|
||||
# TYPE local_network_download_congestion_window gauge
|
||||
local_network_download_congestion_window $local_download_congestion_window
|
||||
|
||||
# HELP local_network_upload_congestion_window Maximum TCP congestion window size during upload test in bytes
|
||||
# TYPE local_network_upload_congestion_window gauge
|
||||
local_network_upload_congestion_window $local_upload_congestion_window
|
||||
|
||||
# HELP local_network_test_success Whether local network test succeeded (1=success, 0=failed)
|
||||
# TYPE local_network_test_success gauge
|
||||
local_network_test_success $((local_download_success && local_upload_success))
|
||||
|
||||
# HELP speedtest_script_runtime_seconds Total script execution time in seconds
|
||||
# TYPE speedtest_script_runtime_seconds gauge
|
||||
speedtest_script_runtime_seconds $SCRIPT_RUNTIME
|
||||
EOF
|
||||
|
||||
}
|
||||
|
||||
#########################
|
||||
### Output Handling ###
|
||||
#########################
|
||||
|
||||
# Collect the metrics once and deliver them.
# Globals:   OUTPUT_FILE (read) - destination path; empty means stdout
# Outputs:   metrics on stdout when OUTPUT_FILE is empty, otherwise a status
#            line on stderr
# Returns:   0 on success, 1 if the output directory or file cannot be written
write_output() {
  local metrics out_dir tmp_file
  metrics=$(collect_metrics)

  if [[ -z "$OUTPUT_FILE" ]]; then
    printf '%s\n' "$metrics"
    return 0
  fi

  out_dir=$(dirname -- "$OUTPUT_FILE")
  if ! mkdir -p -- "$out_dir"; then
    echo "Failed to create output directory $out_dir" >&2
    return 1
  fi

  # Write to a sibling temp file and rename it into place so a reader never
  # sees a half-written metrics file.
  tmp_file="${OUTPUT_FILE}.$$"
  if ! printf '%s\n' "$metrics" > "$tmp_file" || ! mv -- "$tmp_file" "$OUTPUT_FILE"; then
    rm -f -- "$tmp_file"
    echo "Failed to write metrics to $OUTPUT_FILE" >&2
    return 1
  fi

  echo "Metrics written to $OUTPUT_FILE" >&2
}
|
||||
|
||||
# Serve metrics over HTTP by letting socat hand each connection to this script.
# Globals:   LISTEN_PORT (read) - TCP port to listen on
# Outputs:   status messages on stderr
# Returns:   does not return normally; exits 1 when socat is not installed
start_server() {
  command -v socat >/dev/null 2>&1 || {
    echo "socat is required for HTTP mode. Install it first." >&2
    exit 1
  }

  echo "Starting Speedtest Metrics Exporter on port $LISTEN_PORT" >&2
  echo "Metrics available at http://localhost:$LISTEN_PORT/metrics" >&2

  # Keep the listener alive: whenever socat exits with an error, wait and
  # start it again.
  while :; do
    if ! socat TCP-LISTEN:"$LISTEN_PORT",reuseaddr,fork EXEC:"$0 --handle-request" 2>/dev/null; then
      echo "Server error, restarting in 5 seconds..." >&2
      sleep 5
    fi
  done
}
|
||||
|
||||
# Main execution: HTTP mode takes precedence, then file output; with neither
# requested the metrics are printed straight to stdout.
if [[ "$HTTP_MODE" != true && -z "$OUTPUT_FILE" ]]; then
  collect_metrics
elif [[ "$HTTP_MODE" == true ]]; then
  start_server
else
  write_output
fi
|
||||
@@ -0,0 +1,682 @@
|
||||
#!/bin/bash
|
||||
|
||||
################################################
|
||||
#### SSL Certificate Deployer ####
|
||||
#### Deploy certs to multiple services ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### License: MIT ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### Version: 1.00-030326 ####
|
||||
################################################
|
||||
|
||||
# A pipeline fails when any of its stages fails.
set -o pipefail

# Name used in usage and error messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Runtime variables (filled in by parse_arguments)
CERT_FILE=
KEY_FILE=
CA_FILE=
TARGETS=
DRY_RUN=false
BACKUP=false

# Debug output is enabled by any non-empty DEBUG from the environment.
: "${DEBUG:=}"
|
||||
|
||||
# Report a failed command on stderr and terminate with its exit status.
# Arguments: $1 - exit code of the failed command
#            $2 - line number where it failed
handle_error() {
  local status=$1
  local failed_line=$2
  printf 'Error: %s failed at line %s with exit code %s\n' \
    "$SCRIPT_NAME" "$failed_line" "$status" >&2
  exit "$status"
}

# Send failing commands to handle_error. Bash only inherits an ERR trap into
# functions when `set -E` is on, which the visible part of this script does
# not enable.
trap 'handle_error $? $LINENO' ERR
|
||||
|
||||
# Print a "[DEBUG]"-prefixed message on stderr, but only when DEBUG is non-empty.
# Arguments: message words
debug_echo() {
  [[ -n "$DEBUG" ]] || return 0
  printf '[DEBUG] %s\n' "$*" >&2
}
|
||||
|
||||
# Print an "[INFO]"-prefixed message on stdout.
info() {
  printf '[INFO] %s\n' "$*"
}
|
||||
|
||||
# Print a "[WARN]"-prefixed message on stderr.
warn() {
  printf '[WARN] %s\n' "$*" >&2
}
|
||||
|
||||
# Print an "[ERROR]"-prefixed message on stderr.
error() {
  printf '[ERROR] %s\n' "$*" >&2
}
|
||||
|
||||
# Print the usage text (options, supported targets, environment, examples)
# on stdout. SCRIPT_NAME is expanded into the text.
show_help() {
  cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]

Deploy SSL certificates to multiple service targets in a single run.

OPTIONS:
--cert FILE Path to the SSL certificate file (required)
--key FILE Path to the SSL private key file (required)
--ca FILE Path to the CA bundle file (optional)
--targets LIST Comma-separated list of targets (required)
--dry-run Show what would be done without making changes
--backup Backup existing certificates before overwriting
--help, -h Show this help message

SUPPORTED TARGETS:
nginx Copy cert+key to /etc/nginx/ssl/, reload nginx
apache Copy cert+key to /etc/httpd/ssl/ or /etc/apache2/ssl/, reload
postfix Update TLS cert/key in main.cf, reload postfix
dovecot Update ssl_cert/ssl_key in dovecot config, reload dovecot
artifactory Import cert into Artifactory Java keystore, restart
bitbucket Import cert into Bitbucket Java keystore, restart
jira Import cert into Jira Java keystore, restart
haproxy Concatenate cert+key into PEM at /etc/haproxy/certs/, reload
system Update system CA trust store

ENVIRONMENT VARIABLES:
DEBUG Enable debug output when set

EXAMPLES:
$SCRIPT_NAME --cert server.crt --key server.key --targets nginx,haproxy
$SCRIPT_NAME --cert server.crt --key server.key --ca ca-bundle.crt --targets apache,postfix,dovecot
$SCRIPT_NAME --cert server.crt --key server.key --targets artifactory,bitbucket,jira --backup
$SCRIPT_NAME --cert server.crt --key server.key --targets system --dry-run
DEBUG=1 $SCRIPT_NAME --cert server.crt --key server.key --targets nginx
EOF
}
|
||||
|
||||
# Check that a private key belongs to a certificate by comparing the public
# key embedded in the certificate with the one derived from the private key.
# Works for any key type `openssl pkey` can read, not only RSA.
# Arguments: $1 - certificate file, $2 - private key file
# Returns:   0 when the public keys are identical; 1 on mismatch or when
#            either public key cannot be extracted
validate_cert_key_match() {
  local cert="$1"
  local key="$2"
  local cert_pubkey key_pubkey

  cert_pubkey=$(openssl x509 -noout -pubkey -in "$cert" 2>/dev/null) || cert_pubkey=""
  key_pubkey=$(openssl pkey -pubout -in "$key" 2>/dev/null) || key_pubkey=""

  # Two failed extractions must never compare as "equal".
  if [[ -z "$cert_pubkey" || -z "$key_pubkey" ]]; then
    error "Could not extract public key from certificate or key"
    return 1
  fi

  if [[ "$cert_pubkey" != "$key_pubkey" ]]; then
    error "Certificate and key do not match (public key mismatch)"
    debug_echo "Public key of $cert differs from public key derived from $key"
    return 1
  fi

  debug_echo "Certificate and key match"
  return 0
}
|
||||
|
||||
# Make a timestamped copy "<file>.bak.<YYYYmmddHHMMSS>" of an existing file.
# Globals:   DRY_RUN (read) - when "true", only report what would be done
# Arguments: $1 - file to back up; nothing happens if it is not a regular file
# Returns:   0 on success or when there is nothing to back up, 1 if the copy
#            fails
backup_file() {
  local file="$1"
  local backup_name

  [[ -f "$file" ]] || return 0

  backup_name="${file}.bak.$(date +%Y%m%d%H%M%S)"

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would backup $file -> $backup_name"
    return 0
  fi

  # Report the failure so callers can avoid overwriting a file that has no
  # backup.
  if ! cp -a -- "$file" "$backup_name"; then
    error "Failed to back up $file -> $backup_name"
    return 1
  fi
  info "Backed up $file -> $backup_name"
}
|
||||
|
||||
# Copy a file into place with owner-only permissions (mode 600).
# Globals:   BACKUP (read)  - when "true", back up the destination first
#            DRY_RUN (read) - when "true", only report what would be done
# Arguments: $1 - source file, $2 - destination path
# Returns:   0 on success; 1 if the backup, the copy or the chmod fails
copy_file() {
  local src="$1"
  local dest="$2"

  if [[ "$BACKUP" == true ]]; then
    # Never overwrite the destination if the requested backup failed.
    backup_file "$dest" || return 1
  fi

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would copy $src -> $dest"
    return 0
  fi

  if ! cp -a -- "$src" "$dest"; then
    error "Failed to copy $src -> $dest"
    return 1
  fi
  if ! chmod 600 -- "$dest"; then
    error "Failed to set permissions on $dest"
    return 1
  fi
  info "Copied $src -> $dest"
}
|
||||
|
||||
# Reload a systemd service if it is currently active.
# Globals:   DRY_RUN (read) - when "true", only report what would be done
# Arguments: $1 - service name
# Returns:   0 when reloaded, skipped (inactive) or in dry-run; 1 if the
#            reload itself fails
reload_service() {
  local service="$1"

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would reload $service"
    return 0
  fi

  if ! systemctl is-active --quiet "$service" 2>/dev/null; then
    warn "Service $service is not active, skipping reload"
    return 0
  fi

  if ! systemctl reload "$service"; then
    error "Failed to reload $service"
    return 1
  fi
  info "Reloaded $service"
}
|
||||
|
||||
# Restart a systemd service.
# Globals:   DRY_RUN (read) - when "true", only report what would be done
# Arguments: $1 - service name
# Returns:   0 on success or in dry-run; 1 if the restart fails
restart_service() {
  local service="$1"

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would restart $service"
    return 0
  fi

  if ! systemctl restart "$service"; then
    error "Failed to restart $service"
    return 1
  fi
  info "Restarted $service"
}
|
||||
|
||||
# Resolve the Java keystore password, trying three sources in order:
#   1. HTTP GET on the given URL, reading .data.password from the JSON reply
#   2. `vault kv get -field=password secret/keystore`
#   3. the literal default "changeit"
# Arguments: $1 - password URL (may be empty to skip the HTTP lookup)
# Outputs:   the password on stdout
get_keystore_password() {
  local password_url="$1"
  local secret=""

  # Try Vault HTTP API first if URL provided
  if [[ -n "$password_url" ]]; then
    debug_echo "Retrieving keystore password from $password_url"
    secret=$(curl -sf -X GET "$password_url" 2>/dev/null | jq -r '.data.password // empty' 2>/dev/null || true)
  fi

  # Fall back to Vault CLI
  [[ -n "$secret" ]] || {
    debug_echo "Falling back to Vault CLI for keystore password"
    secret=$(vault kv get -field=password secret/keystore 2>/dev/null || true)
  }

  # Fall back to default
  [[ -n "$secret" ]] || {
    debug_echo "Using default keystore password"
    secret="changeit"
  }

  echo "$secret"
}
|
||||
|
||||
# Locate a Java installation that has both an executable keytool and a
# cacerts keystore.
# Search order: JAVA_HOME, a list of common install paths (globs allowed),
# then a keytool found on PATH paired with a known system keystore.
# Arguments: $1 - NAME of the caller's variable to receive the keytool directory
#            $2 - NAME of the caller's variable to receive the keystore path
# Returns:   0 when found (both variables set), 1 otherwise (both variables
#            are left untouched)
find_java_keystore() {
  local -n java_bin_ref=$1
  local -n keystore_ref=$2
  local path_pattern java_dir ks

  # Common Java installation paths
  local -a java_paths=(
    "/opt/jfrog/artifactory/app/third-party/java"
    "/mnt/ebs/bitbucket/*/jre"
    "/mnt/ebs/jira/jre"
    "/usr/lib/jvm/java-*-openjdk"
    "/usr/lib/jvm/default-java"
    "/opt/java"
    "/usr/java/latest"
  )

  # Keystore locations to pair with a keytool found on PATH
  local -a system_keystores=(
    "/etc/ssl/certs/java/cacerts"
    "/usr/lib/jvm/default-java/lib/security/cacerts"
    "/etc/pki/ca-trust/extracted/java/cacerts"
  )

  # Check JAVA_HOME first; only accept it when keytool AND keystore exist.
  if [[ -n "${JAVA_HOME:-}" && -x "$JAVA_HOME/bin/keytool" &&
    -f "$JAVA_HOME/lib/security/cacerts" ]]; then
    java_bin_ref="$JAVA_HOME/bin"
    keystore_ref="$JAVA_HOME/lib/security/cacerts"
    debug_echo "Found Java via JAVA_HOME: $java_bin_ref"
    return 0
  fi

  # Search common paths with glob expansion
  for path_pattern in "${java_paths[@]}"; do
    # shellcheck disable=SC2086 # unquoted on purpose so the glob expands
    for java_dir in $path_pattern; do
      if [[ -d "$java_dir" && -x "$java_dir/bin/keytool" &&
        -f "$java_dir/lib/security/cacerts" ]]; then
        java_bin_ref="$java_dir/bin"
        keystore_ref="$java_dir/lib/security/cacerts"
        debug_echo "Found Java at: $java_dir"
        return 0
      fi
    done
  done

  # Fallback: try system keytool
  if command -v keytool >/dev/null 2>&1; then
    for ks in "${system_keystores[@]}"; do
      if [[ -f "$ks" ]]; then
        java_bin_ref="$(dirname "$(command -v keytool)")"
        keystore_ref="$ks"
        debug_echo "Found system Java at: $java_bin_ref"
        return 0
      fi
    done
  fi

  return 1
}
|
||||
|
||||
# Replace a certificate alias in a Java keystore and restart the owning
# service.
# Globals:   CERT_FILE (read), BACKUP (read), DRY_RUN (read)
# Arguments: $1 - keystore path
#            $2 - directory containing keytool
#            $3 - alias to replace
#            $4 - password URL handed to get_keystore_password (may be empty)
#            $5 - systemd service to restart afterwards
# Returns:   0 on success or in dry-run; 1 if the backup or import fails;
#            otherwise the status of restart_service
deploy_java_keystore() {
  local keystore="$1"
  local java_bin="$2"
  local alias_name="$3"
  local vault_url="$4"
  local service_name="$5"
  local storepass

  if [[ "$BACKUP" == true ]]; then
    backup_file "$keystore" || return 1
  fi

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would delete alias '$alias_name' from keystore $keystore"
    info "[DRY RUN] Would import $CERT_FILE into keystore $keystore"
    info "[DRY RUN] Would restart $service_name"
    return 0
  fi

  # Only look the password up when it is actually needed (not in dry-run).
  storepass=$(get_keystore_password "$vault_url")

  # The password is handed to keytool through the environment
  # (-storepass:env) so it does not show up in the process list.
  # The alias may not exist yet, so a failed delete is expected and ignored.
  KEYSTORE_STOREPASS="$storepass" "$java_bin/keytool" -delete \
    -alias "$alias_name" -keystore "$keystore" \
    -storepass:env KEYSTORE_STOREPASS 2>/dev/null || true

  if ! KEYSTORE_STOREPASS="$storepass" "$java_bin/keytool" -import -noprompt \
    -alias "$alias_name" -keystore "$keystore" -file "$CERT_FILE" \
    -storepass:env KEYSTORE_STOREPASS; then
    error "Failed to import $CERT_FILE into $keystore"
    return 1
  fi

  info "Imported certificate into $keystore"
  restart_service "$service_name"
}
|
||||
|
||||
# ---- Target handlers ----
|
||||
|
||||
# Install certificate, key and optional CA bundle under /etc/nginx/ssl and
# reload nginx.
# Globals:   CERT_FILE, KEY_FILE, CA_FILE, DRY_RUN (all read)
# Returns:   1 as soon as the directory cannot be created or a copy fails
#            (nginx is then NOT reloaded); otherwise the status of
#            reload_service
deploy_nginx() {
  info "Deploying to nginx..."
  local ssl_dir="/etc/nginx/ssl"

  if [[ "$DRY_RUN" != true ]] && ! mkdir -p "$ssl_dir"; then
    error "Failed to create $ssl_dir"
    return 1
  fi

  copy_file "$CERT_FILE" "$ssl_dir/server.crt" || return 1
  copy_file "$KEY_FILE" "$ssl_dir/server.key" || return 1

  if [[ -n "$CA_FILE" ]]; then
    copy_file "$CA_FILE" "$ssl_dir/ca-bundle.crt" || return 1
  fi

  reload_service nginx
}
|
||||
|
||||
# Install certificate, key and optional CA bundle into Apache's ssl directory
# (/etc/httpd/ssl or /etc/apache2/ssl, whichever config tree exists) and
# reload the matching service.
# Globals:   CERT_FILE, KEY_FILE, CA_FILE, DRY_RUN (all read)
# Returns:   1 if no Apache config directory exists, the ssl directory cannot
#            be created, or a copy fails (no reload then); otherwise the
#            status of the reload, or 0 when no Apache unit was detected
deploy_apache() {
  info "Deploying to apache..."
  local ssl_dir=""
  local units

  if [[ -d "/etc/httpd" ]]; then
    ssl_dir="/etc/httpd/ssl"
  elif [[ -d "/etc/apache2" ]]; then
    ssl_dir="/etc/apache2/ssl"
  else
    error "Could not detect Apache configuration directory"
    return 1
  fi

  if [[ "$DRY_RUN" != true ]] && ! mkdir -p "$ssl_dir"; then
    error "Failed to create $ssl_dir"
    return 1
  fi

  copy_file "$CERT_FILE" "$ssl_dir/server.crt" || return 1
  copy_file "$KEY_FILE" "$ssl_dir/server.key" || return 1

  if [[ -n "$CA_FILE" ]]; then
    copy_file "$CA_FILE" "$ssl_dir/ca-bundle.crt" || return 1
  fi

  # Detect and reload the correct service. The unit list is captured first
  # and matched in-shell: piping systemctl into `grep -q` can fail under
  # `set -o pipefail` when grep exits early and systemctl receives SIGPIPE.
  units=$(systemctl list-units --type=service --all 2>/dev/null) || units=""
  if [[ "$units" == *httpd.service* ]]; then
    reload_service httpd
  elif [[ "$units" == *apache2.service* ]]; then
    reload_service apache2
  else
    warn "Could not detect Apache service name"
  fi
}
|
||||
|
||||
# Point Postfix at the certificate and key by setting smtpd_tls_cert_file and
# smtpd_tls_key_file in main.cf, then reload postfix.
# The files are referenced in place (not copied), so their paths are written
# as absolute paths.
# Globals:   CERT_FILE, KEY_FILE, DRY_RUN, BACKUP (read)
#            POSTFIX_MAIN_CF (read, optional) - overrides /etc/postfix/main.cf
# Returns:   1 if main.cf is missing or cannot be backed up or updated;
#            otherwise the status of reload_service (0 in dry-run)
deploy_postfix() {
  info "Deploying to postfix..."
  local main_cf="${POSTFIX_MAIN_CF:-/etc/postfix/main.cf}"
  local cert_path key_path setting value escaped

  if [[ ! -f "$main_cf" ]]; then
    error "Postfix main.cf not found at $main_cf"
    return 1
  fi

  # A relative path in main.cf would not point at the file the user supplied.
  cert_path=$(readlink -f -- "$CERT_FILE" 2>/dev/null) || cert_path="$CERT_FILE"
  key_path=$(readlink -f -- "$KEY_FILE" 2>/dev/null) || key_path="$KEY_FILE"

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would update smtpd_tls_cert_file in $main_cf to $cert_path"
    info "[DRY RUN] Would update smtpd_tls_key_file in $main_cf to $key_path"
    info "[DRY RUN] Would reload postfix"
    return 0
  fi

  if [[ "$BACKUP" == true ]]; then
    backup_file "$main_cf" || return 1
  fi

  for setting in smtpd_tls_cert_file smtpd_tls_key_file; do
    if [[ "$setting" == smtpd_tls_cert_file ]]; then
      value="$cert_path"
    else
      value="$key_path"
    fi

    if grep -q "^${setting}[[:space:]]*=" "$main_cf"; then
      # Escape characters that are special in a sed replacement (\ & and the
      # | delimiter) so unusual paths are written literally.
      escaped=$(printf '%s' "$value" | sed 's/[\\&|]/\\&/g')
      if ! sed -i "s|^${setting}[[:space:]]*=.*|${setting} = ${escaped}|" "$main_cf"; then
        error "Failed to update $setting in $main_cf"
        return 1
      fi
    elif ! printf '%s = %s\n' "$setting" "$value" >> "$main_cf"; then
      error "Failed to add $setting to $main_cf"
      return 1
    fi
  done

  info "Updated $main_cf with certificate paths"
  reload_service postfix
}
|
||||
|
||||
# Point Dovecot at the certificate and key by setting ssl_cert and ssl_key
# (written as "<path") in its configuration, then reload dovecot.
# The files are referenced in place (not copied), so their paths are written
# as absolute paths.
# Globals:   CERT_FILE, KEY_FILE, DRY_RUN, BACKUP (read)
#            DOVECOT_CONF (read, optional) - explicit config file; by default
#            /etc/dovecot/conf.d/10-ssl.conf, then /etc/dovecot/dovecot.conf
# Returns:   1 if no config file is found or it cannot be backed up or
#            updated; otherwise the status of reload_service (0 in dry-run)
deploy_dovecot() {
  info "Deploying to dovecot..."
  local dovecot_conf=""
  local cert_path key_path setting value escaped

  if [[ -n "${DOVECOT_CONF:-}" ]]; then
    if [[ -f "$DOVECOT_CONF" ]]; then
      dovecot_conf="$DOVECOT_CONF"
    fi
  elif [[ -f "/etc/dovecot/conf.d/10-ssl.conf" ]]; then
    dovecot_conf="/etc/dovecot/conf.d/10-ssl.conf"
  elif [[ -f "/etc/dovecot/dovecot.conf" ]]; then
    dovecot_conf="/etc/dovecot/dovecot.conf"
  fi

  if [[ -z "$dovecot_conf" ]]; then
    error "Could not find dovecot configuration"
    return 1
  fi

  # A relative path in the config would not point at the file the user
  # supplied.
  cert_path=$(readlink -f -- "$CERT_FILE" 2>/dev/null) || cert_path="$CERT_FILE"
  key_path=$(readlink -f -- "$KEY_FILE" 2>/dev/null) || key_path="$KEY_FILE"

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would update ssl_cert in $dovecot_conf to <$cert_path"
    info "[DRY RUN] Would update ssl_key in $dovecot_conf to <$key_path"
    info "[DRY RUN] Would reload dovecot"
    return 0
  fi

  if [[ "$BACKUP" == true ]]; then
    backup_file "$dovecot_conf" || return 1
  fi

  for setting in ssl_cert ssl_key; do
    if [[ "$setting" == ssl_cert ]]; then
      value="$cert_path"
    else
      value="$key_path"
    fi

    # Match the exact setting name followed by "=" so that longer names such
    # as ssl_key_password or ssl_cert_username_field are left alone.
    if grep -q "^${setting}[[:space:]]*=" "$dovecot_conf"; then
      # Escape characters that are special in a sed replacement.
      escaped=$(printf '%s' "$value" | sed 's/[\\&|]/\\&/g')
      if ! sed -i "s|^${setting}[[:space:]]*=.*|${setting} = <${escaped}|" "$dovecot_conf"; then
        error "Failed to update $setting in $dovecot_conf"
        return 1
      fi
    elif ! printf '%s = <%s\n' "$setting" "$value" >> "$dovecot_conf"; then
      error "Failed to add $setting to $dovecot_conf"
      return 1
    fi
  done

  info "Updated $dovecot_conf with certificate paths"
  reload_service dovecot
}
|
||||
|
||||
# Import the certificate into the keystore of Artifactory's bundled Java,
# falling back to a general Java search when the bundled one is not present,
# then hand off to deploy_java_keystore (alias "ssl-cert", service
# "artifactory").
# Returns:   1 when no keytool/keystore pair can be found; otherwise the
#            status of deploy_java_keystore
deploy_artifactory() {
  info "Deploying to artifactory..."
  local bundled_java="/opt/jfrog/artifactory/app/third-party/java"
  local java_bin="$bundled_java/bin"
  local keystore="$bundled_java/lib/security/cacerts"

  if ! [[ -x "$java_bin/keytool" && -f "$keystore" ]]; then
    debug_echo "Artifactory default paths not found, searching for Java"
    find_java_keystore java_bin keystore || {
      error "Could not find Java keytool or keystore for Artifactory"
      return 1
    }
  fi

  deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "artifactory"
}
|
||||
|
||||
# Import the certificate into the keystore of Bitbucket's bundled JRE
# (/mnt/ebs/bitbucket/*/jre), falling back to a general Java search, then
# hand off to deploy_java_keystore (alias "ssl-cert", service "atlbitbucket").
# Returns:   1 when no keytool/keystore pair can be found; otherwise the
#            status of deploy_java_keystore
deploy_bitbucket() {
  info "Deploying to bitbucket..."
  local java_bin=""
  local keystore=""
  local bb_dir # keep the loop variable out of the global scope

  # Check app-specific paths first with glob. If nothing matches, the literal
  # pattern is iterated once and rejected by the -d test.
  for bb_dir in /mnt/ebs/bitbucket/*/jre; do
    if [[ -d "$bb_dir" && -x "$bb_dir/bin/keytool" && -f "$bb_dir/lib/security/cacerts" ]]; then
      java_bin="$bb_dir/bin"
      keystore="$bb_dir/lib/security/cacerts"
      break
    fi
  done

  if [[ -z "$java_bin" || -z "$keystore" ]]; then
    debug_echo "Bitbucket default paths not found, searching for Java"
    if ! find_java_keystore java_bin keystore; then
      error "Could not find Java keytool or keystore for Bitbucket"
      return 1
    fi
  fi

  deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "atlbitbucket"
}
|
||||
|
||||
# Import the certificate into the keystore of Jira's bundled JRE, falling
# back to a general Java search when the bundled one is not present, then
# hand off to deploy_java_keystore (alias "ssl-cert", service "jira").
# Returns:   1 when no keytool/keystore pair can be found; otherwise the
#            status of deploy_java_keystore
deploy_jira() {
  info "Deploying to jira..."
  local bundled_jre="/mnt/ebs/jira/jre"
  local java_bin="$bundled_jre/bin"
  local keystore="$bundled_jre/lib/security/cacerts"

  if ! [[ -x "$java_bin/keytool" && -f "$keystore" ]]; then
    debug_echo "Jira default paths not found, searching for Java"
    find_java_keystore java_bin keystore || {
      error "Could not find Java keytool or keystore for Jira"
      return 1
    }
  fi

  deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "jira"
}
|
||||
|
||||
# Build the combined PEM (certificate followed by private key) that HAProxy
# loads, install it as <cert dir>/server.pem and reload haproxy.
# Globals:   CERT_FILE, KEY_FILE, DRY_RUN, BACKUP (read)
#            HAPROXY_CERT_DIR (read, optional) - overrides /etc/haproxy/certs
# Returns:   1 if the directory, backup or PEM cannot be created (the
#            existing PEM is left untouched and haproxy is not reloaded);
#            otherwise the status of reload_service (0 in dry-run)
deploy_haproxy() {
  info "Deploying to haproxy..."
  local cert_dir="${HAPROXY_CERT_DIR:-/etc/haproxy/certs}"
  local pem_file="$cert_dir/server.pem"
  local tmp_pem

  if [[ "$DRY_RUN" != true ]] && ! mkdir -p "$cert_dir"; then
    error "Failed to create $cert_dir"
    return 1
  fi

  if [[ "$BACKUP" == true ]]; then
    backup_file "$pem_file" || return 1
  fi

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would concatenate $CERT_FILE + $KEY_FILE -> $pem_file"
    info "[DRY RUN] Would reload haproxy"
    return 0
  fi

  # Assemble the PEM in a temp file created with a restrictive umask, so the
  # private key is never readable by others, then rename it into place so a
  # failure cannot leave a truncated server.pem behind.
  tmp_pem="${pem_file}.tmp.$$"
  if ! (
    umask 077
    {
      cat "$CERT_FILE" || exit 1
      # A certificate without a trailing newline would otherwise fuse its END
      # line with the key's BEGIN line. $(tail -c 1) is empty when the last
      # byte is a newline.
      [[ -z "$(tail -c 1 "$CERT_FILE")" ]] || echo
      cat "$KEY_FILE" || exit 1
    } > "$tmp_pem"
  ) || ! chmod 600 "$tmp_pem" || ! mv -f "$tmp_pem" "$pem_file"; then
    rm -f "$tmp_pem"
    error "Failed to create combined PEM at $pem_file"
    return 1
  fi

  info "Created combined PEM at $pem_file"
  reload_service haproxy
}
|
||||
|
||||
# Add the CA bundle (or, when none was given, the certificate itself) to the
# system trust store and regenerate it.
# Supports update-ca-trust (RHEL family) and update-ca-certificates
# (Debian family); on the latter the file is renamed to end in ".crt".
# Globals:   CA_FILE, CERT_FILE, DRY_RUN (read)
# Returns:   1 if nothing was provided, no update tool exists, the copy fails
#            or the update tool fails; 0 otherwise
deploy_system() {
  info "Deploying to system CA trust store..."
  local cert_to_install trust_dir cert_name update_cmd family

  if [[ -z "$CA_FILE" && -z "$CERT_FILE" ]]; then
    error "No certificate or CA bundle provided for system trust store"
    return 1
  fi

  cert_to_install="${CA_FILE:-$CERT_FILE}"
  cert_name=$(basename "$cert_to_install")

  if command -v update-ca-trust >/dev/null 2>&1; then
    # RHEL/CentOS/Fedora/Rocky/Alma
    trust_dir="/etc/pki/ca-trust/source/anchors"
    update_cmd="update-ca-trust"
    family="RHEL-based"
  elif command -v update-ca-certificates >/dev/null 2>&1; then
    # Debian/Ubuntu
    trust_dir="/usr/local/share/ca-certificates"
    update_cmd="update-ca-certificates"
    family="Debian-based"
    # Debian requires .crt extension
    cert_name="${cert_name%.*}.crt"
  else
    error "Could not find update-ca-trust or update-ca-certificates"
    return 1
  fi

  if [[ "$DRY_RUN" == true ]]; then
    info "[DRY RUN] Would copy $cert_to_install -> $trust_dir/$cert_name"
    info "[DRY RUN] Would run $update_cmd"
    return 0
  fi

  copy_file "$cert_to_install" "$trust_dir/$cert_name" || return 1

  # copy_file installs files owner-only (600). A trust anchor is public
  # material and has to be readable by unprivileged processes too.
  chmod 644 "$trust_dir/$cert_name" ||
    warn "Could not make $trust_dir/$cert_name world-readable"

  if ! "$update_cmd"; then
    error "$update_cmd failed"
    return 1
  fi
  info "Updated system CA trust store ($family)"
}
|
||||
|
||||
# Parse command-line options into the runtime variables.
# Globals:   CERT_FILE, KEY_FILE, CA_FILE, TARGETS, DRY_RUN, BACKUP (written)
# Arguments: the script's command-line arguments
# Returns:   exits 0 after --help/-h; exits 1 on an unknown option or when an
#            option that needs a value is given without one
parse_arguments() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --cert | --key | --ca | --targets)
        # Without this check `shift 2` fails when the value is missing,
        # nothing is consumed and the loop never terminates.
        if [[ $# -lt 2 ]]; then
          error "Option $1 requires an argument"
          exit 1
        fi
        case "$1" in
          --cert) CERT_FILE="$2" ;;
          --key) KEY_FILE="$2" ;;
          --ca) CA_FILE="$2" ;;
          --targets) TARGETS="$2" ;;
        esac
        shift 2
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --backup)
        BACKUP=true
        shift
        ;;
      --help | -h)
        show_help
        exit 0
        ;;
      *)
        error "Unknown option: $1"
        show_help >&2
        exit 1
        ;;
    esac
  done
}
|
||||
|
||||
# Validate the parsed options before anything is deployed. Every failed
# check prints an error and exits the script with status 1.
# Globals:   CERT_FILE, KEY_FILE, CA_FILE, TARGETS (read)
validate_inputs() {
  # Required options must be present.
  [[ -n "$CERT_FILE" ]] || {
    error "Certificate file is required (--cert)"
    exit 1
  }
  [[ -n "$KEY_FILE" ]] || {
    error "Key file is required (--key)"
    exit 1
  }
  [[ -n "$TARGETS" ]] || {
    error "At least one target is required (--targets)"
    exit 1
  }

  # The referenced files must exist (the CA bundle only when it was given).
  [[ -f "$CERT_FILE" ]] || {
    error "Certificate file not found: $CERT_FILE"
    exit 1
  }
  [[ -f "$KEY_FILE" ]] || {
    error "Key file not found: $KEY_FILE"
    exit 1
  }
  if [[ -n "$CA_FILE" ]] && [[ ! -f "$CA_FILE" ]]; then
    error "CA bundle file not found: $CA_FILE"
    exit 1
  fi

  # Both files must parse with openssl (the key is checked as an RSA key).
  openssl x509 -noout -text -in "$CERT_FILE" >/dev/null 2>&1 || {
    error "Invalid certificate file: $CERT_FILE"
    exit 1
  }
  openssl rsa -noout -check -in "$KEY_FILE" >/dev/null 2>&1 || {
    error "Invalid key file: $KEY_FILE"
    exit 1
  }

  # Finally the key has to belong to the certificate.
  validate_cert_key_match "$CERT_FILE" "$KEY_FILE" || exit 1
}
|
||||
|
||||
# Dispatch one target name to its deploy_<target> handler.
# Arguments: $1 - target name
# Returns:   the handler's status, or 1 for an unknown target
deploy_target() {
  local target="$1"

  case "$target" in
    nginx | apache | postfix | dovecot | artifactory | bitbucket | jira | haproxy | system)
      # Each supported target has a handler named after it.
      "deploy_${target}"
      ;;
    *)
      error "Unknown target: $target"
      error "Valid targets: nginx, apache, postfix, dovecot, artifactory, bitbucket, jira, haproxy, system"
      return 1
      ;;
  esac
}
|
||||
|
||||
# Entry point: parse and validate the options, deploy to every requested
# target and print a summary.
# Globals:   TARGETS, DRY_RUN (read)
# Arguments: the script's command-line arguments
# Returns:   0 when every target succeeded; 1 if any target failed or the
#            target list contained no target names
main() {
  parse_arguments "$@"
  validate_inputs

  if [[ "$DRY_RUN" == true ]]; then
    info "Running in DRY RUN mode — no changes will be made"
  fi

  local failed=0
  local succeeded=0
  local target
  local -a target_list=()

  IFS=',' read -ra target_list <<< "$TARGETS"
  for target in "${target_list[@]}"; do
    # Trim whitespace
    target="${target//[[:space:]]/}"

    # Ignore empty entries such as the one produced by a trailing comma.
    if [[ -z "$target" ]]; then
      continue
    fi

    info "--- Deploying to target: $target ---"

    # Plain assignments are used for the counters: ((n++)) returns a non-zero
    # status when n is 0.
    if deploy_target "$target"; then
      succeeded=$((succeeded + 1))
      info "Target $target: OK"
    else
      failed=$((failed + 1))
      error "Target $target: FAILED"
    fi
    echo
  done

  if [[ $((succeeded + failed)) -eq 0 ]]; then
    error "No targets found in: $TARGETS"
    return 1
  fi

  info "Deployment complete: $succeeded succeeded, $failed failed"

  if [[ $failed -gt 0 ]]; then
    return 1
  fi
}
|
||||
|
||||
# Run main only when the script is executed directly; when it is sourced
# (BASH_SOURCE[0] differs from $0) only the definitions are loaded.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] || main "$@"
|
||||
@@ -0,0 +1,347 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: systemd-service-exporter.sh
|
||||
# Version: 1.0
|
||||
# Description: Prometheus textfile collector exporter for systemd service status
|
||||
# Monitors service state, uptime, restart count, and enabled status
|
||||
#
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
# Date: 2026-03-03
|
||||
#
|
||||
# Prerequisites:
|
||||
# - systemctl command available (systemd)
|
||||
# - node_exporter with textfile collector enabled
|
||||
# - /var/lib/node_exporter directory exists
|
||||
#
|
||||
# Usage:
|
||||
# # Configure services via environment variable
|
||||
# SERVICE_LIST="nginx,sshd,cron" ./systemd-service-exporter.sh
|
||||
#
|
||||
# # Configure services via config file
|
||||
# echo -e "nginx\nsshd\ncron" > /etc/systemd-service-exporter.conf
|
||||
# ./systemd-service-exporter.sh
|
||||
#
|
||||
# # Debug mode
|
||||
# DEBUG=1 SERVICE_LIST="nginx" ./systemd-service-exporter.sh
|
||||
#
|
||||
# # Dry run (output to stdout)
|
||||
# ./systemd-service-exporter.sh --dry-run
|
||||
#
|
||||
# Metrics Exported:
|
||||
# - linux_systemd_service_state{service,state} - Service state (1=current, 0=other)
|
||||
# - linux_systemd_service_uptime_seconds{service} - Seconds since service became active
|
||||
# - linux_systemd_service_restarts_total{service} - Number of times the service restarted
|
||||
# - linux_systemd_service_enabled{service} - Whether the service is enabled (1/0)
|
||||
#
|
||||
# Configuration:
|
||||
# Environment: SERVICE_LIST (comma-separated)
|
||||
# Config file: /etc/systemd-service-exporter.conf (one per line)
|
||||
# Textfile directory: /var/lib/node_exporter
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

VERSION="1.0"
SCRIPT_NAME="${0##*/}"
# TEXTFILE_DIR and CONFIG_FILE may be overridden from the environment.
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
OUTPUT_FILE="${TEXTFILE_DIR}/systemd_services.prom"
CONFIG_FILE="${CONFIG_FILE:-/etc/systemd-service-exporter.conf}"
# Per-process staging file, named after the output file plus this PID.
TMP_FILE="${OUTPUT_FILE}.$$"
readonly VERSION SCRIPT_NAME TEXTFILE_DIR OUTPUT_FILE CONFIG_FILE TMP_FILE

# Runtime flags (may be changed by command-line options)
DRY_RUN=false
DEBUG=${DEBUG:-}
|
||||
|
||||
# ============================================================================
|
||||
# HELPER FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
# Print a "[DEBUG] ..." line on stderr, but only when DEBUG is non-empty.
# Arguments: the message words
debug_echo() {
  [[ -n "$DEBUG" ]] || return 0
  printf '%s\n' "[DEBUG] $*" >&2
}
|
||||
|
||||
# Print an "[ERROR] ..." line on stderr.
# Arguments: the message words
log_error() {
  printf '%s\n' "[ERROR] $*" >&2
}
|
||||
|
||||
# Remove this process's staging file if it is still present.
# Globals: TMP_FILE (read)
cleanup() {
  local leftover="$TMP_FILE"
  rm -f "$leftover"
}

# Run the cleanup whenever the script exits.
trap cleanup EXIT
|
||||
|
||||
# Print the usage text on stdout and terminate the script with status 0.
# Globals: SCRIPT_NAME (read)
show_help() {
  local prog="$SCRIPT_NAME"

  cat <<HELP_TEXT
Usage: ${prog} [OPTIONS]

Prometheus textfile collector exporter for systemd service status.
Monitors service state, uptime, restart count, and enabled status.

OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version

CONFIGURATION:
Services can be configured in two ways:

1. Environment variable (comma-separated):
SERVICE_LIST="nginx,sshd,cron" ${prog}

2. Config file (one service per line):
/etc/systemd-service-exporter.conf

The environment variable takes precedence over the config file.

ENVIRONMENT VARIABLES:
SERVICE_LIST Comma-separated list of services to monitor
CONFIG_FILE Path to config file (default: /etc/systemd-service-exporter.conf)
TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter)
DEBUG Enable debug output when set to any value

EXAMPLES:
SERVICE_LIST="nginx,sshd,cron" ${prog}
SERVICE_LIST="docker" ${prog} --dry-run
DEBUG=1 ${prog}

HELP_TEXT
  exit 0
}
|
||||
|
||||
# Print "<script> version <version>" and terminate the script with status 0.
# Globals: SCRIPT_NAME (read), VERSION (read)
show_version() {
  printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
  exit 0
}
|
||||
|
||||
# ============================================================================
|
||||
# SERVICE DISCOVERY
|
||||
# ============================================================================
|
||||
|
||||
# Resolve the list of services to monitor and print it, one name per line.
# SERVICE_LIST (comma-separated) takes precedence over CONFIG_FILE (one name
# per line, '#' starts a comment).
# Globals:   SERVICE_LIST (read), CONFIG_FILE (read)
# Outputs:   service names on stdout, one per line
# Exits:     1 when neither source is available or no service name is found
load_services() {
  local -a services=()
  local -a raw_entries=()
  local entry line

  if [[ -n "${SERVICE_LIST:-}" ]]; then
    debug_echo "Loading services from SERVICE_LIST environment variable"
    IFS=',' read -ra raw_entries <<< "$SERVICE_LIST"
    for entry in "${raw_entries[@]}"; do
      # Same normalisation as the config-file path: drop whitespace and skip
      # empty items, so "nginx, sshd,,cron" yields three clean names.
      entry="${entry//[[:space:]]/}"
      if [[ -n "$entry" ]]; then
        services+=("$entry")
      fi
    done
  elif [[ -f "$CONFIG_FILE" ]]; then
    debug_echo "Loading services from config file: $CONFIG_FILE"
    # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      line="${line%%#*}"
      line="${line//[[:space:]]/}"
      if [[ -n "$line" ]]; then
        services+=("$line")
      fi
    done < "$CONFIG_FILE"
  else
    log_error "No services configured. Set SERVICE_LIST or create $CONFIG_FILE"
    exit 1
  fi

  if [[ ${#services[@]} -eq 0 ]]; then
    log_error "No services found in configuration"
    exit 1
  fi

  debug_echo "Monitoring ${#services[@]} services: ${services[*]}"
  printf '%s\n' "${services[@]}"
}
|
||||
|
||||
# ============================================================================
|
||||
# METRICS COLLECTION
|
||||
# ============================================================================
|
||||
|
||||
# Print the state reported by `systemctl is-active` for a unit, or "unknown"
# when systemctl prints nothing.
# Arguments: $1 - service name
get_service_state() {
  local unit="$1"
  local reported

  # A non-zero exit status is tolerated; only the printed state is used.
  reported=$(systemctl is-active "$unit" 2>/dev/null) || :
  printf '%s\n' "${reported:-unknown}"
}
|
||||
|
||||
# Print the number of seconds a service has been active, or 0 when it is not
# active or its start time cannot be determined.
# Arguments: $1 - service name
# Outputs:   a non-negative integer on stdout
get_service_uptime() {
  local service="$1"
  local timestamp active_epoch now uptime

  # systemd keeps ActiveEnterTimestamp after a unit stops, so without this
  # check a dead service would still report an ever-growing "uptime".
  if ! systemctl is-active --quiet "$service" 2>/dev/null; then
    echo "0"
    return
  fi

  timestamp=$(systemctl show "$service" --property=ActiveEnterTimestamp --value 2>/dev/null) || true
  if [[ -z "$timestamp" ]]; then
    echo "0"
    return
  fi

  # Unparseable values (for example "n/a") make date fail and print nothing.
  active_epoch=$(date -d "$timestamp" +%s 2>/dev/null) || true
  if [[ ! "$active_epoch" =~ ^[0-9]+$ ]]; then
    echo "0"
    return
  fi

  now=$(date +%s)
  uptime=$((now - active_epoch))

  # Clamp negative results (start time ahead of the current clock) to 0.
  if [[ $uptime -lt 0 ]]; then
    uptime=0
  fi
  echo "$uptime"
}
|
||||
|
||||
# Print the NRestarts property of a unit, or 0 when systemctl prints nothing.
# Arguments: $1 - service name
get_restart_count() {
  local unit="$1"
  local restarts

  restarts=$(systemctl show "$unit" --property=NRestarts --value 2>/dev/null) || :
  if [[ -z "$restarts" ]]; then
    restarts=0
  fi
  printf '%s\n' "$restarts"
}
|
||||
|
||||
# Print 1 when `systemctl is-enabled` reports exactly "enabled", else 0.
# Arguments: $1 - service name
get_enabled_status() {
  local unit="$1"
  local answer

  answer=$(systemctl is-enabled "$unit" 2>/dev/null) || :
  case "$answer" in
    enabled) echo "1" ;;
    *) echo "0" ;;
  esac
}
|
||||
|
||||
# Print 1 when the two state strings are identical, 0 otherwise.
# Arguments: $1 - current state, $2 - state to compare against
state_to_value() {
  if [[ "$1" != "$2" ]]; then
    echo "0"
    return
  fi
  echo "1"
}
|
||||
|
||||
# Escape a string for use as a Prometheus label value:
# backslash -> \\, double quote -> \", newline -> \n.
# Arguments: $1 - raw value
_prom_escape_label() {
  local value="$1"
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  value=${value//$'\n'/\\n}
  printf '%s' "$value"
}

# Collect state, uptime, restart count and enabled status for every
# configured service and print them in Prometheus text format.
# Outputs: metric lines on stdout
# Returns: 1 when no service could be loaded, 0 otherwise
collect_metrics() {
  local -a services=()
  local -a labels=()
  local svc service state s val uptime restarts enabled i

  while IFS= read -r svc; do
    services+=("$svc")
  done < <(load_services)

  # load_services runs in a subshell here, so its `exit 1` cannot stop this
  # function; an empty list is the only visible sign that it failed.
  if [[ ${#services[@]} -eq 0 ]]; then
    return 1
  fi

  for service in "${services[@]}"; do
    labels+=("$(_prom_escape_label "$service")")
  done

  # Lines are printed verbatim with %s. Expanding backslash escapes (as %b
  # does) would rewrite unit names that contain sequences such as \x2d.
  printf '%s\n' "# HELP linux_systemd_service_state Current state of the systemd service"
  printf '%s\n' "# TYPE linux_systemd_service_state gauge"
  for i in "${!services[@]}"; do
    service=${services[i]}
    state=$(get_service_state "$service")
    debug_echo "Service $service: state=$state"
    for s in active inactive failed; do
      val=$(state_to_value "$state" "$s")
      printf '%s\n' "linux_systemd_service_state{service=\"${labels[i]}\",state=\"${s}\"} ${val}"
    done
  done

  printf '%s\n' "# HELP linux_systemd_service_uptime_seconds Time in seconds since the service became active"
  printf '%s\n' "# TYPE linux_systemd_service_uptime_seconds gauge"
  for i in "${!services[@]}"; do
    service=${services[i]}
    uptime=$(get_service_uptime "$service")
    debug_echo "Service $service: uptime=${uptime}s"
    printf '%s\n' "linux_systemd_service_uptime_seconds{service=\"${labels[i]}\"} ${uptime}"
  done

  printf '%s\n' "# HELP linux_systemd_service_restarts_total Total number of service restarts"
  printf '%s\n' "# TYPE linux_systemd_service_restarts_total counter"
  for i in "${!services[@]}"; do
    service=${services[i]}
    restarts=$(get_restart_count "$service")
    debug_echo "Service $service: restarts=$restarts"
    printf '%s\n' "linux_systemd_service_restarts_total{service=\"${labels[i]}\"} ${restarts}"
  done

  printf '%s\n' "# HELP linux_systemd_service_enabled Whether the service is enabled to start at boot"
  printf '%s\n' "# TYPE linux_systemd_service_enabled gauge"
  for i in "${!services[@]}"; do
    service=${services[i]}
    enabled=$(get_enabled_status "$service")
    debug_echo "Service $service: enabled=$enabled"
    printf '%s\n' "linux_systemd_service_enabled{service=\"${labels[i]}\"} ${enabled}"
  done
}
|
||||
|
||||
# ============================================================================
|
||||
# OUTPUT
|
||||
# ============================================================================
|
||||
|
||||
# Collect the metrics and either print them (dry run) or publish them to
# OUTPUT_FILE through a staging file.
# Globals: DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (all read)
# Exits:   1 when collection fails, the textfile directory is missing, or
#          the file cannot be written or renamed
write_metrics() {
  local metrics

  # Do not publish anything when collection failed; the previously written
  # metrics file stays in place.
  if ! metrics=$(collect_metrics); then
    log_error "Metric collection failed; $OUTPUT_FILE left untouched"
    exit 1
  fi

  if [[ "$DRY_RUN" == "true" ]]; then
    printf '%s\n' "$metrics"
    return
  fi

  if [[ ! -d "$TEXTFILE_DIR" ]]; then
    log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
    exit 1
  fi

  # Write the complete content to the staging file first, then rename it
  # over the output file so the published file is replaced in one step.
  if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
    log_error "Failed to write temporary file: $TMP_FILE"
    exit 1
  fi
  if ! mv "$TMP_FILE" "$OUTPUT_FILE"; then
    log_error "Failed to move $TMP_FILE to $OUTPUT_FILE"
    exit 1
  fi
  debug_echo "Metrics written to $OUTPUT_FILE"
}
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
# Entry point: apply command-line options, check that systemctl exists,
# then collect and write the metrics.
# Arguments: the script's command-line arguments
# Exits:     1 on an unknown option or when systemctl is not available
main() {
  local arg

  # No option takes a value, so each argument can be handled on its own.
  for arg in "$@"; do
    case "$arg" in
      --dry-run) DRY_RUN=true ;;
      --debug) DEBUG=1 ;;
      --help | -h) show_help ;;
      --version | -v) show_version ;;
      *)
        log_error "Unknown option: $arg"
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done

  command -v systemctl &>/dev/null || {
    log_error "systemctl not found — this script requires systemd"
    exit 1
  }

  write_metrics
}
|
||||
|
||||
# Script entry point: forward all command-line arguments to main.
main "$@"
|
||||
Executable
+542
@@ -0,0 +1,542 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: ufw-blocklist-metrics.sh
|
||||
# Version: 2.3
|
||||
# Description: Production Prometheus exporter for UFW Blocklists (OPTIMIZED)
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
#
|
||||
# Optimizations in v2.1:
|
||||
# - Single journalctl call with cached output
|
||||
# - Cached feed config parsing
|
||||
# - Eliminated redundant file operations
|
||||
# - 4.5 minutes → ~30 seconds typical runtime
|
||||
#
|
||||
# Fixes in v2.2:
|
||||
# - Fixed typo in script name header (bocklist → blocklist)
|
||||
# - Fixed ipset member counting to use Members: section
|
||||
# - Fixed empty journal data producing false grep counts
|
||||
# - Fixed HTTP response headers missing trailing \r\n
|
||||
# - Fixed SC2155/SC2126/SC2295 shellcheck warnings
|
||||
# - Added scrape timestamp metric
|
||||
# - Used SCRIPT_VERSION variable for version strings
|
||||
#
|
||||
# Fixes in v2.3:
|
||||
# - Fixed get_ipset_size using grep -c (exit 1 on 0 matches) causing
|
||||
# duplicate "0" output lines and arithmetic errors; switched to wc -l
|
||||
# - Fixed same grep -c || echo 0 bug in ufw_blocklist_enabled and
|
||||
# ufw_blocklist_total_rules heredoc substitutions
|
||||
# - Fixed misplaced 2>/dev/null on [ ] test for conntrack and effectiveness
|
||||
# - Fixed hardcoded v2.1 in usage text; now uses SCRIPT_VERSION
|
||||
################################################################################
|
||||
|
||||
# Locations of the feed configuration and its cache
CONFIG_DIR="/etc/ufw-threats"
CACHE_DIR="${CONFIG_DIR}/cache"
FEEDS_CONFIG="${CONFIG_DIR}/feeds.conf"

# ipset naming
IPSET_PREFIX="ufw-feed"
WHITELIST_IPSET="ufw-whitelist"
WHITELIST_IPSET_V6="ufw-whitelist-v6"

SCRIPT_VERSION="2.3"

# Output settings (OUTPUT_FILE, HTTP_MODE and HTTP_PORT are set by parse_args)
TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9418
LOCK_FILE="/var/run/ufw-blocklist-metrics.lock"

# Global cache variables
JOURNAL_1H=""
JOURNAL_24H=""
declare -a FEEDS_ARRAY=()
|
||||
|
||||
# Print the usage text on stdout and terminate the script with status 0.
# Globals: SCRIPT_VERSION (read), HTTP_PORT (read)
show_usage() {
  cat <<USAGE_TEXT
Usage: ${0} [OPTIONS]

Export per-feed UFW threat statistics as Prometheus metrics (v${SCRIPT_VERSION}).

MODES:
--textfile Write to node_exporter textfile collector
--http Run HTTP server on port ${HTTP_PORT}

OPTIONS:
-p, --port HTTP port
-o, --output Output file
-h, --help Show help

USAGE_TEXT
  exit 0
}
|
||||
|
||||
# Parse command-line options into OUTPUT_FILE, HTTP_MODE and HTTP_PORT.
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Arguments: the script's command-line arguments
# Exits:     1 on an unknown option, a missing option value or a bad port
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        show_usage
        ;;
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/ufw_blocklist_metrics.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p | --port)
        # Without this check a missing value makes `shift 2` fail, nothing is
        # consumed, and the loop spins forever on the same argument.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        if [[ ! "$2" =~ ^[0-9]{1,5}$ ]] || ((10#$2 < 1 || 10#$2 > 65535)); then
          echo "Invalid port: $2" >&2
          exit 1
        fi
        HTTP_PORT="$2"
        shift 2
        ;;
      -o | --output)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        OUTPUT_FILE="$2"
        shift 2
        ;;
      *)
        echo "Unknown: $1" >&2
        exit 1
        ;;
    esac
  done
}
|
||||
|
||||
# Load all journal data ONCE at startup
|
||||
# Fill JOURNAL_1H and JOURNAL_24H with the journal lines that contain
# "[THREAT" for the last hour and the last 24 hours. Each variable ends up
# empty when nothing matches or journalctl produces no output.
# Globals: JOURNAL_1H, JOURNAL_24H (written)
cache_journal_data() {
  local tag_pattern='\[THREAT'

  JOURNAL_1H=$(
    timeout 10 journalctl --since "1 hour ago" 2>/dev/null | grep "$tag_pattern" || true
  )
  JOURNAL_24H=$(
    timeout 30 journalctl --since "24 hours ago" 2>/dev/null | grep "$tag_pattern" || true
  )
}
|
||||
|
||||
# Parse feeds config ONCE into array
|
||||
# Load FEEDS_CONFIG into FEEDS_ARRAY, one "enabled|name|url|type|description"
# entry per feed. Blank lines and lines starting with '#' are skipped; a
# missing config file leaves the array empty.
# Globals: FEEDS_CONFIG (read), FEEDS_ARRAY (written)
cache_feeds_config() {
  # Keep the field variables local so parsing does not leak globals.
  local enabled name url type description

  FEEDS_ARRAY=()
  [[ -f "$FEEDS_CONFIG" ]] || return 0

  # `|| [[ -n "$enabled" ]]` keeps a final line that has no trailing newline.
  while IFS='|' read -r enabled name url type description || [[ -n "$enabled" ]]; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    FEEDS_ARRAY+=("$enabled|$name|$url|$type|$description")
  done < "$FEEDS_CONFIG"
}
|
||||
|
||||
# Print the number of lines that follow the "Members:" line in the output of
# `ipset list <name>`; prints 0 when the set cannot be listed.
# Arguments: $1 - ipset name
get_ipset_size() {
  local set_name="$1"
  local members

  members=$(
    ipset list "$set_name" 2>/dev/null \
      | awk 'in_members { n++ } /^Members:$/ { in_members = 1 } END { print n + 0 }'
  )
  echo "${members:-0}"
}
|
||||
|
||||
# Optimized: Use cached journal data
|
||||
# Count cached journal lines that carry the literal tag "[<tag>:<feed>]".
# Globals:   JOURNAL_1H, JOURNAL_24H (read)
# Arguments: $1 - tag prefix ("THREAT" or "THREAT-v6")
#            $2 - feed name
#            $3 - period: "1 hour ago" or "24 hours ago"
# Outputs:   the count on stdout; 0 for an unknown period or empty cache
_count_feed_tag() {
  local tag="$1"
  local feed="$2"
  local period="$3"
  local data count

  case "$period" in
    "1 hour ago") data="$JOURNAL_1H" ;;
    "24 hours ago") data="$JOURNAL_24H" ;;
    *)
      echo 0
      return
      ;;
  esac

  if [[ -z "$data" ]]; then
    echo 0
    return
  fi

  # -F: the feed name is data, not a regular expression. As a regex,
  # "blocklist.de" would also count lines tagged "blocklistXde".
  count=$(printf '%s' "$data" | grep -cF -- "[${tag}:${feed}]")
  echo "${count:-0}"
}

# Print the number of cached IPv4 block events for a feed.
# Arguments: $1 - feed name, $2 - period ("1 hour ago" or "24 hours ago")
get_feed_blocks() {
  _count_feed_tag "THREAT" "$1" "$2"
}

# Print the number of cached IPv6 block events for a feed.
# Arguments: $1 - feed name, $2 - period ("1 hour ago" or "24 hours ago")
get_feed_blocks_v6() {
  _count_feed_tag "THREAT-v6" "$1" "$2"
}
|
||||
|
||||
# Print the modification time of a regular file as a Unix timestamp, or 0
# when the file does not exist or stat fails.
# Arguments: $1 - file path
get_file_timestamp() {
  local mtime

  if [[ -f "$1" ]] && mtime=$(stat -c %Y "$1" 2>/dev/null); then
    echo "$mtime"
  else
    echo "0"
  fi
}
|
||||
|
||||
# Print the size of a regular file in bytes, or 0 when the file does not
# exist or stat fails.
# Arguments: $1 - file path
get_file_size() {
  local bytes

  if [[ -f "$1" ]] && bytes=$(stat -c %s "$1" 2>/dev/null); then
    echo "$bytes"
  else
    echo "0"
  fi
}
|
||||
|
||||
# Print the number of seconds since a file was last modified, or 0 when the
# file does not exist.
# Arguments: $1 - file path
get_cache_age() {
  local path="$1"
  local now mtime

  if [[ ! -f "$path" ]]; then
    echo "0"
    return
  fi

  now=$(date +%s)
  mtime=$(stat -c %Y "$path" 2>/dev/null || echo 0)
  echo $((now - mtime))
}
|
||||
|
||||
# Print the content of /proc/sys/net/netfilter/nf_conntrack_count, or 0 when
# that file does not exist.
get_conntrack_count() {
  local proc_file=/proc/sys/net/netfilter/nf_conntrack_count

  if [[ ! -f "$proc_file" ]]; then
    echo "0"
    return
  fi
  cat "$proc_file"
}
|
||||
|
||||
# Print the content of /proc/sys/net/netfilter/nf_conntrack_max, or 0 when
# that file does not exist.
get_conntrack_max() {
  local proc_file=/proc/sys/net/netfilter/nf_conntrack_max

  if [[ ! -f "$proc_file" ]]; then
    echo "0"
    return
  fi
  cat "$proc_file"
}
|
||||
|
||||
# Print the fourth field of the "Size in memory:" line from
# `ipset list <name> -t`, or 0 when no such line is produced.
# Arguments: $1 - ipset name
get_ipset_memory() {
  local set_name="$1"
  local bytes

  bytes=$(ipset list "$set_name" -t 2>/dev/null | awk '/Size in memory:/ { print $4 }')
  echo "${bytes:-0}"
}
|
||||
|
||||
# Print "used|available|use%" taken from the last line of `df -B1` for
# CACHE_DIR, or "0|0|0%" when the directory does not exist.
# Globals: CACHE_DIR (read)
get_cache_disk_usage() {
  if [[ ! -d "$CACHE_DIR" ]]; then
    echo "0|0|0%"
    return
  fi

  # Remember the fields of every line and print only those of the last one;
  # nothing is printed when df produced no output.
  df -B1 "$CACHE_DIR" 2>/dev/null \
    | awk '{ last = $3 "|" $4 "|" $5 } END { if (NR) print last }'
}
|
||||
|
||||
# Print the size reported by `du -sb` for CACHE_DIR, or 0 when the directory
# does not exist.
# Globals: CACHE_DIR (read)
get_total_cache_size() {
  if [[ ! -d "$CACHE_DIR" ]]; then
    echo "0"
    return
  fi

  # du prints "<size><TAB><path>"; keep the size column only.
  du -sb "$CACHE_DIR" 2>/dev/null | cut -f1
}
|
||||
|
||||
acquire_lock() {
|
||||
if [ -f "$LOCK_FILE" ]; then
|
||||
local pid
|
||||
pid=$(cat "$LOCK_FILE" 2>/dev/null)
|
||||
if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
|
||||
echo "ERROR: Another instance is already running (PID: $pid)" >&2
|
||||
exit 1
|
||||
else
|
||||
echo "Removing stale lock file" >&2
|
||||
rm -f "$LOCK_FILE"
|
||||
fi
|
||||
fi
|
||||
echo $$ > "$LOCK_FILE"
|
||||
trap cleanup EXIT INT TERM
|
||||
}
|
||||
|
||||
# Remove the single-instance lock file.
# Globals: LOCK_FILE (read)
cleanup() {
  local lock_path="$LOCK_FILE"
  rm -f "$lock_path"
}
|
||||
|
||||
# Print a blank separator line followed by the HELP/TYPE header of one
# metric family.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text
_ufw_metric_header() {
  printf '\n# HELP %s %s\n# TYPE %s %s\n' "$1" "$3" "$1" "$2"
}

# Print all exporter metrics in Prometheus text format.
# Globals: FEEDS_ARRAY, CACHE_DIR, IPSET_PREFIX, WHITELIST_IPSET,
#          WHITELIST_IPSET_V6, SCRIPT_VERSION (all read)
# Outputs: metric lines on stdout
generate_metrics() {
  local start_time
  start_time=$(date +%s)

  # Parse the cached feed list once. enabled_lookup is "|name1|name2|" so a
  # feed can be tested with a literal substring match.
  local -a enabled_feeds=()
  local -a feed_blocks_24h=()
  local enabled_lookup="|"
  local feed_line enabled name _
  for feed_line in "${FEEDS_ARRAY[@]}"; do
    IFS='|' read -r enabled name _ <<< "$feed_line"
    [[ "$enabled" == "1" ]] || continue
    enabled_feeds+=("$name")
    enabled_lookup+="${name}|"
  done

  cat <<EOF
# HELP ufw_blocklist_info Per-feed UFW threat blocking info
# TYPE ufw_blocklist_info gauge
ufw_blocklist_info{mode="per-feed",version="${SCRIPT_VERSION}"} 1

# HELP ufw_blocklist_enabled Total enabled feeds
# TYPE ufw_blocklist_enabled gauge
ufw_blocklist_enabled ${#enabled_feeds[@]}
EOF

  # Only export ipsets that exist and belong to an enabled feed.
  _ufw_metric_header ufw_blocklist_ipset_size gauge "Number of IPs per feed ipset"
  local ipset_name feed_name ip_version size
  while IFS= read -r ipset_name; do
    feed_name="${ipset_name#"${IPSET_PREFIX}"-}"
    ip_version="4"
    if [[ "$feed_name" == *-v6 ]]; then
      feed_name="${feed_name%-v6}"
      ip_version="6"
    fi

    # Literal match: used as a regex, a name like "a.b" would also enable
    # the ipset of a feed called "axb".
    [[ "$enabled_lookup" == *"|${feed_name}|"* ]] || continue

    size=$(get_ipset_size "$ipset_name")
    echo "ufw_blocklist_ipset_size{feed=\"$feed_name\",ip_version=\"$ip_version\",status=\"enabled\"} ${size:-0}"
  done < <(ipset list -n 2>/dev/null | grep "^${IPSET_PREFIX}-")

  # Every value below gets a ":-0" default: a sample line without a value
  # is not valid exposition format.
  local feed v4 v6

  _ufw_metric_header ufw_blocklist_cache_age_seconds gauge "Seconds since feed update"
  for feed in "${enabled_feeds[@]}"; do
    v4=$(get_cache_age "$CACHE_DIR/${feed}-v4.parsed")
    v6=$(get_cache_age "$CACHE_DIR/${feed}-v6.parsed")
    echo "ufw_blocklist_cache_age_seconds{feed=\"$feed\",ip_version=\"4\"} ${v4:-0}"
    echo "ufw_blocklist_cache_age_seconds{feed=\"$feed\",ip_version=\"6\"} ${v6:-0}"
  done

  _ufw_metric_header ufw_blocklist_last_update_timestamp gauge "Unix timestamp of last update"
  for feed in "${enabled_feeds[@]}"; do
    v4=$(get_file_timestamp "$CACHE_DIR/${feed}-v4.parsed")
    v6=$(get_file_timestamp "$CACHE_DIR/${feed}-v6.parsed")
    echo "ufw_blocklist_last_update_timestamp{feed=\"$feed\",ip_version=\"4\"} ${v4:-0}"
    echo "ufw_blocklist_last_update_timestamp{feed=\"$feed\",ip_version=\"6\"} ${v6:-0}"
  done

  _ufw_metric_header ufw_blocklist_file_size_bytes gauge "Feed file sizes"
  for feed in "${enabled_feeds[@]}"; do
    v4=$(get_file_size "$CACHE_DIR/${feed}-v4.parsed")
    v6=$(get_file_size "$CACHE_DIR/${feed}-v6.parsed")
    echo "ufw_blocklist_file_size_bytes{feed=\"$feed\",ip_version=\"4\",type=\"parsed\"} ${v4:-0}"
    echo "ufw_blocklist_file_size_bytes{feed=\"$feed\",ip_version=\"6\",type=\"parsed\"} ${v6:-0}"
  done

  _ufw_metric_header ufw_blocklist_ip_version_ratio gauge "IPv4 vs IPv6 ratio"
  local total ratio_v4 ratio_v6
  for feed in "${enabled_feeds[@]}"; do
    v4=$(get_ipset_size "${IPSET_PREFIX}-${feed}")
    v6=$(get_ipset_size "${IPSET_PREFIX}-${feed}-v6")
    v4=${v4:-0}
    v6=${v6:-0}
    total=$((v4 + v6))

    if [ "$total" -gt 0 ]; then
      # Values are passed with -v instead of being pasted into the program.
      ratio_v4=$(awk -v part="$v4" -v whole="$total" 'BEGIN { printf "%.4f", part / whole }')
      ratio_v6=$(awk -v part="$v6" -v whole="$total" 'BEGIN { printf "%.4f", part / whole }')
    else
      ratio_v4="0.0000"
      ratio_v6="0.0000"
    fi

    echo "ufw_blocklist_ip_version_ratio{feed=\"$feed\",version=\"4\"} $ratio_v4"
    echo "ufw_blocklist_ip_version_ratio{feed=\"$feed\",version=\"6\"} $ratio_v6"
  done

  _ufw_metric_header ufw_blocklist_blocked_total gauge "Blocked attempts per feed (rolling window)"
  local i blocks_1h blocks_24h blocks_1h_v6 blocks_24h_v6
  local total_blocks_24h=0
  for i in "${!enabled_feeds[@]}"; do
    feed=${enabled_feeds[i]}
    blocks_1h=$(get_feed_blocks "$feed" "1 hour ago")
    blocks_24h=$(get_feed_blocks "$feed" "24 hours ago")
    blocks_1h_v6=$(get_feed_blocks_v6 "$feed" "1 hour ago")
    blocks_24h_v6=$(get_feed_blocks_v6 "$feed" "24 hours ago")

    # Keep the IPv4 24h count for the hit-rate section so the cached journal
    # is not scanned again for every feed.
    blocks_24h=${blocks_24h//[[:space:]]/}
    feed_blocks_24h[i]=${blocks_24h:-0}
    total_blocks_24h=$((total_blocks_24h + feed_blocks_24h[i]))

    echo "ufw_blocklist_blocked_total{feed=\"$feed\",ip_version=\"4\",period=\"1h\"} ${blocks_1h:-0}"
    echo "ufw_blocklist_blocked_total{feed=\"$feed\",ip_version=\"4\",period=\"24h\"} ${feed_blocks_24h[i]}"
    echo "ufw_blocklist_blocked_total{feed=\"$feed\",ip_version=\"6\",period=\"1h\"} ${blocks_1h_v6:-0}"
    echo "ufw_blocklist_blocked_total{feed=\"$feed\",ip_version=\"6\",period=\"24h\"} ${blocks_24h_v6:-0}"
  done

  _ufw_metric_header ufw_blocklist_effectiveness gauge "Feed hit rate - percentage of total blocks from this feed (24h)"
  local effectiveness
  for i in "${!enabled_feeds[@]}"; do
    if [ "$total_blocks_24h" -gt 0 ]; then
      effectiveness=$(awk -v part="${feed_blocks_24h[i]}" -v whole="$total_blocks_24h" \
        'BEGIN { printf "%.2f", (part / whole) * 100 }' 2>/dev/null || echo "0")
    else
      effectiveness="0"
    fi
    echo "ufw_blocklist_effectiveness{feed=\"${enabled_feeds[i]}\"} $effectiveness"
  done

  _ufw_metric_header ufw_blocklist_ipset_memory_bytes gauge "Memory used by each ipset"
  for feed in "${enabled_feeds[@]}"; do
    v4=$(get_ipset_memory "${IPSET_PREFIX}-${feed}")
    v6=$(get_ipset_memory "${IPSET_PREFIX}-${feed}-v6")
    echo "ufw_blocklist_ipset_memory_bytes{feed=\"$feed\",ip_version=\"4\"} ${v4:-0}"
    echo "ufw_blocklist_ipset_memory_bytes{feed=\"$feed\",ip_version=\"6\"} ${v6:-0}"
  done

  # Conntrack metrics (system-wide)
  local conntrack_count conntrack_max conntrack_usage
  conntrack_count=$(get_conntrack_count)
  conntrack_max=$(get_conntrack_max)
  conntrack_count=${conntrack_count:-0}
  conntrack_max=${conntrack_max:-0}

  if [ "$conntrack_max" -gt 0 ]; then
    conntrack_usage=$(awk -v used="$conntrack_count" -v limit="$conntrack_max" \
      'BEGIN { printf "%.2f", (used / limit) * 100 }' 2>/dev/null || echo "0")
  else
    conntrack_usage="0"
  fi

  # Cache disk metrics; get_cache_disk_usage prints "used|available|use%".
  local disk_info cache_size disk_used disk_avail disk_pct
  disk_info=$(get_cache_disk_usage)
  cache_size=$(get_total_cache_size)
  IFS='|' read -r disk_used disk_avail disk_pct <<< "$disk_info"
  disk_pct=${disk_pct//%/}

  cat <<EOF

# HELP ufw_blocklist_conntrack_entries Current conntrack entries
# TYPE ufw_blocklist_conntrack_entries gauge
ufw_blocklist_conntrack_entries $conntrack_count

# HELP ufw_blocklist_conntrack_max Maximum conntrack entries
# TYPE ufw_blocklist_conntrack_max gauge
ufw_blocklist_conntrack_max $conntrack_max

# HELP ufw_blocklist_conntrack_usage_percent Conntrack table usage percentage
# TYPE ufw_blocklist_conntrack_usage_percent gauge
ufw_blocklist_conntrack_usage_percent ${conntrack_usage:-0}

# HELP ufw_blocklist_cache_disk_used_bytes Disk space used by cache partition
# TYPE ufw_blocklist_cache_disk_used_bytes gauge
ufw_blocklist_cache_disk_used_bytes ${disk_used:-0}

# HELP ufw_blocklist_cache_disk_available_bytes Disk space available on cache partition
# TYPE ufw_blocklist_cache_disk_available_bytes gauge
ufw_blocklist_cache_disk_available_bytes ${disk_avail:-0}

# HELP ufw_blocklist_cache_disk_usage_percent Cache partition disk usage percentage
# TYPE ufw_blocklist_cache_disk_usage_percent gauge
ufw_blocklist_cache_disk_usage_percent ${disk_pct:-0}

# HELP ufw_blocklist_cache_total_size_bytes Total size of cache directory
# TYPE ufw_blocklist_cache_total_size_bytes gauge
ufw_blocklist_cache_total_size_bytes ${cache_size:-0}

# HELP ufw_blocklist_whitelist_size Whitelist ipset size
# TYPE ufw_blocklist_whitelist_size gauge
ufw_blocklist_whitelist_size{ip_version="4"} $(get_ipset_size "$WHITELIST_IPSET")
ufw_blocklist_whitelist_size{ip_version="6"} $(get_ipset_size "$WHITELIST_IPSET_V6")

# HELP ufw_blocklist_total_unique_ips Total deduplicated IPs across all feeds
# TYPE ufw_blocklist_total_unique_ips gauge
ufw_blocklist_total_unique_ips{ip_version="4"} $(cat "$CACHE_DIR"/*-v4.parsed 2>/dev/null | sort -u | wc -l)
ufw_blocklist_total_unique_ips{ip_version="6"} $(cat "$CACHE_DIR"/*-v6.parsed 2>/dev/null | sort -u | wc -l)

# HELP ufw_blocklist_total_rules Total UFW firewall rules
# TYPE ufw_blocklist_total_rules gauge
ufw_blocklist_total_rules $(ufw status numbered 2>/dev/null | grep -c '^\[')

# HELP ufw_blocklist_scrape_timestamp_seconds Unix timestamp of metric generation
# TYPE ufw_blocklist_scrape_timestamp_seconds gauge
ufw_blocklist_scrape_timestamp_seconds $(date +%s)

# HELP ufw_blocklist_exporter_duration_seconds Time to generate all metrics
# TYPE ufw_blocklist_exporter_duration_seconds gauge
ufw_blocklist_exporter_duration_seconds $(($(date +%s) - start_time))
EOF

  echo ""
}
|
||||
|
||||
# Serve metrics over HTTP with nc, handling one connection per loop pass.
# "GET /metrics..." returns freshly generated metrics; any other request
# returns a small HTML page linking to /metrics.
# Globals: HTTP_PORT (read), SCRIPT_VERSION (read)
# Note:    installs an EXIT trap that removes the temporary FIFO directory.
#          Does not return; runs until the script is terminated.
run_http_server() {
  echo "Starting exporter on port $HTTP_PORT..." >&2

  local fifo_dir response_fifo request

  if ! fifo_dir=$(mktemp -d); then
    echo "ERROR: cannot create temporary directory" >&2
    exit 1
  fi
  response_fifo="$fifo_dir/response"
  if ! mkfifo "$response_fifo"; then
    rm -rf -- "$fifo_dir"
    echo "ERROR: cannot create FIFO $response_fifo" >&2
    exit 1
  fi
  # Expand the path when the trap is set so the handler does not depend on
  # this function's local variables.
  # shellcheck disable=SC2064
  trap "rm -rf -- '$fifo_dir'" EXIT

  while true; do
    # nc prints the client's request on its stdout and sends whatever it
    # reads from its stdin. The handler therefore sits on nc's stdout and
    # its reply travels back to nc's stdin through the FIFO. Reading the
    # request from the script's own stdin would never see the client.
    nc -l -p "$HTTP_PORT" -q 1 < "$response_fifo" 2>/dev/null | {
      IFS= read -r request
      if [[ "$request" =~ ^GET\ /metrics ]]; then
        printf "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nConnection: close\r\n\r\n"
        cache_journal_data
        cache_feeds_config
        generate_metrics
      else
        printf "HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nConnection: close\r\n\r\n"
        echo "<h1>UFW Blocklist Exporter v${SCRIPT_VERSION}</h1><a href='/metrics'>Metrics</a>"
      fi
    } > "$response_fifo" || sleep 1 # back off briefly when a pass fails
  done
}
|
||||
|
||||
# Entry point: run the HTTP server, write the metrics to OUTPUT_FILE, or
# print them on stdout, depending on the parsed options.
# Globals:   HTTP_MODE (read), OUTPUT_FILE (read)
# Arguments: the script's command-line arguments
# Exits:     1 when the metrics file cannot be staged or published
main() {
  parse_args "$@"

  # HTTP mode runs continuously, so it does not take the single-run lock.
  if [ "$HTTP_MODE" = true ]; then
    run_http_server
    return
  fi

  acquire_lock

  # Cache data before generating metrics
  cache_journal_data
  cache_feeds_config

  if [ -z "$OUTPUT_FILE" ]; then
    generate_metrics
    return
  fi

  local output_dir temp_file
  output_dir=$(dirname "$OUTPUT_FILE")

  # Ensure output directory exists
  mkdir -p "$output_dir"

  # Stage the file next to its destination. A rename inside one directory
  # replaces the published file in a single step, whereas deleting the old
  # file and moving a new one in from /tmp leaves a window in which the
  # metrics file is missing or only partly copied. The staging name does not
  # end in .prom, so node_exporter's textfile collector does not read it.
  if ! temp_file=$(mktemp "$output_dir/.ufw_metrics.XXXXXX"); then
    echo "ERROR: cannot create temporary file in $output_dir" >&2
    exit 1
  fi

  if ! generate_metrics > "$temp_file"; then
    rm -f "$temp_file"
    echo "ERROR: failed to generate metrics" >&2
    exit 1
  fi

  # mktemp creates the file with mode 600; make it readable for the
  # node_exporter user before it becomes visible under its final name.
  chmod 644 "$temp_file"

  if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
    rm -f "$temp_file"
    echo "ERROR: cannot move metrics file to $OUTPUT_FILE" >&2
    exit 1
  fi
}
|
||||
|
||||
# Script entry point: forward all command-line arguments to main.
main "$@"
|
||||
Executable
+996
@@ -0,0 +1,996 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Script Name: ufw-blocklists.sh
|
||||
# Version: 1.0
|
||||
# Description: Per-feed UFW threat intelligence blocking with ipset
|
||||
# Author: Phil Connor
|
||||
# Contact: contact@mylinux.work
|
||||
# Website: https://mylinux.work
|
||||
# License: MIT
|
||||
################################################################################
|
||||
# Don't use 'set -e' - it causes silent failures when log file has permission issues
|
||||
|
||||
# --- Paths -------------------------------------------------------------------
CONFIG_DIR="/etc/ufw-threats"
FEEDS_CONFIG="$CONFIG_DIR/feeds.conf"
CACHE_DIR="$CONFIG_DIR/cache"
LOG_FILE="/var/log/ufw-threats.log"
UFW_RULES_FILE="/etc/ufw/before.rules"
UFW_RULES_V6_FILE="/etc/ufw/before6.rules"

# --- Defaults that parse_args may override -------------------------------------
SSH_PORT="22"
ENABLE_AUTO_UPDATE=true
UPDATE_INTERVAL="daily"
ENABLE_IPV6=true

# --- ipset naming ------------------------------------------------------------
# Per-feed sets are named "${IPSET_PREFIX}-<feed>" (plus "-v6" for IPv6).
IPSET_PREFIX="ufw-feed"
WHITELIST_IPSET="ufw-whitelist"
WHITELIST_IPSET_V6="ufw-whitelist-v6"

# --- Housekeeping ------------------------------------------------------------
# Number of rule-file backups kept by cleanup_old_backups.
MAX_BACKUPS=10
|
||||
|
||||
|
||||
# Print the command-line help to stdout and terminate the script with status 0.
show_usage() {
  local prog="$0"

  cat <<USAGE
Usage: $prog [OPTIONS] [COMMAND]

PER-FEED VERSION: Each threat feed gets its own ipset and iptables rule.
Provides detailed per-feed blocking statistics and metrics.

COMMANDS:
install Install and configure threat feed blocking
update Update all enabled feeds now (ipsets only, no UFW reload)
apply-rules Regenerate and apply UFW rules (use with caution!)
test-rules Test rule generation without applying
add-feed NAME URL Add a custom feed
remove-feed NAME Remove a feed
enable-feed NAME Enable a disabled feed
disable-feed NAME Disable a feed
list-feeds List all configured feeds
show-stats Show blocking statistics per feed
whitelist-add IP Add IP/CIDR to whitelist
whitelist-init Initialize whitelist with RFC1918/Docker networks
whitelist-list Show all whitelisted IPs
clean-cache Remove cache files for disabled feeds

OPTIONS:
-h, --help Show this help message
-s, --ssh-port PORT SSH port (default: 22)
--no-auto-update Disable automatic daily updates
--no-ipv6 Disable IPv6 support
--update-interval TIME Update interval: hourly, daily, weekly (default: daily)

EXAMPLES:
sudo $prog install
sudo $prog update # Safe - only updates ipsets
sudo $prog test-rules # Safe - validates without applying
sudo $prog apply-rules # DANGER - regenerates UFW config
sudo $prog show-stats

USAGE

  exit 0
}
|
||||
|
||||
# Print a timestamped message to stdout and append it to $LOG_FILE.
# Arguments: $1 - message text
# The log-file write is best effort: a failure there never fails the caller.
log_message() {
  local stamp line
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  line="[${stamp}] $1"

  printf '%s\n' "$line"
  printf '%s\n' "$line" >> "$LOG_FILE" 2>/dev/null || true
}
|
||||
|
||||
# Iterate over enabled feeds in $FEEDS_CONFIG, calling the provided callback
# function with arguments: name url type description
# Usage: for_each_enabled_feed my_callback_function
#
# Lines are "ENABLED|NAME|URL|TYPE|DESCRIPTION"; only lines whose first field
# is exactly "1" are passed on (comments, blank lines and disabled feeds are
# skipped). Returns 0 without calling anything when the config is missing.
for_each_enabled_feed() {
  local callback="$1"
  [ -f "$FEEDS_CONFIG" ] || return 0

  local enabled name url type description
  # `|| [ -n "$enabled" ]` keeps a final line that has no trailing newline.
  while IFS='|' read -r enabled name url type description || [ -n "$enabled" ]; do
    [ "$enabled" = "1" ] || continue
    "$callback" "$name" "$url" "$type" "$description"
  done < "$FEEDS_CONFIG"
}
|
||||
|
||||
# Iterate over ALL feeds (enabled + disabled), calling the provided callback
# function with arguments: enabled name url type description
#
# Comment lines (first field starting with '#') and blank lines are skipped.
# Returns 0 without calling anything when $FEEDS_CONFIG does not exist.
for_each_feed() {
  local callback="$1"
  [ -f "$FEEDS_CONFIG" ] || return 0

  local enabled name url type description
  # `|| [ -n "$enabled" ]` keeps a final line that has no trailing newline.
  while IFS='|' read -r enabled name url type description || [ -n "$enabled" ]; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    "$callback" "$enabled" "$name" "$url" "$type" "$description"
  done < "$FEEDS_CONFIG"
}
|
||||
|
||||
# Parse command-line options and the command word into globals.
#
# Globals written: COMMAND, SSH_PORT, ENABLE_AUTO_UPDATE, ENABLE_IPV6,
#                  UPDATE_INTERVAL, FEED_NAME, FEED_URL, WHITELIST_IP
# COMMAND defaults to "install" when no command word is given.
# Exits 1 on an unknown option or when an option/command lacks its argument(s).
#
# Every arm that consumes extra words checks the argument count first: a bare
# `shift N` with fewer than N words left fails WITHOUT shifting, which would
# leave "$1" in place and spin this loop forever.
parse_args() {
  COMMAND=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        ;;
      -s|--ssh-port)
        [[ $# -ge 2 ]] || { echo "ERROR: $1 requires a PORT argument" >&2; exit 1; }
        SSH_PORT="$2"
        shift 2
        ;;
      --no-auto-update)
        ENABLE_AUTO_UPDATE=false
        shift
        ;;
      --no-ipv6)
        ENABLE_IPV6=false
        shift
        ;;
      --update-interval)
        [[ $# -ge 2 ]] || { echo "ERROR: $1 requires a TIME argument" >&2; exit 1; }
        UPDATE_INTERVAL="$2"
        shift 2
        ;;
      install|update|apply-rules|test-rules|list-feeds|show-stats|whitelist-init|whitelist-list|clean-cache)
        COMMAND="$1"
        shift
        ;;
      add-feed)
        [[ $# -ge 3 ]] || { echo "Usage: $0 add-feed NAME URL" >&2; exit 1; }
        COMMAND="add-feed"
        FEED_NAME="$2"
        FEED_URL="$3"
        shift 3
        ;;
      remove-feed|enable-feed|disable-feed)
        [[ $# -ge 2 ]] || { echo "Usage: $0 $1 NAME" >&2; exit 1; }
        COMMAND="$1"
        FEED_NAME="$2"
        shift 2
        ;;
      whitelist-add)
        [[ $# -ge 2 ]] || { echo "Usage: $0 whitelist-add <IP|CIDR>" >&2; exit 1; }
        COMMAND="whitelist-add"
        WHITELIST_IP="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1"
        exit 1
        ;;
    esac
  done

  COMMAND="${COMMAND:-install}"
}
|
||||
|
||||
# Prune rule-file backups, keeping the newest ${MAX_BACKUPS:-10} per rules file,
# and delete every "*.backup-*.clean" file. All failures are ignored.
# Globals: MAX_BACKUPS, UFW_RULES_FILE, UFW_RULES_V6_FILE, ENABLE_IPV6 (read)
cleanup_old_backups() {
  local keep=${MAX_BACKUPS:-10}

  # IPv6 backups are only pruned when IPv6 support is enabled.
  local -a rule_files=("$UFW_RULES_FILE")
  if [ "$ENABLE_IPV6" = true ]; then
    rule_files+=("$UFW_RULES_V6_FILE")
  fi

  local rules_path
  for rules_path in "${rule_files[@]}"; do
    # Newest first by mtime; everything after the first $keep entries is removed.
    find "$(dirname "$rules_path")" -maxdepth 1 -name "$(basename "$rules_path").backup-*" -printf '%T@ %p\n' 2>/dev/null \
      | sort -rn | tail -n +$((keep + 1)) | cut -d' ' -f2- | xargs -r rm -f 2>/dev/null || true
  done

  rm -f "${UFW_RULES_FILE}.backup-"*.clean "${UFW_RULES_V6_FILE}.backup-"*.clean 2>/dev/null || true
}
|
||||
|
||||
# Verify root privileges and required tools, make sure all ipsets exist, and
# optionally enable UFW.
# Arguments: $1 - "true" (default) to run `ufw --force enable`, anything else to skip
# Exits 1 when not run as root or when curl is unavailable.
check_requirements() {
  local enable_ufw="${1:-true}"

  if [ "$EUID" -ne 0 ]; then
    echo "Please run as root"
    exit 1
  fi

  # Install ufw/ipset/curl when ufw is missing: try apt first, then dnf, then yum.
  if ! command -v ufw >/dev/null 2>&1; then
    { apt-get update && apt-get install -y ufw ipset curl 2>/dev/null; } \
      || dnf install -y ufw ipset curl 2>/dev/null \
      || yum install -y ufw ipset curl 2>/dev/null
  fi

  if ! command -v ipset >/dev/null 2>&1; then
    apt-get install -y ipset
  fi

  if ! command -v curl >/dev/null 2>&1; then
    echo "ERROR: curl required"
    exit 1
  fi

  # CRITICAL: Ensure all ipsets referenced by before.rules exist BEFORE enabling UFW.
  # If ipsets are missing (e.g., after reboot, failed persistence), UFW enable will fail
  # with "Set ufw-feed-XXX doesn't exist" and block ALL traffic including DNS.
  ensure_ipsets_exist

  if [ "$enable_ufw" = true ]; then
    ufw --force enable
  fi

  cleanup_old_backups
}
|
||||
|
||||
# Callback for for_each_enabled_feed: create the feed's ipset(s) if they do not
# exist yet. Creation errors are suppressed and ignored.
# Arguments: $1 - feed name
_ensure_feed_ipset() {
  local name="$1"
  local set_v4="${IPSET_PREFIX}-${name}"
  local set_v6="${IPSET_PREFIX}-${name}-v6"

  if ! ipset list "$set_v4" >/dev/null 2>&1; then
    ipset create "$set_v4" hash:net family inet hashsize 4096 maxelem 200000 2>/dev/null || true
  fi

  if [ "$ENABLE_IPV6" = true ]; then
    if ! ipset list "$set_v6" >/dev/null 2>&1; then
      ipset create "$set_v6" hash:net family inet6 hashsize 4096 maxelem 200000 2>/dev/null || true
    fi
  fi
}
|
||||
|
||||
# Make sure the whitelist ipsets and one ipset per enabled feed exist.
# First tries to restore /etc/ipset.conf (if present), then creates whatever is
# still missing as empty sets. All ipset errors are suppressed and ignored.
ensure_ipsets_exist() {
  [ -f /etc/ipset.conf ] && { ipset restore -f /etc/ipset.conf 2>/dev/null || true; }

  if ! ipset list "$WHITELIST_IPSET" >/dev/null 2>&1; then
    ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000 2>/dev/null || true
  fi

  if [ "$ENABLE_IPV6" = true ]; then
    if ! ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1; then
      ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000 2>/dev/null || true
    fi
  fi

  for_each_enabled_feed _ensure_feed_ipset
}
|
||||
|
||||
# Validate a feed name: non-empty, only [a-zA-Z0-9_-], at most 20 characters.
# Arguments: $1 - candidate feed name
# Outputs:   an "ERROR: ..." line on stdout when the name is rejected
# Returns:   0 when valid, 1 otherwise
#
# NOTE(review): the name is embedded in ipset names such as
# "${IPSET_PREFIX}-<name>-v6-tmp" and in iptables LOG prefixes; the 20-char
# limit looks too generous for those uses - confirm against ipset/iptables limits.
validate_feed_name() {
  local name="$1"
  local problem=""

  if [ -z "$name" ]; then
    problem="ERROR: Feed name cannot be empty"
  elif [[ ! "$name" =~ ^[a-zA-Z0-9_-]+$ ]]; then
    problem="ERROR: Feed name '$name' contains invalid characters (only a-z, 0-9, _, - allowed)"
  elif [ "${#name}" -gt 20 ]; then
    problem="ERROR: Feed name '$name' too long (max 20 chars, ipset name limit)"
  fi

  [ -z "$problem" ] && return 0
  echo "$problem"
  return 1
}
|
||||
|
||||
# Create the config and cache directories and the log file, then restrict
# access: 0700 on $CONFIG_DIR, 0600 on $LOG_FILE.
create_directory_structure() {
  local dir
  for dir in "$CONFIG_DIR" "$CACHE_DIR"; do
    mkdir -p "$dir"
  done
  touch "$LOG_FILE"

  chmod 700 "$CONFIG_DIR"
  chmod 600 "$LOG_FILE"
}
|
||||
|
||||
# Create $FEEDS_CONFIG with the built-in feed list unless it already contains
# at least one feed line. An existing file without feed lines is moved aside
# to "<file>.old-<timestamp>" before the defaults are written.
# Returns: 0 on success or when nothing had to be done, 1 if the file cannot
#          be written.
initialize_feeds_config() {
  # grep -c prints "0" AND exits non-zero when nothing matches, so a trailing
  # `|| echo 0` would produce "0<newline>0" and break the integer test below.
  local has_feeds=0
  if [ -f "$FEEDS_CONFIG" ]; then
    has_feeds=$(grep -c '^[01]|' "$FEEDS_CONFIG" 2>/dev/null) || true
    has_feeds=${has_feeds:-0}
  fi

  if [ "$has_feeds" -gt 0 ]; then
    log_message "Feeds configuration already exists with $has_feeds feeds"
    return 0
  fi

  log_message "Creating feeds configuration..."

  if [ -f "$FEEDS_CONFIG" ]; then
    mv "$FEEDS_CONFIG" "${FEEDS_CONFIG}.old-$(date +%Y%m%d-%H%M%S)"
  fi

  if ! cat > "$FEEDS_CONFIG" <<'EOF'
# Threat Intelligence Feeds Configuration
# Format: ENABLED|NAME|URL|TYPE|DESCRIPTION
#
# ENABLED: 1 (enabled) or 0 (disabled)
# NAME: Unique feed identifier
# URL: Feed URL
# TYPE: Format type (plain, cidr, commented, custom)
# DESCRIPTION: Feed description

1|cinsarmy|http://cinsscore.com/list/ci-badguys.txt|plain|CINS Army Malicious IPs
1|firehol-level1|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level1.netset|cidr|FireHOL Level 1 - Most aggressive attackers
1|firehol-level2|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level2.netset|cidr|FireHOL Level 2 - Attacks in last 48h
0|firehol-level3|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level3.netset|cidr|FireHOL Level 3 - Attacks in last 30d
1|ipsum-1|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/1.txt|plain|IPsum Level 1 - Most dangerous
0|ipsum-2|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/2.txt|plain|IPsum Level 2 - Dangerous
0|ipsum-3|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/3.txt|plain|IPsum Level 3 - Suspicious
0|spamhaus-drop|https://www.spamhaus.org/drop/drop.txt|commented|Spamhaus DROP List
0|spamhaus-edrop|https://www.spamhaus.org/drop/edrop.txt|commented|Spamhaus EDROP List
1|spamhaus-dropv6|https://www.spamhaus.org/drop/dropv6.txt|commented|Spamhaus DROP V6 List
0|feodo-tracker|https://feodotracker.abuse.ch/downloads/ipblocklist.txt|commented|Feodo Tracker C2 IPs
0|sslbl-aggressive|https://sslbl.abuse.ch/blacklist/sslipblacklist_aggressive.txt|commented|SSL Blacklist Aggressive
0|sslbl-all|https://sslbl.abuse.ch/blacklist/sslipblacklist.txt|commented|SSL Blacklist All
1|blocklist-de|https://lists.blocklist.de/lists/all.txt|plain|Blocklist.de All Attacks
0|greensnow|https://blocklist.greensnow.co/greensnow.txt|plain|GreenSnow Blacklist
0|emergingthreats|https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt|plain|Emerging Threats IPs
0|bruteforce-ssh|https://lists.blocklist.de/lists/ssh.txt|plain|SSH Bruteforce Attempts
1|binarydefense|https://www.binarydefense.com/banlist.txt|plain|Binary Defense Blacklist
1|bruteforce-bl|https://danger.rulez.sk/projects/bruteforceblocker/blist.php|commented|BruteForce Blocker
0|dshield-top|https://www.dshield.org/block.txt|commented|DShield Top Attackers
1|dshield-fhol|https://iplists.firehol.org/files/dshield.netset|commented|Dshield FireHol top 20
0|tor-exit|https://check.torproject.org/torbulkexitlist|plain|TOR Exit Nodes (optional)
0|abuseipdb-1d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-1d.ipv4|commented|AbuseIPDB with confidence score 100 1 day
0|abuseipdb-3d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-3d.ipv4|commented|AbuseIPDB with confidence score 100 3 day
0|abuseipdb-7d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-7d.ipv4|commented|AbuseIPDB with confidence score 100 7 day
1|abuseipdb-14d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-14d.ipv4|commented|AbuseIPDB with confidence score 100 14 day
0|abuseipdb-30d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-30d.ipv4|commented|AbuseIPDB with confidence score 100 30 day


# Add custom feeds below this line
EOF
  then
    log_message "ERROR: Could not write $FEEDS_CONFIG"
    return 1
  fi

  chmod 600 "$FEEDS_CONFIG"
}
|
||||
|
||||
# Callback for for_each_enabled_feed: create the feed's IPv4 ipset and, when
# IPv6 is enabled, its "-v6" ipset, logging each set that had to be created.
# Arguments: $1 - feed name
_setup_feed_ipset() {
  local name="$1"
  local spec suffix family set_name

  # Each spec is "<name suffix>:<ipset family>".
  for spec in ":inet" "-v6:inet6"; do
    suffix=${spec%%:*}
    family=${spec#*:}

    # The IPv6 set is only considered when IPv6 support is enabled.
    if [ "$family" = "inet6" ] && [ "$ENABLE_IPV6" != true ]; then
      continue
    fi

    set_name="${IPSET_PREFIX}-${name}${suffix}"
    ipset list "$set_name" >/dev/null 2>&1 && continue

    ipset create "$set_name" hash:net family "$family" hashsize 4096 maxelem 200000
    log_message " Created ipset: $set_name"
  done
}
|
||||
|
||||
# Create the whitelist ipsets (seeded with the loopback address) and one ipset
# per enabled feed, then install ipset persistence.
setup_ipsets() {
  log_message "Setting up ipsets (per-feed mode)..."

  # A freshly created whitelist always contains loopback.
  ipset list "$WHITELIST_IPSET" >/dev/null 2>&1 || {
    ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000
    ipset add "$WHITELIST_IPSET" 127.0.0.1
  }

  if [ "$ENABLE_IPV6" = true ]; then
    ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1 || {
      ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000
      ipset add "$WHITELIST_IPSET_V6" ::1
    }
  fi

  for_each_enabled_feed _setup_feed_ipset
  setup_ipset_persistence
}
|
||||
|
||||
# Install a systemd oneshot unit that restores /etc/ipset.conf on start and
# saves it on stop, write the current ipsets to /etc/ipset.conf, and enable
# the unit (enable errors are ignored).
setup_ipset_persistence() {
  local unit_file=/etc/systemd/system/ipset-persistent.service

  cat > "$unit_file" <<'UNIT'
[Unit]
Description=ipset persistent configuration
Before=network-pre.target ufw.service
Wants=network-pre.target

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=-/sbin/ipset restore -f /etc/ipset.conf
ExecStop=/sbin/ipset save -f /etc/ipset.conf
StandardOutput=null
StandardError=null

[Install]
WantedBy=multi-user.target
UNIT

  ipset save > /etc/ipset.conf
  systemctl enable ipset-persistent.service 2>/dev/null || true
}
|
||||
|
||||
# Download a feed to a cache file.
# Arguments: $1 - feed URL
#            $2 - destination file
# Returns:   0 when a non-empty response was fetched and moved into place,
#            1 otherwise (the previous destination file is left untouched).
#
# The response is written to a temporary file next to the destination and only
# moved into place on success. Writing directly to the destination would let a
# stale file from an earlier run pass the "non-empty" check after a failed
# download, and would let a truncated download overwrite a good cache.
download_feed() {
  local url="$1" output="$2"
  local tmp http_code rc=0

  if ! tmp=$(mktemp "${output}.XXXXXX" 2>/dev/null); then
    log_message " Download failed for $url (cannot create temp file for $output)"
    return 1
  fi

  http_code=$(curl -f -s -m 60 --connect-timeout 10 -L \
    -A "ufw-threat-feeds-per-feed/1.0" \
    -w "%{http_code}" -o "$tmp" "$url" 2>/dev/null) || rc=$?

  if [ "$rc" -ne 0 ] || [ ! -s "$tmp" ]; then
    rm -f -- "$tmp"
    log_message " Download failed for $url (HTTP ${http_code:-n/a}, curl exit $rc)"
    return 1
  fi

  if ! mv -f -- "$tmp" "$output"; then
    rm -f -- "$tmp"
    log_message " Download failed for $url (cannot write $output)"
    return 1
  fi
  return 0
}
|
||||
|
||||
# Extract IPv4 and IPv6 addresses/networks from a downloaded feed.
# Arguments: $1 - raw feed file
#            $2 - feed type: plain | cidr | commented
#            $3 - output file for IPv4 entries (truncated first)
#            $4 - output file for IPv6 entries (truncated first; only filled
#                 when ENABLE_IPV6=true)
# Returns:   0 for a known type (even if nothing matched), 1 for an unknown type.
#
# Carriage returns are stripped before matching so feeds served with CRLF line
# endings are not silently discarded by the end-anchored patterns.
parse_feed() {
  local file="$1" type="$2" output_v4="$3" output_v6="$4"

  : > "$output_v4"
  : > "$output_v6"

  case "$type" in
    plain)
      # One bare address or CIDR per line.
      tr -d '\r' 2>/dev/null < "$file" \
        | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?$' >> "$output_v4" 2>/dev/null || true
      if [ "$ENABLE_IPV6" = true ]; then
        tr -d '\r' 2>/dev/null < "$file" \
          | grep -E '^[0-9a-fA-F:]+(/[0-9]+)?$' | grep ':' >> "$output_v6" 2>/dev/null || true
      fi
      ;;
    cidr)
      # Entry at line start; anything after a space or '#' is dropped.
      tr -d '\r' 2>/dev/null < "$file" \
        | grep -E '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?' \
        | cut -d' ' -f1 | cut -d'#' -f1 | grep -v '^$' >> "$output_v4" 2>/dev/null || true
      if [ "$ENABLE_IPV6" = true ]; then
        tr -d '\r' 2>/dev/null < "$file" \
          | grep -E '^[0-9a-fA-F:]+(/[0-9]+)?' \
          | grep ':' | cut -d' ' -f1 | cut -d'#' -f1 | grep -v '^$' >> "$output_v6" 2>/dev/null || true
      fi
      ;;
    commented)
      # Skip lines starting with '#' or ';', then pull addresses out of the rest.
      tr -d '\r' 2>/dev/null < "$file" \
        | grep -v -E '^[#;]|^$' \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?' >> "$output_v4" 2>/dev/null || true
      if [ "$ENABLE_IPV6" = true ]; then
        tr -d '\r' 2>/dev/null < "$file" \
          | grep -v -E '^[#;]|^$' \
          | grep -oE '[0-9a-fA-F:]+(/[0-9]+)?' \
          | grep -E '^[0-9a-fA-F]{1,4}:[0-9a-fA-F:]+' >> "$output_v6" 2>/dev/null || true
      fi
      ;;
    *)
      # Previously an unknown type produced two empty files and "success".
      log_message " Unsupported feed type '$type' (expected plain, cidr or commented)"
      return 1
      ;;
  esac

  return 0
}
|
||||
|
||||
# Delete cache files in $CACHE_DIR that belong to feeds which are not enabled.
# Arguments: $1 - newline-separated list of enabled feed names
# Returns:   0 always
#
# Cache files are "<feed>.raw", "<feed>-v4.parsed" and "<feed>-v6.parsed".
_clean_stale_cache() {
  local enabled_feeds="$1"
  local cleaned=0
  local cache_file feed_name

  for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed; do
    # An unmatched glob stays literal; skip it.
    [ -f "$cache_file" ] || continue

    feed_name=${cache_file##*/}
    feed_name=${feed_name%.raw}
    feed_name=${feed_name%-v4.parsed}
    feed_name=${feed_name%-v6.parsed}

    # Whole-line, fixed-string match: the file name must not act as a regex.
    if ! grep -qxF -- "$feed_name" <<< "$enabled_feeds"; then
      rm -f -- "$cache_file" && cleaned=$((cleaned + 1))
    fi
  done

  if [ "$cleaned" -gt 0 ]; then
    log_message " Cleaned $cleaned stale cache files"
  fi
  return 0
}
|
||||
|
||||
# Replace the contents of a feed's IPv4 ipset with the entries in a file.
# Arguments: $1 - feed name
#            $2 - file with one IPv4 address/CIDR per line
# Returns:   exit status of `ipset restore`
#
# Entries are loaded into a "-tmp" staging set which is then swapped with the
# live set, so the live set is never observed half-filled.
#  * A staging set left behind by an earlier failed run is destroyed first;
#    otherwise the `create` line would abort every later load.
#  * `-exist` makes restore tolerate duplicate entries in the feed instead of
#    aborting on the first repeated address.
_load_ipset_v4() {
  local name="$1" v4_file="$2"
  local live_set="${IPSET_PREFIX}-${name}"
  local staging_set="${IPSET_PREFIX}-${name}-tmp"

  ipset destroy "$staging_set" 2>/dev/null || true

  {
    echo "create $staging_set hash:net family inet hashsize 4096 maxelem 200000"
    # Single awk pass instead of a shell loop; empty lines are skipped.
    awk -v set="$staging_set" 'length($0) > 0 { print "add " set " " $0 }' "$v4_file"
    echo "swap $live_set $staging_set"
    echo "destroy $staging_set"
  } | ipset restore -exist 2>/dev/null
}
|
||||
|
||||
# Replace the contents of a feed's IPv6 ipset with the entries in a file.
# Arguments: $1 - feed name
#            $2 - file with one IPv6 address/CIDR per line
# Returns:   exit status of `ipset restore`
#
# Entries are loaded into a "-v6-tmp" staging set which is then swapped with
# the live "-v6" set, so the live set is never observed half-filled.
#  * A staging set left behind by an earlier failed run is destroyed first;
#    otherwise the `create` line would abort every later load.
#  * `-exist` makes restore tolerate duplicate entries in the feed instead of
#    aborting on the first repeated address.
_load_ipset_v6() {
  local name="$1" v6_file="$2"
  local live_set="${IPSET_PREFIX}-${name}-v6"
  local staging_set="${IPSET_PREFIX}-${name}-v6-tmp"

  ipset destroy "$staging_set" 2>/dev/null || true

  {
    echo "create $staging_set hash:net family inet6 hashsize 4096 maxelem 200000"
    # Single awk pass instead of a shell loop; empty lines are skipped.
    awk -v set="$staging_set" 'length($0) > 0 { print "add " set " " $0 }' "$v6_file"
    echo "swap $live_set $staging_set"
    echo "destroy $staging_set"
  } | ipset restore -exist 2>/dev/null
}
|
||||
|
||||
# Download, parse and load every enabled feed into its ipset(s), then save all
# ipsets to /etc/ipset.conf. UFW rules are not touched.
# Exits 1 when $FEEDS_CONFIG is missing or contains no enabled feed.
# A feed counts as failed when its download/parse fails or when loading its
# entries into the ipset fails.
update_feeds() {
  log_message "Starting per-feed update..."

  if [ ! -f "$FEEDS_CONFIG" ]; then
    echo "ERROR: Feeds config not found: $FEEDS_CONFIG"
    echo "Run 'install' command first"
    exit 1
  fi

  # grep -c prints "0" AND exits non-zero when nothing matches; appending
  # `|| echo 0` would yield "0<newline>0", the -eq test would error out and
  # the "no enabled feeds" guard would never trigger.
  local enabled_count
  enabled_count=$(grep -c '^1|' "$FEEDS_CONFIG" 2>/dev/null) || true
  enabled_count=${enabled_count:-0}
  if [ "$enabled_count" -eq 0 ]; then
    echo "ERROR: No enabled feeds found in $FEEDS_CONFIG"
    echo "Check the config file format"
    exit 1
  fi

  log_message "Found $enabled_count enabled feeds"

  local enabled_feeds
  enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2)

  # NOTE: Do NOT destroy ipsets for disabled feeds here. The before.rules may still
  # reference them (if apply-rules hasn't been re-run). Destroying in-use ipsets causes
  # "Set doesn't exist" on next UFW reload, which blocks all traffic.
  # Ipset cleanup happens safely in cmd_disable_feed/cmd_remove_feed after rules are regenerated.
  _clean_stale_cache "$enabled_feeds"

  local total_feeds=0
  local failed_feeds=0
  local enabled name url type description
  local raw v4_file v6_file count_v4 count_v6 load_ok

  # `|| [ -n "$enabled" ]` keeps a final line that has no trailing newline.
  while IFS='|' read -r enabled name url type description || [ -n "$enabled" ]; do
    [ "$enabled" = "1" ] || continue

    total_feeds=$((total_feeds + 1))
    log_message "Processing feed: $name"

    raw="$CACHE_DIR/${name}.raw"
    v4_file="$CACHE_DIR/${name}-v4.parsed"
    v6_file="$CACHE_DIR/${name}-v6.parsed"

    if download_feed "$url" "$raw" && parse_feed "$raw" "$type" "$v4_file" "$v6_file"; then
      count_v4=$(wc -l < "$v4_file" 2>/dev/null || echo 0)
      count_v6=0
      if [ "$ENABLE_IPV6" = true ]; then
        count_v6=$(wc -l < "$v6_file" 2>/dev/null || echo 0)
      fi

      # Empty result sets are not loaded, so the previous contents stay active.
      load_ok=true
      if [ "$count_v4" -gt 0 ] && ! _load_ipset_v4 "$name" "$v4_file"; then
        log_message " WARNING: could not load IPv4 entries into ipset for $name"
        load_ok=false
      fi
      if [ "$ENABLE_IPV6" = true ] && [ "$count_v6" -gt 0 ] && ! _load_ipset_v6 "$name" "$v6_file"; then
        log_message " WARNING: could not load IPv6 entries into ipset for $name"
        load_ok=false
      fi

      log_message " $name: $count_v4 IPv4, $count_v6 IPv6"
      [ "$load_ok" = true ] || failed_feeds=$((failed_feeds + 1))
    else
      log_message " FAILED: $name"
      failed_feeds=$((failed_feeds + 1))
    fi
  done < "$FEEDS_CONFIG"

  ipset save > /etc/ipset.conf
  log_message "Updated $total_feeds feeds ($failed_feeds failed)"
}
|
||||
|
||||
# Build iptables rules block for IPv4 or IPv6
# Args: v4|v6 output_file
#
# The block starts with a whitelist ACCEPT rule, followed by a rate-limited LOG
# rule and a DROP rule for every enabled feed in $FEEDS_CONFIG, wrapped in
# START/END marker comments. The output file is overwritten.
#
# iptables accepts at most 29 characters for --log-prefix; a longer prefix makes
# the whole rules file unloadable, so over-long prefixes are truncated.
_build_rules_block() {
  local family="$1" output="$2"
  local chain_prefix whitelist_set set_suffix log_tag
  local max_log_prefix=29

  if [ "$family" = "v4" ]; then
    chain_prefix="ufw-before-input"
    whitelist_set="$WHITELIST_IPSET"
    set_suffix=""
    log_tag="THREAT"
  else
    chain_prefix="ufw6-before-input"
    whitelist_set="$WHITELIST_IPSET_V6"
    set_suffix="-v6"
    log_tag="THREAT-v6"
  fi

  {
    printf '\n'
    printf '# UFW THREAT FEEDS - PER-FEED MODE - START\n'
    printf '# Whitelist bypass\n'
    printf -- '-A %s -m set --match-set %s src -j ACCEPT\n' "$chain_prefix" "$whitelist_set"
    printf '\n'
  } > "$output"

  if [ -f "$FEEDS_CONFIG" ]; then
    local enabled name url type description
    local set_name log_prefix
    # `|| [ -n "$enabled" ]` keeps a final line that has no trailing newline.
    while IFS='|' read -r enabled name url type description || [ -n "$enabled" ]; do
      [ "$enabled" = "1" ] || continue

      set_name="${IPSET_PREFIX}-${name}${set_suffix}"
      log_prefix="[${log_tag}:${name}] "
      if [ "${#log_prefix}" -gt "$max_log_prefix" ]; then
        # Keep the closing "] " so log lines stay parseable.
        log_prefix="${log_prefix:0:$((max_log_prefix - 2))}] "
      fi

      {
        printf '# %s\n' "$description"
        printf -- '-A %s -m set --match-set %s src -m limit --limit 5/min -j LOG --log-prefix "%s"\n' \
          "$chain_prefix" "$set_name" "$log_prefix"
        printf -- '-A %s -m set --match-set %s src -j DROP\n' "$chain_prefix" "$set_name"
      } >> "$output"
    done < "$FEEDS_CONFIG"
  fi

  echo "# UFW THREAT FEEDS - PER-FEED MODE - END" >> "$output"
}
|
||||
|
||||
# Insert rules into a UFW template file and validate
# Args: template_file rules_file output_file
# Returns 0 on success, 1 on validation failure
#
# The rules are inserted directly after the first "# End required lines"
# marker of the template. The result is valid only if it contains exactly one
# "*filter" table header.
_insert_and_validate_rules() {
  local template="$1" rules_file="$2" output="$3"
  local insert_line

  # -m1: only the first marker counts, so $insert_line is always a single number.
  insert_line=$(grep -n -m1 "^# End required lines" "$template" 2>/dev/null | cut -d: -f1)
  if [ -z "$insert_line" ]; then
    log_message "ERROR: Could not find '# End required lines' in $template"
    return 1
  fi

  if ! { head -n "$insert_line" "$template" > "$output" \
      && cat "$rules_file" >> "$output" \
      && tail -n +"$((insert_line + 1))" "$template" >> "$output"; }; then
    log_message "ERROR: Could not assemble rules file $output"
    return 1
  fi

  # grep -c prints "0" AND exits non-zero when nothing matches; a trailing
  # `|| echo 0` would yield "0<newline>0", the -ne test would error out and a
  # file without any *filter block would pass validation.
  local filter_count
  filter_count=$(grep -c '^\*filter' "$output" 2>/dev/null) || true
  filter_count=${filter_count:-0}
  if [ "$filter_count" -ne 1 ]; then
    log_message "ERROR: Generated rules file has $filter_count *filter blocks (expected 1)"
    return 1
  fi

  return 0
}
|
||||
|
||||
# Callback for for_each_enabled_feed: log and count every ipset of the feed
# that does not exist. The count is accumulated in the global _MISSING_SETS.
# Arguments: $1 - feed name
_verify_ipsets_callback() {
  local name="$1"

  # The "-v6" set is only required when IPv6 support is enabled.
  local -a required_sets=("${IPSET_PREFIX}-${name}")
  if [ "$ENABLE_IPV6" = true ]; then
    required_sets+=("${IPSET_PREFIX}-${name}-v6")
  fi

  local set_name
  for set_name in "${required_sets[@]}"; do
    if ! ipset list "$set_name" >/dev/null 2>&1; then
      log_message "ERROR: Required ipset $set_name is missing"
      _MISSING_SETS=$((_MISSING_SETS + 1))
    fi
  done
}
|
||||
|
||||
# Stage, verify and install freshly generated UFW rule files (helper for
# apply_ufw_rules; performs no cleanup of the staging directory itself).
# Arguments: $1 - staging directory
#            $2 - IPv4 template (stock before.rules)
#            $3 - IPv6 template (stock before6.rules)
# Returns:   0 on success, 1 on any failure
_apply_ufw_rules_staged() {
  local tmpdir="$1" template_v4="$2" template_v6="$3"
  local v4_rules="$tmpdir/v4_rules" v4_output="$tmpdir/v4_output"
  local v6_rules="$tmpdir/v6_rules" v6_output="$tmpdir/v6_output"
  local install_v6=false

  log_message " Starting from clean UFW templates"

  # Build and insert IPv4 rules
  _build_rules_block "v4" "$v4_rules"
  if ! _insert_and_validate_rules "$template_v4" "$v4_rules" "$v4_output"; then
    log_message " Aborting to prevent corruption."
    return 1
  fi
  log_message " IPv4 rules generated and validated"

  # Build and insert IPv6 rules
  if [ "$ENABLE_IPV6" = true ]; then
    if [ ! -f "$template_v6" ]; then
      log_message " WARNING: $template_v6 not found. Keeping IPv4 only."
    else
      _build_rules_block "v6" "$v6_rules"
      if _insert_and_validate_rules "$template_v6" "$v6_rules" "$v6_output"; then
        log_message " IPv6 rules generated and validated"
      else
        log_message " Aborting IPv6 rules. Keeping IPv4 only."
        # Fall back to the unmodified stock template for IPv6.
        cp "$template_v6" "$v6_output" || return 1
      fi
      install_v6=true
    fi
  fi

  # CRITICAL: Ensure all ipsets exist BEFORE any rule file is replaced or UFW
  # is reloaded. Rules that reference a missing set make UFW fail to load.
  log_message " Verifying ipsets exist..."
  ensure_ipsets_exist
  setup_ipsets

  _MISSING_SETS=0
  for_each_enabled_feed _verify_ipsets_callback

  if [ "$_MISSING_SETS" -gt 0 ]; then
    log_message "ERROR: $_MISSING_SETS required ipsets missing. Aborting UFW reload to prevent lockout."
    return 1
  fi

  # Everything validated: back up the live files, then install the new ones.
  local stamp
  stamp=$(date +%Y%m%d-%H%M%S)
  if [ -f "$UFW_RULES_FILE" ]; then
    cp "$UFW_RULES_FILE" "${UFW_RULES_FILE}.backup-${stamp}"
  fi
  if [ "$install_v6" = true ] && [ -f "$UFW_RULES_V6_FILE" ]; then
    cp "$UFW_RULES_V6_FILE" "${UFW_RULES_V6_FILE}.backup-${stamp}"
  fi

  if ! mv "$v4_output" "$UFW_RULES_FILE"; then
    log_message "ERROR: Could not install $UFW_RULES_FILE"
    return 1
  fi
  if [ "$install_v6" = true ] && ! mv "$v6_output" "$UFW_RULES_V6_FILE"; then
    log_message "ERROR: Could not install $UFW_RULES_V6_FILE"
    return 1
  fi

  # Keep SSH reachable: prefer a rate-limited rule, fall back to a plain allow.
  ufw limit "$SSH_PORT/tcp" 2>/dev/null || ufw allow "$SSH_PORT/tcp"

  ipset save > /etc/ipset.conf

  log_message " Reloading UFW..."
  local reload_rc=0
  if ufw status | grep -q "Status: active"; then
    ufw reload || reload_rc=$?
  else
    ufw --force enable || reload_rc=$?
  fi
  if [ "$reload_rc" -ne 0 ]; then
    log_message "ERROR: UFW reload failed (exit $reload_rc). Backups: ${UFW_RULES_FILE}.backup-${stamp}"
    return 1
  fi

  cleanup_old_backups
  log_message "UFW rules applied and validated successfully"
}

# Regenerate the UFW before.rules / before6.rules files from the stock
# templates plus the per-feed rules, then reload (or enable) UFW.
#
# The new files are assembled and validated in a temporary directory and the
# required ipsets are verified BEFORE the live rule files are touched, so an
# aborted run leaves the running configuration exactly as it was.
#
# Globals: UFW_TEMPLATE_DIR (optional; directory holding the stock templates,
#          default /usr/share/ufw), UFW_RULES_FILE, UFW_RULES_V6_FILE,
#          ENABLE_IPV6, SSH_PORT
# Returns: 0 on success, 1 on any failure
apply_ufw_rules() {
  log_message "Applying UFW rules (per-feed)..."

  local template_dir="${UFW_TEMPLATE_DIR:-/usr/share/ufw}"
  local template_v4="$template_dir/before.rules"
  local template_v6="$template_dir/before6.rules"

  if [ ! -f "$template_v4" ]; then
    log_message "ERROR: UFW default template $template_v4 not found"
    return 1
  fi

  local tmpdir
  if ! tmpdir=$(mktemp -d); then
    log_message "ERROR: Could not create temporary directory"
    return 1
  fi

  local rc=0
  _apply_ufw_rules_staged "$tmpdir" "$template_v4" "$template_v6" || rc=$?

  rm -rf -- "$tmpdir"
  return "$rc"
}
|
||||
|
||||
# Install and start a systemd service + timer that runs "<this script> update"
# on the schedule given by $UPDATE_INTERVAL.
# Does nothing when ENABLE_AUTO_UPDATE is "false".
setup_auto_update() {
  if [ "$ENABLE_AUTO_UPDATE" = false ]; then
    return
  fi

  local unit_dir=/etc/systemd/system
  local service_unit="$unit_dir/ufw-threat-feeds-update.service"
  local timer_unit="$unit_dir/ufw-threat-feeds-update.timer"

  # Absolute path of this script, embedded in the service's ExecStart.
  local script_path
  script_path=$(readlink -f "$0")

  cat > "$service_unit" <<SERVICE
[Unit]
Description=Update UFW threat feeds (per-feed)
After=network-online.target

[Service]
Type=oneshot
ExecStart=$script_path update
ExecStartPost=/bin/sh -c 'ipset save > /etc/ipset.conf'
SERVICE

  cat > "$timer_unit" <<TIMER
[Unit]
Description=Update UFW threat feeds $UPDATE_INTERVAL

[Timer]
Unit=ufw-threat-feeds-update.service
OnCalendar=$UPDATE_INTERVAL
Persistent=true
RandomizedDelaySec=1800

[Install]
WantedBy=timers.target
TIMER

  systemctl daemon-reload
  systemctl enable --now ufw-threat-feeds-update.timer
}
|
||||
|
||||
# Install two convenience commands in /usr/local/bin:
#   ufw-whitelist <IP|CIDR>  - add an address to the IPv4 or IPv6 whitelist set
#   ufw-threat-reload        - run "<this script> apply-rules"
create_management_commands() {
  local bin_dir=/usr/local/bin

  # Quoted delimiter: the script text is written verbatim, nothing is expanded here.
  cat > "$bin_dir/ufw-whitelist" <<'WHITELIST_SCRIPT'
#!/bin/bash
[ -z "$1" ] && { echo "Usage: ufw-whitelist <IP|CIDR>"; exit 1; }
if [[ "$1" == *:* ]]; then
ipset add ufw-whitelist-v6 "$1" && echo "Whitelisted IPv6: $1"
else
ipset add ufw-whitelist "$1" && echo "Whitelisted IPv4: $1"
fi
ipset save > /etc/ipset.conf
WHITELIST_SCRIPT

  # Unquoted delimiter: $script_path is expanded now and baked into the wrapper.
  local script_path
  script_path=$(readlink -f "$0")
  cat > "$bin_dir/ufw-threat-reload" <<RELOAD_SCRIPT
#!/bin/bash
$script_path apply-rules
RELOAD_SCRIPT

  chmod +x "$bin_dir/ufw-whitelist" "$bin_dir/ufw-threat-reload"
}
|
||||
|
||||
# Print a table with, for every enabled feed, the number of entries in its
# IPv4 and IPv6 ipsets and the number of "[THREAT:<feed>]" journal lines from
# the last hour.
# Returns: 1 when $FEEDS_CONFIG does not exist, otherwise the status of the
#          last table row printed.
cmd_show_stats() {
  echo "=========================================="
  echo "Per-Feed Blocking Statistics"
  echo "=========================================="
  printf "%-25s %10s %10s %12s\n" "FEED" "IPv4 IPs" "IPv6 IPs" "BLOCKS (1h)"
  echo "-------------------------------------------------------------------"

  if [ ! -f "$FEEDS_CONFIG" ]; then
    echo "ERROR: Config not found. Run 'install' first."
    return 1
  fi

  # Read the journal ONCE and keep only the threat lines; scanning the whole
  # journal again for every feed is what made this command slow.
  local threat_log
  threat_log=$(journalctl --since "1 hour ago" 2>/dev/null | grep -F '[THREAT:') || true

  local enabled name url type description
  local v4_count v6_count blocks
  # `|| [ -n "$enabled" ]` keeps a final line that has no trailing newline.
  while IFS='|' read -r enabled name url type description || [ -n "$enabled" ]; do
    [ "$enabled" = "1" ] || continue

    # Member lines of `ipset list` start with an address; header lines do not.
    v4_count=$(ipset list "${IPSET_PREFIX}-${name}" 2>/dev/null | grep -c '^[0-9]' 2>/dev/null)
    v4_count=${v4_count:-0}

    v6_count=0
    if [ "$ENABLE_IPV6" = true ]; then
      v6_count=$(ipset list "${IPSET_PREFIX}-${name}-v6" 2>/dev/null | grep -c '^[0-9a-fA-F:]' 2>/dev/null)
      v6_count=${v6_count:-0}
    fi

    blocks=$(grep -cF "[THREAT:${name}]" <<< "$threat_log")
    blocks=${blocks:-0}

    printf "%-25s %10d %10d %12d\n" "$name" "$v4_count" "$v6_count" "$blocks"
  done < "$FEEDS_CONFIG"
}
|
||||
|
||||
# Callback for for_each_feed: print one row of the feed listing.
# Arguments: $1 - enabled flag ("1" = enabled), $2 - name, $3 - url,
#            $4 - type, $5 - description (url and type are not printed)
_list_feed_entry() {
  local feed_enabled="$1" name="$2" url="$3" type="$4" description="$5"
  local status

  if [ "$feed_enabled" = "1" ]; then
    status="ENABLED"
  else
    status="DISABLED"
  fi

  printf "%-10s %-25s %s\n" "$status" "$name" "$description"
}
|
||||
|
||||
# Print a table of all configured feeds (enabled and disabled).
cmd_list_feeds() {
  local separator="-------------------------------------------------------------------"

  printf "%-10s %-25s %s\n" "STATUS" "NAME" "DESCRIPTION"
  printf '%s\n' "$separator"

  # One row per feed, rendered by _list_feed_entry.
  for_each_feed _list_feed_entry
}
|
||||
|
||||
# Append a new enabled feed of type "plain" to $FEEDS_CONFIG.
# Globals: FEED_NAME, FEED_URL (read)
# Exits 1 when the name is invalid, the URL is empty or would corrupt the
# '|'-separated config format, or a feed with that name already exists.
cmd_add_feed() {
  validate_feed_name "$FEED_NAME" || exit 1

  if [ -z "${FEED_URL:-}" ]; then
    echo "ERROR: Feed URL cannot be empty"
    exit 1
  fi
  # '|' is the field separator of the config file and whitespace/newlines would
  # split the entry, so neither may appear in the URL.
  if [[ "$FEED_URL" == *'|'* || "$FEED_URL" =~ [[:space:]] ]]; then
    echo "ERROR: Feed URL must not contain '|' or whitespace"
    exit 1
  fi

  if grep -q "^[01]|${FEED_NAME}|" "$FEEDS_CONFIG" 2>/dev/null; then
    echo "Feed exists"
    exit 1
  fi

  # If the file does not end with a newline, add one first so the new entry
  # does not get glued onto the previous line.
  if [ -s "$FEEDS_CONFIG" ] && [ -n "$(tail -c 1 "$FEEDS_CONFIG")" ]; then
    echo >> "$FEEDS_CONFIG"
  fi

  echo "1|${FEED_NAME}|${FEED_URL}|plain|Custom: ${FEED_NAME}" >> "$FEEDS_CONFIG"
  log_message "Added feed: $FEED_NAME"
}
|
||||
|
||||
# Remove a feed from $FEEDS_CONFIG, regenerate the UFW rules, and destroy the
# feed's ipsets once no rule references them any more.
# Globals: FEED_NAME (read)
# Exits 1 on an invalid or unknown feed name; returns 1 if rule regeneration
# fails (the ipsets are then left in place).
cmd_remove_feed() {
  validate_feed_name "$FEED_NAME" || exit 1

  # Refuse unknown names up front: otherwise a typo would still log "Removed"
  # and trigger a full rule regeneration and UFW reload for nothing.
  if ! grep -q "^[01]|${FEED_NAME}|" "$FEEDS_CONFIG" 2>/dev/null; then
    echo "ERROR: Feed '$FEED_NAME' not found in $FEEDS_CONFIG"
    exit 1
  fi

  sed -i "/^[01]|${FEED_NAME}|/d" "$FEEDS_CONFIG"
  log_message "Removed feed: $FEED_NAME"

  log_message "Regenerating UFW rules..."
  apply_ufw_rules || return 1

  # Safe only after the rules no longer reference these sets.
  ipset destroy "${IPSET_PREFIX}-${FEED_NAME}" 2>/dev/null || true
  ipset destroy "${IPSET_PREFIX}-${FEED_NAME}-v6" 2>/dev/null || true
}
|
||||
|
||||
# Mark a feed as enabled in $FEEDS_CONFIG and regenerate the UFW rules.
# Globals: FEED_NAME (read)
# Exits 1 on an invalid or unknown feed name; otherwise returns the status of
# apply_ufw_rules.
cmd_enable_feed() {
  validate_feed_name "$FEED_NAME" || exit 1

  # Refuse unknown names up front: otherwise a typo would still log "Enabled"
  # and trigger a full rule regeneration and UFW reload for nothing.
  if ! grep -q "^[01]|${FEED_NAME}|" "$FEEDS_CONFIG" 2>/dev/null; then
    echo "ERROR: Feed '$FEED_NAME' not found in $FEEDS_CONFIG"
    exit 1
  fi

  sed -i "s/^0|${FEED_NAME}|/1|${FEED_NAME}|/" "$FEEDS_CONFIG"
  log_message "Enabled: $FEED_NAME"

  log_message "Regenerating UFW rules..."
  apply_ufw_rules
}
|
||||
|
||||
# Mark a feed as disabled in $FEEDS_CONFIG, regenerate the UFW rules, and
# destroy the feed's ipsets once no rule references them any more.
# Globals: FEED_NAME (read)
# Exits 1 on an invalid or unknown feed name; returns 1 if rule regeneration
# fails (the ipsets are then left in place).
cmd_disable_feed() {
  validate_feed_name "$FEED_NAME" || exit 1

  # Refuse unknown names up front: otherwise a typo would still log "Disabled"
  # and trigger a full rule regeneration and UFW reload for nothing.
  if ! grep -q "^[01]|${FEED_NAME}|" "$FEEDS_CONFIG" 2>/dev/null; then
    echo "ERROR: Feed '$FEED_NAME' not found in $FEEDS_CONFIG"
    exit 1
  fi

  sed -i "s/^1|${FEED_NAME}|/0|${FEED_NAME}|/" "$FEEDS_CONFIG"
  log_message "Disabled: $FEED_NAME"

  log_message "Regenerating UFW rules..."
  apply_ufw_rules || return 1

  # Safe only after the rules no longer reference these sets.
  ipset destroy "${IPSET_PREFIX}-${FEED_NAME}" 2>/dev/null || true
  ipset destroy "${IPSET_PREFIX}-${FEED_NAME}-v6" 2>/dev/null || true
}
|
||||
|
||||
# Add $WHITELIST_IP to the IPv6 whitelist set when it contains a ':', otherwise
# to the IPv4 whitelist set, then save all ipsets to /etc/ipset.conf.
# Exits 1 when no address was given or when ipset rejects it.
cmd_whitelist_add() {
  if [ -z "$WHITELIST_IP" ]; then
    echo "Usage: $0 whitelist-add <IP|CIDR>"
    exit 1
  fi

  # Pick the target set and the label used in the log line.
  local target_set family_label
  case "$WHITELIST_IP" in
    *:*)
      target_set="$WHITELIST_IPSET_V6"
      family_label="IPv6"
      ;;
    *)
      target_set="$WHITELIST_IPSET"
      family_label="IPv4"
      ;;
  esac

  if ! ipset add "$target_set" "$WHITELIST_IP" 2>/dev/null; then
    echo "Failed to add $WHITELIST_IP"
    exit 1
  fi
  log_message "Added to ${family_label} whitelist: $WHITELIST_IP"

  ipset save > /etc/ipset.conf
}
|
||||
|
||||
# Add the private / link-local / loopback networks to the whitelist ipsets
# (IPv6 only when ENABLE_IPV6=true), reporting each network on stdout, then
# save all ipsets to /etc/ipset.conf.
cmd_whitelist_init() {
  log_message "Initializing whitelist with private networks..."

  local -a v4_networks=("10.0.0.0/8" "172.16.0.0/12" "192.168.0.0/16" "169.254.0.0/16" "127.0.0.0/8")
  local -a v6_networks=("fc00::/7" "fe80::/10" "::1")

  local family target_set net
  local -a networks
  for family in IPv4 IPv6; do
    if [ "$family" = "IPv4" ]; then
      target_set="$WHITELIST_IPSET"
      networks=("${v4_networks[@]}")
    else
      [ "$ENABLE_IPV6" = true ] || continue
      target_set="$WHITELIST_IPSET_V6"
      networks=("${v6_networks[@]}")
    fi

    echo "Adding $family private networks to whitelist..."
    for net in "${networks[@]}"; do
      # ipset fails both for duplicates and for real errors; the two cases are
      # not distinguished here.
      if ipset add "$target_set" "$net" 2>/dev/null; then
        echo " + $net"
      else
        echo " - $net (already exists or error)"
      fi
    done
  done

  ipset save > /etc/ipset.conf
  log_message "Whitelist initialized with RFC1918/private networks"
}
|
||||
|
||||
# Print the members of the IPv4 whitelist set and, when ENABLE_IPV6 is true,
# of the IPv6 set. Members are the `ipset list` lines that start with an
# address character; "No entries" is printed when there are none.
# Globals:  WHITELIST_IPSET, WHITELIST_IPSET_V6, ENABLE_IPV6 (read)
cmd_whitelist_list() {
    local bar="=========================================="

    printf '%s\n' "$bar" "IPv4 Whitelist ($WHITELIST_IPSET)" "$bar"
    ipset list "$WHITELIST_IPSET" 2>/dev/null | grep -E '^[0-9]' || echo "No entries"

    if [ "$ENABLE_IPV6" = true ]; then
        printf '%s\n' "" "$bar" "IPv6 Whitelist ($WHITELIST_IPSET_V6)" "$bar"
        ipset list "$WHITELIST_IPSET_V6" 2>/dev/null | grep -E '^[0-9a-fA-F:]' || echo "No entries"
    fi
}
|
||||
|
||||
# Delete cached feed files that do not belong to an enabled feed.
# A cache file is "<feed>.raw", "<feed>-v4.parsed" or "<feed>-v6.parsed";
# enabled feeds are the config rows that start with "1|" (name in field 2).
# Globals:  FEEDS_CONFIG, CACHE_DIR (read)
cmd_clean_cache() {
    log_message "Cleaning cache for disabled feeds..."

    local removed=0
    local kept=0
    local enabled_feeds cache_file feed_name

    enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2) || true

    for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed; do
        # An unmatched glob stays literal; skip it.
        [ -f "$cache_file" ] || continue

        feed_name=${cache_file##*/}
        feed_name=${feed_name%.raw}
        feed_name=${feed_name%-v4.parsed}
        feed_name=${feed_name%-v6.parsed}

        # Whole-line, fixed-string comparison: a feed name is data, not a
        # regex ("a.b" must not be kept alive by an enabled feed "axb").
        if grep -qxF -- "$feed_name" <<< "$enabled_feeds"; then
            kept=$((kept + 1))
        else
            rm -f "$cache_file"
            removed=$((removed + 1))
        fi
    done

    log_message "Removed $removed cache files, kept $kept active feeds"
}
|
||||
|
||||
# Dry run: build the IPv4 threat-feed rule block, splice it into a scratch
# copy of UFW's stock before.rules and validate the result. Nothing under
# /etc is touched.
# Globals:  FEEDS_CONFIG, ENABLE_IPV6 (read);
#           UFW_TEMPLATE_DIR (optional, defaults to /usr/share/ufw)
# Returns:  1 when the template is missing or validation fails.
cmd_test_rules() {
    log_message "Testing UFW rule generation (dry-run mode)..."

    local template_dir="${UFW_TEMPLATE_DIR:-/usr/share/ufw}"
    if [ ! -f "${template_dir}/before.rules" ]; then
        echo "ERROR: UFW default template ${template_dir}/before.rules not found"
        return 1
    fi

    local test_dir
    test_dir=$(mktemp -d) || return 1
    # A RETURN trap set inside a function stays armed after the function
    # returns, so the handler disarms itself once it has cleaned up.
    trap 'rm -rf "$test_dir"; trap - RETURN' RETURN

    local test_v4="$test_dir/before.rules.test"
    cp "${template_dir}/before.rules" "$test_v4"
    if [ "$ENABLE_IPV6" = true ]; then
        cp "${template_dir}/before6.rules" "$test_dir/before6.rules.test"
    fi

    local v4_rules="$test_dir/v4_rules"
    local v4_output="$test_dir/v4_output"

    _build_rules_block "v4" "$v4_rules"

    # grep -c prints "0" AND exits 1 when nothing matches, so "|| echo 0"
    # would yield "0<newline>0"; default only when grep printed nothing.
    local feed_count
    feed_count=$(grep -c '^1|' "$FEEDS_CONFIG" 2>/dev/null) || true
    echo "Generated rules for ${feed_count:-0} enabled feeds"

    if ! _insert_and_validate_rules "$test_v4" "$v4_rules" "$v4_output"; then
        echo "VALIDATION FAILED"
        return 1
    fi

    echo "Validation passed: exactly 1 *filter block found"

    local total_lines rule_lines
    total_lines=$(wc -l < "$v4_output")
    rule_lines=$(grep -c "^-A " "$v4_output" 2>/dev/null) || true

    echo "Generated ${rule_lines:-0} iptables rules in $total_lines total lines"
    echo ""
    echo "=========================================="
    echo "Sample of generated rules:"
    echo "=========================================="
    # The sample is informational; a missing marker must not fail the test.
    grep "# UFW THREAT FEEDS" -A 10 "$v4_output" | head -15 || true
    echo "..."
    echo ""
    echo "=========================================="
    echo "Test passed - rules would be generated safely"
    echo " To apply these rules, run: $0 apply-rules"
    echo "=========================================="
}
|
||||
|
||||
# Full installation: run every setup step in a fixed order, then print a
# summary of the resulting configuration.
# Globals:  FEEDS_CONFIG, ENABLE_IPV6, ENABLE_AUTO_UPDATE, UPDATE_INTERVAL (read)
cmd_install() {
    log_message "Installing per-feed threat blocking..."

    check_requirements
    create_directory_structure
    initialize_feeds_config
    setup_ipsets
    update_feeds
    apply_ufw_rules
    setup_auto_update
    create_management_commands

    local bar="=========================================="
    local enabled_count
    # grep -c exits 1 on a zero count; the count itself is still printed.
    enabled_count=$(grep -c '^1|' "$FEEDS_CONFIG") || true

    printf '%s\n' \
        "" \
        "$bar" \
        "Per-Feed Installation Complete" \
        "$bar" \
        "Mode: Per-feed ipsets (detailed tracking)" \
        "Feeds: ${enabled_count}" \
        "IPv6: $ENABLE_IPV6" \
        "Auto-update: $ENABLE_AUTO_UPDATE ($UPDATE_INTERVAL)" \
        "" \
        "Commands:" \
        " $0 show-stats # View per-feed statistics" \
        " $0 update # Update all feeds" \
        " ufw-whitelist IP # Whitelist an IP" \
        "" \
        "Logs: grep 'THREAT:' /var/log/syslog" \
        "$bar"
}
|
||||
|
||||
# Entry point: parse the command line (parse_args sets COMMAND and the option
# globals) and dispatch to the matching sub-command. Unknown commands print
# the usage text.
main() {
    parse_args "$@"

    case "$COMMAND" in
        install)
            cmd_install
            ;;
        update)
            check_requirements false
            create_directory_structure
            update_feeds
            ;;
        apply-rules)
            check_requirements
            apply_ufw_rules
            ;;
        test-rules)     cmd_test_rules ;;
        list-feeds)     cmd_list_feeds ;;
        show-stats)     cmd_show_stats ;;
        add-feed)       cmd_add_feed ;;
        remove-feed)    cmd_remove_feed ;;
        enable-feed)    cmd_enable_feed ;;
        disable-feed)   cmd_disable_feed ;;
        whitelist-add)  cmd_whitelist_add ;;
        whitelist-init) cmd_whitelist_init ;;
        whitelist-list) cmd_whitelist_list ;;
        clean-cache)    cmd_clean_cache ;;
        *)              show_usage ;;
    esac
}
|
||||
|
||||
# Script entry point: hand every command-line argument to main unchanged.
main "$@"
|
||||
@@ -0,0 +1,96 @@
|
||||
#!/bin/bash
|
||||
|
||||
####################################################################
|
||||
#### Code-Server Update Script ####
|
||||
#### For RHEL/Rocky/Alma, Oracle Linux, Debian & Ubuntu ####
|
||||
#### ####
|
||||
#### Author: Phil Connor ####
|
||||
#### Contact: contact@mylinux.work ####
|
||||
#### License: MIT ####
|
||||
#### Version: 1.2 ####
|
||||
#### ####
|
||||
#### Usage: sudo ./update-code-server.sh ####
|
||||
####################################################################
|
||||
|
||||
#############################
|
||||
#### User Configurations ####
|
||||
#############################
|
||||
SERVDIR=/usr/local/code-server # where you want the code-server installed

########################
#### System Configs ####
########################
# OS     first word of PRETTY_NAME, lower-cased ("ubuntu", "debian", "red", "rocky", ...)
# OSVER  major part of VERSION_ID
# CSVER  installed code-server version (empty when code-server is not on PATH)
OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}')
CSVER=$(code-server --version 2>/dev/null | awk 'NR == 1 {print $1}')

############################################################
#### Detect Package Manager from OS and OSVer Variables ####
############################################################
# PAKMGR stays empty on an unrecognised distribution.
PAKMGR=""
case "${OS}" in
    ubuntu | debian)
        PAKMGR="apt-get -y"
        ;;
    # AlmaLinux's PRETTY_NAME starts with "AlmaLinux", hence "almalinux".
    centos | red | oracle | rocky | alma | almalinux)
        case "${OSVER}" in
            [0-7]) PAKMGR="yum -y" ;;
            *)     PAKMGR="dnf -y" ;;
        esac
        ;;
esac
|
||||
|
||||
###################
|
||||
#### Update OS ####
|
||||
###################
|
||||
# Refresh the OS packages with the detected package manager.
# Debian-family systems need "update" followed by "upgrade"; on the
# RPM-based systems "update" alone is run.
# Globals:  OS, PAKMGR (read)
# Returns:  1 when no package manager was detected or "update" fails.
update_os() {
    if [ -z "${PAKMGR:-}" ]; then
        echo "No supported package manager detected for '${OS:-unknown}'" >&2
        return 1
    fi

    # PAKMGR holds "<command> -y" and is deliberately left unquoted so that
    # it splits into command and flag.
    ${PAKMGR} update || return 1

    case "${OS}" in
        ubuntu | debian) ${PAKMGR} upgrade ;;
    esac
}
|
||||
###############################################
|
||||
#### Get the latest version of Code Server ####
|
||||
###############################################
|
||||
# Resolve the newest code-server release by following the GitHub
# "releases/latest" redirect, print it, and decide whether an update is due.
# Globals:  CSVER (read); version, compare (written - used by install_codeserver)
#           compare=1 only when the release differs from CSVER and sorts
#           after it with `sort -V`.
# Returns:  1 (with compare=0) when no version number could be derived.
get_latest_version() {
    compare=0

    version="$(curl -fsSLI -o /dev/null -w "%{url_effective}" https://github.com/coder/code-server/releases/latest)"
    version="${version#https://github.com/coder/code-server/releases/tag/}"
    version="${version#v}"

    # If the request failed or was not redirected, what is left is not a
    # version number; never let that trigger a download.
    if [[ ! "$version" =~ ^[0-9]+(\.[0-9]+)*$ ]]; then
        echo "Could not determine the latest code-server release" >&2
        version=""
        return 1
    fi
    echo "$version"

    #### Compare Code-Server versions ####
    if [[ "$version" != "$CSVER" ]] && [[ "$(printf '%s\n' "$CSVER" "$version" | sort -V | tail -1)" == "$version" ]]; then
        compare=1
    fi
}
|
||||
|
||||
#########################################
|
||||
#### Download and Update Codeserver ####
|
||||
#########################################
|
||||
# Download the release selected by get_latest_version and copy it over the
# existing installation. Does nothing unless compare is 1.
# Globals:  compare, version, SERVDIR, PAKMGR (read)
# Returns:  1 when wget cannot be installed, or the download, unpack or
#           copy fails.
install_codeserver() {
    [ "${compare:-0}" = 1 ] || return 0

    # check if command wget exists
    if ! command -v wget >/dev/null 2>&1; then
        ${PAKMGR} install wget || return 1
    fi

    local pkg="code-server-${version}-linux-amd64"
    local workdir
    workdir=$(mktemp -d) || return 1

    # Fetch and unpack BEFORE stopping the service, so that a failed
    # download never leaves code-server down.
    if ! wget -P "$workdir" "https://github.com/coder/code-server/releases/download/v${version}/${pkg}.tar.gz" ||
        ! tar -xvf "${workdir}/${pkg}.tar.gz" -C "$workdir"; then
        echo "Failed to download or unpack ${pkg}.tar.gz" >&2
        rm -rf "$workdir"
        return 1
    fi

    local rc=0
    systemctl stop code-server
    cp -r "${workdir}/${pkg}"/* "${SERVDIR}" || rc=$?
    systemctl start code-server

    rm -rf "$workdir"
    return "$rc"
}
|
||||
|
||||
# The OS package refresh is disabled by default; uncomment to run it first.
#update_os
get_latest_version
install_codeserver
|
||||
Executable
+570
@@ -0,0 +1,570 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
##########################################################################
|
||||
## Prometheus Stack Updater ##
|
||||
## ##
|
||||
## Updates installed Prometheus ecosystem binaries to latest release ##
|
||||
## from GitHub. Only touches components that are already installed. ##
|
||||
## ##
|
||||
## Supported components: ##
|
||||
## prometheus, node_exporter, blackbox_exporter, ##
|
||||
## alertmanager, mysqld_exporter, promtool, amtool, ##
|
||||
## loki, promtail, alloy, grafana ##
|
||||
## ##
|
||||
## Usage: ##
|
||||
## ./update-prometheus-stack.sh [OPTIONS] ##
|
||||
## ##
|
||||
## Options: ##
|
||||
## --check Show what would be updated (no changes) ##
|
||||
## --all Update all installed components ##
|
||||
## --prometheus Update only Prometheus ##
|
||||
## --node-exporter Update only node_exporter ##
|
||||
## --blackbox Update only blackbox_exporter ##
|
||||
## --alertmanager Update only AlertManager ##
|
||||
## --mysql-exporter Update only mysqld_exporter ##
|
||||
## --loki Update only Loki ##
|
||||
## --promtail Update only Promtail ##
|
||||
## --alloy Update only Alloy ##
|
||||
## --grafana Update only Grafana (via package manager) ##
|
||||
## --force Update even if already at latest version ##
|
||||
## --arch <arch> Override architecture (default: auto-detect) ##
|
||||
## --backup-only Backup configs only (no updates) ##
|
||||
## --help Show this help message ##
|
||||
## ##
|
||||
## Author: Phil Connor ##
|
||||
## Contact: pconnor@ara.com ##
|
||||
##########################################################################
|
||||
|
||||
# Install locations and run-time state.
BINDIR="/usr/local/bin"
PROMDIR="/etc/prometheus"
BACKUPDIR="${PROMDIR}/backups"
LOGFILE="/var/log/prometheus-update.log"
# The script runs as root, so the scratch directory must not have a
# guessable name under /tmp; mktemp creates it atomically with mode 0700.
TMPDIR_BASE="$(mktemp -d "${TMPDIR:-/tmp}/prometheus-update.XXXXXX")"
CHECK_ONLY=false
BACKUP_ONLY=false
FORCE=false
ARCH=""
# Result counters shown in the final summary.
UPDATED=0
SKIPPED=0
FAILED=0
# Empty means "every installed component".
COMPONENTS_REQUESTED=()

# ANSI colour sequences (interpreted by echo -e).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
|
||||
|
||||
# Print a timestamped message and append it to $LOGFILE.
# Arguments: $1 - message (may contain echo -e escape sequences)
log() {
    local msg
    msg="[$(date '+%Y-%m-%d %H:%M:%S')] $1"
    # tee keeps copying to stdout even when it cannot open the log file, so
    # its failure must only be ignored - echoing again would print the
    # message twice.
    echo -e "$msg" | tee -a "$LOGFILE" 2>/dev/null || true
}
|
||||
|
||||
# Severity helpers: prefix the message with a coloured marker and pass it to
# log. log_err additionally redirects the output to stderr.
log_ok() {
    log "${GREEN}✓${NC} $1"
}

log_warn() {
    log "${YELLOW}⚠${NC} $1"
}

log_err() {
    log "${RED}✗${NC} $1" >&2
}

log_info() {
    log "${CYAN}→${NC} $1"
}
|
||||
|
||||
# Remove the scratch directory; installed as the EXIT handler so it runs on
# every exit path.
# shellcheck disable=SC2329
cleanup() {
    # Guard against an empty/unset path before handing it to rm -rf.
    # shellcheck disable=SC2317
    if [[ -n "${TMPDIR_BASE:-}" && -d "$TMPDIR_BASE" ]]; then
        rm -rf -- "$TMPDIR_BASE"
    fi
}
trap cleanup EXIT
|
||||
|
||||
# Print the usage section of this script's header comment and exit 0.
# The section runs from the "Usage:" header line to the closing line of
# '#' characters; the comment box borders are stripped from both ends of
# every line (previously the trailing border was printed).
show_help() {
    sed -n '/^## Usage:/,/^####/{ /^####/d; s/^## //; s/[[:space:]]*##$//; p; }' "$0"
    exit 0
}
|
||||
|
||||
# Print the architecture suffix used in release file names.
# An explicit --arch value (global ARCH) wins; otherwise `uname -m` is mapped.
# Unknown machines still fall back to "amd64" as before.
detect_arch() {
    if [[ -n "$ARCH" ]]; then
        echo "$ARCH"
        return
    fi

    local machine
    machine=$(uname -m)
    case "$machine" in
        x86_64 | amd64)            echo "amd64" ;;
        aarch64 | arm64)           echo "arm64" ;;
        armv7l)                    echo "armv7" ;;
        armv6l)                    echo "armv6" ;;
        i386 | i686)               echo "386" ;;
        # For these the release suffix is assumed to equal the uname value.
        ppc64le | s390x | riscv64) echo "$machine" ;;
        *)                         echo "amd64" ;;
    esac
}
|
||||
|
||||
# Print the x.y.z version of an installed binary.
# Arguments: $1 - binary name inside $BINDIR
# Outputs:   "not_installed" when the binary is missing or not executable,
#            "unknown" when the name is unsupported or no version was found,
#            otherwise the version taken from the first line of `--version`.
get_installed_version() {
    local binary="$1"
    local path="${BINDIR}/${binary}"
    if [[ ! -x "$path" ]]; then
        echo "not_installed"
        return
    fi

    local pattern
    case "$binary" in
        prometheus | promtool | node_exporter | blackbox_exporter | mysqld_exporter | alertmanager | amtool | loki | promtail)
            pattern='version ([0-9]+\.[0-9]+\.[0-9]+)'
            ;;
        alloy)
            pattern='([0-9]+\.[0-9]+\.[0-9]+)'
            ;;
        *)
            echo "unknown"
            return
            ;;
    esac

    # Capture first and match in the shell: with pipefail, a non-zero exit
    # (or SIGPIPE) of the binary inside a "cmd | head | grep || echo unknown"
    # pipeline printed "unknown" in addition to the version.
    local banner
    banner=$("$path" --version 2>&1) || true
    banner=${banner%%$'\n'*}

    if [[ "$banner" =~ $pattern ]]; then
        echo "${BASH_REMATCH[1]}"
    else
        echo "unknown"
    fi
}
|
||||
|
||||
# Print the latest released x.y.z version for a component.
# Arguments: $1 - "<owner>/<component>"; prometheus/* is looked up on the
#                 prometheus.io download page, grafana/* on the GitHub
#                 "releases/latest" page.
# Returns:   1 (after logging an error) when no version could be found.
get_latest_version() {
    local repo="$1"
    local version=""
    local component

    case "$repo" in
        prometheus/*)
            component="${repo#prometheus/}"
            # First "<component>-x.y.z" occurrence on the download page.
            version=$(curl -sf "https://prometheus.io/download/" |
                grep -oP "${component}-\K[0-9]+\.[0-9]+\.[0-9]+" | head -1) || true
            ;;
        grafana/*)
            version=$(curl -sfL "https://github.com/${repo}/releases/latest" |
                grep -oP 'releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -1) || true
            ;;
    esac

    if [[ -z "$version" ]]; then
        log_err "Failed to query latest version for ${repo}"
        return 1
    fi
    echo "$version"
}
|
||||
|
||||
# Print the GitHub release download URL for a component.
# Arguments: $1 - "<owner>/<component>"
#            $2 - version without the leading "v"
#            $3 - file pattern with the architecture already substituted
# prometheus/* assets are named "<component>-<version>.<pattern>";
# grafana/* assets are named exactly "<pattern>". Other owners print nothing.
get_download_url() {
    local repo="$1"
    local version="$2"
    local pattern="$3"
    local component="${repo#*/}"
    local base="https://github.com/${repo}/releases/download/v${version}"

    case "$repo" in
        prometheus/*) echo "${base}/${component}-${version}.${pattern}" ;;
        grafana/*)    echo "${base}/${pattern}" ;;
    esac
}
|
||||
|
||||
# Download a release asset into a work directory and unpack it there.
# Arguments: $1 - URL; $2 - work directory (created when missing)
# .tar.gz/.tgz and .zip archives are extracted; anything else is treated as
# a bare binary and made executable.
# Returns:   1 when the download fails, otherwise the extraction status.
download_and_extract() {
    local url="$1"
    local workdir="$2"
    local filename="${url##*/}"
    local archive="${workdir}/${filename}"

    mkdir -p "$workdir"
    log_info "Downloading ${filename}"
    if ! curl -sfL -o "$archive" "$url"; then
        log_err "Download failed: ${url}"
        return 1
    fi

    # Extract with explicit target directories instead of cd-ing into the
    # work directory, which changed the cwd of the whole script for good.
    case "$filename" in
        *.tar.gz | *.tgz)
            tar -xzf "$archive" -C "$workdir"
            ;;
        *.zip)
            unzip -q -o "$archive" -d "$workdir"
            ;;
        *)
            chmod +x "$archive"
            ;;
    esac
}
|
||||
|
||||
# Stop a systemd service if it is currently active.
# Arguments: $1 - unit name
# Returns:   0 when the service was active and has been stopped,
#            1 when it was not running (callers use this to decide whether
#            a restart is needed afterwards).
stop_service() {
    local service="$1"

    if ! systemctl is-active --quiet "$service" 2>/dev/null; then
        return 1
    fi

    log_info "Stopping ${service}"
    systemctl stop "$service"
    return 0
}
|
||||
|
||||
# Start a systemd service again after its binary was replaced.
# Arguments: $1 - unit name
# The only caller invokes this when the service was running before the
# update, so it is started regardless of its "enabled" state; gating on
# is-enabled left manually started services stopped after an update.
# Returns:   1 when systemd fails to start the unit.
start_service() {
    local service="$1"

    log_info "Starting ${service}"
    # Reload unit files first; a failed reload should not prevent the start.
    systemctl daemon-reload || true
    if ! systemctl start "$service"; then
        log_err "Failed to start ${service}"
        return 1
    fi
}
|
||||
|
||||
# Copy an installed binary to "<path>.backup.<timestamp>" before it is
# replaced. Does nothing when the binary does not exist.
# Arguments: $1 - binary name inside $BINDIR
backup_binary() {
    local binary="$1"
    local path="${BINDIR}/${binary}"

    [[ -f "$path" ]] || return 0

    local backup
    backup="${path}.backup.$(date +%Y%m%d_%H%M%S)"
    # -p keeps mode and timestamps so the backup can be restored as-is.
    cp -p "$path" "$backup"
    log_info "Backed up ${path} → ${backup}"
}
|
||||
|
||||
# Copy the listed config files to $BACKUPDIR as "<basename>.<timestamp>".
# Arguments: $1 - component label (not used, kept for call compatibility)
#            $2 - whitespace-separated list of config file paths; entries
#                 that are not regular files are skipped
backup_configs() {
    local config_files="$2"
    if [[ -z "$config_files" ]]; then
        return 0
    fi

    mkdir -p "$BACKUPDIR"

    local timestamp cfg filename
    local -a cfg_list
    timestamp=$(date +%Y%m%d_%H%M%S)

    # Split on whitespace without glob-expanding the entries. read returns
    # non-zero at end of input, which is expected here.
    IFS=$' \t\n' read -r -d '' -a cfg_list <<< "$config_files" || true

    for cfg in "${cfg_list[@]}"; do
        [[ -f "$cfg" ]] || continue
        filename=$(basename "$cfg")
        cp "$cfg" "${BACKUPDIR}/${filename}.${timestamp}"
        log_info "Config backed up: ${cfg} → ${BACKUPDIR}/${filename}.${timestamp}"
    done
}
|
||||
|
||||
# Locate an extracted binary inside a work directory.
# An exact file name wins (tarball layout); otherwise "<bin>-*" is accepted
# (zip layout, e.g. loki-linux-amd64), but never the downloaded archive.
_find_extracted_binary() {
    local dir="$1"
    local bin="$2"
    local hit

    hit=$(find "$dir" -type f -name "$bin" 2>/dev/null | head -1) || true
    if [[ -z "$hit" ]]; then
        hit=$(find "$dir" -type f -name "${bin}-*" \
            ! -name '*.tar.gz' ! -name '*.tgz' ! -name '*.zip' 2>/dev/null | head -1) || true
    fi
    [[ -n "$hit" ]] || return 1
    printf '%s\n' "$hit"
}

# Update one binary-distributed component if it is installed and outdated.
# Arguments: $1 name (display)       $2 repo "<owner>/<component>"
#            $3 systemd unit         $4 space-separated binaries (first one
#                                       is used for version detection)
#            $5 asset pattern, "ARCH" is replaced by detect_arch's value
#            $6 owner for the installed files (default: prometheus)
#            $7 space-separated config files to back up (default: none)
# Globals:   BINDIR, TMPDIR_BASE, FORCE, CHECK_ONLY (read);
#            UPDATED, SKIPPED, FAILED (incremented)
# Returns:   0 when not installed, already current, check-only, or updated;
#            1 on any failure (FAILED is incremented).
update_component() {
    local name="$1"
    local repo="$2"
    local service_name="$3"
    local binaries="$4"
    local file_pattern="$5"
    local owner="${6:-prometheus}"
    local config_files="${7:-}"

    local hw
    hw=$(detect_arch)

    local primary="${binaries%% *}"
    local installed
    installed=$(get_installed_version "$primary")

    # Only components that are already installed are touched.
    if [[ "$installed" == "not_installed" ]]; then
        return 0
    fi

    local latest
    latest=$(get_latest_version "$repo") || { ((FAILED++)) || true; return 1; }

    echo ""
    log " ${CYAN}${name}${NC}: installed=${installed} latest=${latest}"

    if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then
        log_ok "Already at latest version"
        ((SKIPPED++)) || true
        return 0
    fi

    if [[ "$CHECK_ONLY" == "true" ]]; then
        if [[ "$installed" != "$latest" ]]; then
            log_warn "Update available: ${installed} → ${latest}"
        fi
        return 0
    fi

    local pattern="${file_pattern//ARCH/${hw}}"
    local url
    url=$(get_download_url "$repo" "$latest" "$pattern")
    if [[ -z "$url" ]]; then
        log_err "Could not find download URL for ${name} (pattern: ${pattern})"
        ((FAILED++)) || true
        return 1
    fi

    local workdir="${TMPDIR_BASE}/${name}"
    download_and_extract "$url" "$workdir" || { ((FAILED++)) || true; return 1; }

    backup_configs "$name" "$config_files"

    local was_running=false
    if stop_service "$service_name"; then
        was_running=true
    fi

    local bin found
    local replaced=0
    local -a bin_list
    read -r -a bin_list <<< "$binaries"

    for bin in "${bin_list[@]}"; do
        found=$(_find_extracted_binary "$workdir" "$bin") || true
        if [[ -z "$found" ]]; then
            log_warn "Binary ${bin} not found in download"
            continue
        fi

        backup_binary "$bin"
        if ! mv "$found" "${BINDIR}/${bin}"; then
            log_err "Could not install ${bin} into ${BINDIR}"
            continue
        fi
        chown "${owner}:${owner}" "${BINDIR}/${bin}" 2>/dev/null ||
            chown "${owner}." "${BINDIR}/${bin}" 2>/dev/null || true
        chmod 755 "${BINDIR}/${bin}"
        log_ok "Updated ${bin}"
        replaced=$((replaced + 1))
    done

    # Bring the service back whether or not the replacement worked.
    if [[ "$was_running" == "true" ]]; then
        start_service "$service_name" || log_err "Could not restart ${service_name}"
    fi

    if ((replaced == 0)); then
        log_err "${name}: no binary was replaced"
        ((FAILED++)) || true
        return 1
    fi

    local new_ver
    new_ver=$(get_installed_version "$primary")
    log_ok "${name} updated: ${installed} → ${new_ver}"
    ((UPDATED++)) || true
}
|
||||
|
||||
# Succeed when the named package is installed according to rpm or dpkg.
# Arguments: $1 - package name
# Both databases are consulted: the rpm tool can be present on a Debian
# host, where it knows nothing about dpkg-installed packages.
is_pkg_installed() {
    local pkg="$1"

    if command -v rpm >/dev/null 2>&1 && rpm -q "$pkg" >/dev/null 2>&1; then
        return 0
    fi

    if command -v dpkg >/dev/null 2>&1 && dpkg -l "$pkg" 2>/dev/null | grep -q "^ii"; then
        return 0
    fi

    return 1
}
|
||||
|
||||
# Update Alloy through whichever channel it was installed with.
# Does nothing when no alloy binary is found on PATH or in $BINDIR. A
# package-managed install is upgraded via update_alloy_pkg; a standalone
# binary is replaced from the GitHub release via update_component.
update_alloy() {
    if ! command -v alloy >/dev/null 2>&1 && [[ ! -x "${BINDIR}/alloy" ]]; then
        return 0
    fi

    if is_pkg_installed "alloy"; then
        log_info "Alloy installed via package manager — updating with dnf/apt"
        update_alloy_pkg
        return
    fi

    log_info "Alloy installed as standalone binary — updating from GitHub"
    update_component "Alloy" "grafana/alloy" "alloy" "alloy" "alloy-linux-ARCH.zip" "root" "/etc/alloy/config.alloy"
}
|
||||
|
||||
# Upgrade a package-managed Alloy install (apt-get, dnf or yum).
# Globals:  BINDIR, FORCE, CHECK_ONLY (read); UPDATED, SKIPPED, FAILED
#           (incremented)
# Returns:  1 when the latest version cannot be queried, no supported
#           package manager exists, or the package manager fails.
update_alloy_pkg() {
    local alloy_bin="alloy"
    command -v alloy >/dev/null 2>&1 || alloy_bin="${BINDIR}/alloy"

    # Default to "unknown" separately so that a pipeline failure can never
    # append a second line to an already printed version.
    local installed
    installed=$("$alloy_bin" --version 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1) || true
    installed=${installed:-unknown}

    local latest
    latest=$(curl -sfL "https://github.com/grafana/alloy/releases/latest" |
        grep -oP 'releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -1) || true
    if [[ -z "$latest" ]]; then
        log_err "Failed to query latest version for Alloy"
        ((FAILED++)) || true
        return 1
    fi

    echo ""
    log " ${CYAN}Alloy${NC}: installed=${installed} latest=${latest}"

    if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then
        log_ok "Already at latest version"
        ((SKIPPED++)) || true
        return 0
    fi

    if [[ "$CHECK_ONLY" == "true" ]]; then
        if [[ "$installed" != "$latest" ]]; then
            log_warn "Update available: ${installed} → ${latest}"
        fi
        return 0
    fi

    backup_configs "Alloy" "/etc/alloy/config.alloy"

    local rc=0
    if command -v apt-get >/dev/null 2>&1; then
        { apt-get -y update && apt-get -y install --only-upgrade alloy; } || rc=$?
    elif command -v dnf >/dev/null 2>&1; then
        dnf -y upgrade alloy || rc=$?
    elif command -v yum >/dev/null 2>&1; then
        yum -y update alloy || rc=$?
    else
        log_err "No supported package manager found for Alloy update"
        ((FAILED++)) || true
        return 1
    fi

    if ((rc != 0)); then
        log_err "Package manager failed to upgrade Alloy (exit ${rc})"
        ((FAILED++)) || true
        return 1
    fi

    systemctl daemon-reload
    systemctl restart alloy

    local new_ver
    new_ver=$("$alloy_bin" --version 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1) || true
    log_ok "Alloy updated: ${installed} → ${new_ver:-unknown}"
    ((UPDATED++)) || true
}
|
||||
|
||||
# Upgrade Grafana through the system package manager.
# Does nothing when grafana-server is not on PATH.
# Globals:  FORCE, CHECK_ONLY (read); UPDATED, SKIPPED, FAILED (incremented)
# Returns:  1 when the latest version cannot be queried, no supported
#           package manager exists, or the package manager fails.
update_grafana() {
    if ! command -v grafana-server >/dev/null 2>&1; then
        return 0
    fi

    local installed
    installed=$(grafana-server -v 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1) || true
    installed=${installed:-unknown}

    local latest
    latest=$(curl -sfL "https://github.com/grafana/grafana/releases/latest" |
        grep -oP 'releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -1) || true
    if [[ -z "$latest" ]]; then
        log_err "Failed to query latest version for Grafana"
        ((FAILED++)) || true
        return 1
    fi

    echo ""
    log " ${CYAN}Grafana${NC}: installed=${installed} latest=${latest}"

    if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then
        log_ok "Already at latest version"
        ((SKIPPED++)) || true
        return 0
    fi

    if [[ "$CHECK_ONLY" == "true" ]]; then
        if [[ "$installed" != "$latest" ]]; then
            log_warn "Update available: ${installed} → ${latest}"
        fi
        return 0
    fi

    backup_configs "Grafana" "/etc/grafana/grafana.ini /etc/grafana/ldap.toml"

    log_info "Updating Grafana via package manager"
    # Capture the status instead of letting errexit abort the whole run, so
    # that the failure is counted and the summary is still printed.
    local rc=0
    if command -v apt-get >/dev/null 2>&1; then
        { apt-get -y update && apt-get -y install --only-upgrade grafana; } || rc=$?
    elif command -v dnf >/dev/null 2>&1; then
        dnf -y upgrade grafana || rc=$?
    elif command -v yum >/dev/null 2>&1; then
        yum -y update grafana || rc=$?
    else
        log_err "No supported package manager found for Grafana update"
        ((FAILED++)) || true
        return 1
    fi

    if ((rc != 0)); then
        log_err "Package manager failed to upgrade Grafana (exit ${rc})"
        ((FAILED++)) || true
        return 1
    fi

    systemctl daemon-reload
    systemctl restart grafana-server

    local new_ver
    new_ver=$(grafana-server -v 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1) || true
    log_ok "Grafana updated: ${installed} → ${new_ver:-unknown}"
    ((UPDATED++)) || true
}
|
||||
|
||||
# Decide whether a component was selected on the command line.
# Arguments: $1 - component key (e.g. "prometheus", "mysql_exporter")
# Returns:   0 when nothing specific was requested (update everything) or
#            the key is in COMPONENTS_REQUESTED; 1 otherwise.
should_update() {
    local component="$1"
    local requested

    if [[ ${#COMPONENTS_REQUESTED[@]} -eq 0 ]]; then
        return 0
    fi

    for requested in "${COMPONENTS_REQUESTED[@]}"; do
        if [[ "$requested" == "$component" ]]; then
            return 0
        fi
    done
    return 1
}
|
||||
|
||||
# Parse the command line into the option globals.
# Globals written: CHECK_ONLY, BACKUP_ONLY, FORCE, ARCH, COMPONENTS_REQUESTED
# --help prints the usage and exits 0; an unknown option or a missing
# --arch value reports the error and exits 1.
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --check)          CHECK_ONLY=true; shift ;;
            --backup-only)    BACKUP_ONLY=true; shift ;;
            --force)          FORCE=true; shift ;;
            # --all clears any earlier selection: empty means "everything".
            --all)            COMPONENTS_REQUESTED=(); shift ;;
            --prometheus)     COMPONENTS_REQUESTED+=("prometheus"); shift ;;
            --node-exporter)  COMPONENTS_REQUESTED+=("node_exporter"); shift ;;
            --blackbox)       COMPONENTS_REQUESTED+=("blackbox"); shift ;;
            --alertmanager)   COMPONENTS_REQUESTED+=("alertmanager"); shift ;;
            --mysql-exporter) COMPONENTS_REQUESTED+=("mysql_exporter"); shift ;;
            --loki)           COMPONENTS_REQUESTED+=("loki"); shift ;;
            --promtail)       COMPONENTS_REQUESTED+=("promtail"); shift ;;
            --alloy)          COMPONENTS_REQUESTED+=("alloy"); shift ;;
            --grafana)        COMPONENTS_REQUESTED+=("grafana"); shift ;;
            --arch)
                # Without this check "set -u" aborts on the unbound $2.
                if [[ $# -lt 2 || -z "$2" ]]; then
                    log_err "--arch requires a value"
                    exit 1
                fi
                ARCH="$2"
                shift 2
                ;;
            --help)
                show_help
                ;;
            *)
                log_err "Unknown option: $1"
                # show_help exits 0 itself; run it in a subshell so that a
                # usage error still ends with a failure status.
                (show_help) >&2 || true
                exit 1
                ;;
        esac
    done
}
|
||||
|
||||
# Entry point: parse options, require root, then either back up the known
# config files (--backup-only) or walk the component table and print a
# summary.
# Exit status: 1 when not root or when any component failed, else 0.
main() {
    parse_arguments "$@"

    if [[ $EUID -ne 0 ]]; then
        log_err "This script must be run as root"
        exit 1
    fi

    mkdir -p "$TMPDIR_BASE" "$(dirname "$LOGFILE")"
    touch "$LOGFILE"

    local mode="UPDATE"
    if [[ "$CHECK_ONLY" == "true" ]]; then mode="CHECK"; fi
    if [[ "$BACKUP_ONLY" == "true" ]]; then mode="BACKUP"; fi

    echo ""
    echo "=============================================="
    echo " Prometheus Stack Updater [${mode}]"
    echo " $(date '+%Y-%m-%d %H:%M:%S')"
    echo " Architecture: $(detect_arch)"
    echo "=============================================="

    if [[ "$BACKUP_ONLY" == "true" ]]; then
        local configs=(
            "$PROMDIR/prometheus.yml"
            "$PROMDIR/blackbox.yml"
            "$PROMDIR/alertmanager.yml"
            "/etc/.mysqld_exporter.cnf"
            "/etc/loki/loki-config.yml"
            "/etc/promtail/promtail-config.yml"
            "/etc/alloy/config.alloy"
            "/etc/grafana/grafana.ini"
            "/etc/grafana/ldap.toml"
        )
        local backed_up=0
        local timestamp cfg filename
        mkdir -p "$BACKUPDIR"
        timestamp=$(date +%Y%m%d_%H%M%S)

        for cfg in "${configs[@]}"; do
            [[ -f "$cfg" ]] || continue
            filename=$(basename "$cfg")
            cp "$cfg" "${BACKUPDIR}/${filename}.${timestamp}"
            log_ok "Backed up ${cfg} → ${BACKUPDIR}/${filename}.${timestamp}"
            # Not ((backed_up++)): post-incrementing 0 evaluates to 0, which
            # is a failure status and ended the script under "set -e" right
            # after the first file.
            backed_up=$((backed_up + 1))
        done

        echo ""
        log "Backed up ${backed_up} config file(s) to ${BACKUPDIR}"
        exit 0
    fi

    # A failing updater is already counted in FAILED. "|| true" keeps
    # "set -e" from aborting the run at the first failed component, so the
    # remaining components are processed and the summary is printed.
    #                                                    Name                Repo                           Service             Binaries               File Pattern              Owner          Config Files
    should_update "prometheus"     && { update_component "Prometheus"        "prometheus/prometheus"        "prometheus"        "prometheus promtool"  "linux-ARCH.tar.gz"       "prometheus"   "$PROMDIR/prometheus.yml"           || true; }
    should_update "node_exporter"  && { update_component "Node Exporter"     "prometheus/node_exporter"     "node_exporter"     "node_exporter"        "linux-ARCH.tar.gz"       "root"         ""                                  || true; }
    should_update "blackbox"       && { update_component "Blackbox Exporter" "prometheus/blackbox_exporter" "blackbox_exporter" "blackbox_exporter"    "linux-ARCH.tar.gz"       "prometheus"   "$PROMDIR/blackbox.yml"             || true; }
    should_update "alertmanager"   && { update_component "AlertManager"      "prometheus/alertmanager"      "alertmanager"      "alertmanager amtool"  "linux-ARCH.tar.gz"       "alertmanager" "$PROMDIR/alertmanager.yml"         || true; }
    should_update "mysql_exporter" && { update_component "MySQL Exporter"    "prometheus/mysqld_exporter"   "mysqld_exporter"   "mysqld_exporter"      "linux-ARCH.tar.gz"       "prometheus"   "/etc/.mysqld_exporter.cnf"         || true; }
    should_update "loki"           && { update_component "Loki"              "grafana/loki"                 "loki"              "loki"                 "loki-linux-ARCH.zip"     "loki"         "/etc/loki/loki-config.yml"         || true; }
    should_update "promtail"       && { update_component "Promtail"          "grafana/loki"                 "promtail"          "promtail"             "promtail-linux-ARCH.zip" "promtail"     "/etc/promtail/promtail-config.yml" || true; }
    should_update "alloy"          && { update_alloy || true; }
    should_update "grafana"        && { update_grafana || true; }

    echo ""
    echo "=============================================="
    echo -e " Results: ${GREEN}${UPDATED} updated${NC} ${YELLOW}${SKIPPED} current${NC} ${RED}${FAILED} failed${NC}"
    echo "=============================================="
    echo ""

    if [[ "$CHECK_ONLY" == "false" ]]; then
        log "Log saved to ${LOGFILE}"
    fi

    if ((FAILED > 0)); then
        exit 1
    fi
    exit 0
}
|
||||
|
||||
# Script entry point: hand every command-line argument to main unchanged.
main "$@"
|
||||
@@ -0,0 +1,692 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Monitors RDP user sessions and exports metrics for Prometheus windows_exporter.
|
||||
|
||||
.DESCRIPTION
|
||||
This script monitors the number of active RDP user sessions and creates Prometheus-formatted metrics.
|
||||
The metrics are written to a text file that can be consumed by the windows_exporter.
|
||||
It can also run periodically.
|
||||
|
||||
.PARAMETER MetricsPath
|
||||
The path where the Prometheus metrics file will be written.
|
||||
|
||||
.PARAMETER IntervalSeconds
|
||||
The interval in seconds for the scheduled task. Default is 60 seconds.
|
||||
|
||||
.PARAMETER RunOnce
|
||||
Switch to run the script once and exit instead of creating a scheduled task.
|
||||
|
||||
.PARAMETER Debug
|
||||
Switch to run the script in debug mode.
|
||||
|
||||
.PARAMETER RunOnce
|
||||
Switch to run the script once and exit instead of creating a scheduled task.
|
||||
|
||||
.PARAMETER DryRun
|
||||
Switch to output metrics to console instead of writing to file.
|
||||
|
||||
.PARAMETER Verbose
|
||||
Switch to enable verbose debug output.
|
||||
|
||||
.PARAMETER Quiet
|
||||
Switch to suppress non-error output.
|
||||
|
||||
.PARAMETER NoSchedule
|
||||
Switch to skip scheduled task creation.
|
||||
|
||||
.PARAMETER Version
|
||||
Switch to display script version and exit.
|
||||
|
||||
.NOTES
|
||||
Version: 1.1.2-20251002
|
||||
Author: Phil Connor contact@mylinux.work
|
||||
|
||||
Features:
|
||||
- Monitors active RDP user sessions using quser command
|
||||
- Captures username, session name, session ID, state (Active/Disconnected), idle time, and logon time
|
||||
- Attempts to correlate session IDs with client IP addresses using qwinsta
|
||||
- Writes metrics to a text file for consumption by windows_exporter.
|
||||
- Reads last 10 PowerShell commands from each user's PSReadline history file.
|
||||
#>
|
||||
|
||||
# Script parameters.
#   MetricsPath      target .prom file; when the path names a directory, that
#                    directory must already exist
#   IntervalSeconds  scheduled-task interval, at least 1 second
#   remaining switches: see the comment-based help at the top of the script
param(
    [ValidateScript({
        if ($_) {
            # A bare file name has no parent part; Test-Path must not be
            # called with an empty string.
            $parentDir = Split-Path $_ -Parent
            if ($parentDir -and -not (Test-Path $parentDir)) {
                throw "Directory for metrics path does not exist: $parentDir"
            }
        }
        return $true
    })]
    [string]$MetricsPath = "C:\Program Files\windows_exporter\textfile_inputs\users_logged_in.prom",

    [ValidateRange(1, [int]::MaxValue)]
    [int]$IntervalSeconds = 60,

    [switch]$RunOnce,
    [switch]$Debug,
    [switch]$DryRun,
    [switch]$Verbose,
    [switch]$Quiet,
    [switch]$NoSchedule,
    [switch]$Version
)
|
||||
|
||||
# Handle version display
if ($Version) {
    Write-Host "Windows RDP User Monitor PowerShell Script"
    # Keep in sync with the Version field in the .NOTES help section.
    Write-Host "Version: 1.1.2-20251002"
    Write-Host "Author: Phil Connor contact@mylinux.work"
    exit 0
}

# Set up logging preferences based on Verbose/Quiet flags.
# -Quiet is applied last, so it wins when both switches are given.
if ($Verbose) {
    $VerbosePreference = 'Continue'
    $InformationPreference = 'Continue'
}
if ($Quiet) {
    $VerbosePreference = 'SilentlyContinue'
    $InformationPreference = 'SilentlyContinue'
    $WarningPreference = 'SilentlyContinue'
}
|
||||
|
||||
# Enhanced logging functions
|
||||
function Write-InfoLog {
|
||||
param([string]$Message)
|
||||
if (-not $Quiet) {
|
||||
Write-Host "[INFO] $(Get-Date -Format 'yyyy-MM-dd HH:mm:ss') $Message" -ForegroundColor Green
|
||||
}
|
||||
}
|
||||
|
||||
function Write-VerboseLog {
    # Writes a cyan, timestamped "[VERBOSE]" line to the host.
    # Output is produced only when the script-level -Verbose switch is set.
    param([string]$Message)

    if (-not $Verbose) { return }

    $timestamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    Write-Host "[VERBOSE] $timestamp $Message" -ForegroundColor Cyan
}
|
||||
|
||||
# Configuration constants for the script
|
||||
$script:Config = @{
    # Primary Prometheus metric name
    METRIC_NAME         = "windows_rdp_users_logged_in"

    # Regex pattern to match RDP session names
    RDP_SESSION_PATTERN = "rdp-tcp#\d+|console"

    # Expected quser output header format
    QUSER_HEADER_REGEX  = "USERNAME.*SESSIONNAME.*ID.*STATE"

    # Pattern for IP extraction
    QWINSTA_IP_REGEX    = '^\s*(\S+)\s+(\S+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+\.\d+\.\d+\.\d+)'

    # Column positions in quser output (zero-based token index)
    COLUMNS             = @{
        USERNAME    = 0
        SESSION     = 1
        ID          = 2
        STATE       = 3
        IDLE        = 4
        LOGON_START = 5
    }
}
|
||||
|
||||
# Sanitize string values for use as Prometheus metric labels
|
||||
# Removes or replaces characters that would break Prometheus metric format
|
||||
function ConvertTo-MetricLabel {
    # Makes a string usable as a Prometheus label value.
    # Double quotes, backslashes, newlines, carriage returns, tabs and ">" become "_".
    # Results longer than 200 characters are cut to 200 and suffixed with "...".
    # A null or empty input yields an empty string.
    param([AllowEmptyString()][string]$Value)

    $maxLength = 200

    if ([string]::IsNullOrEmpty($Value)) { return "" }

    $clean = $Value -replace '["\\\n\r\t>]', '_'
    if ($clean.Length -le $maxLength) {
        return $clean
    }
    return $clean.Substring(0, $maxLength) + "..."
}
|
||||
|
||||
# Format metric data into Prometheus text format
|
||||
function Write-PrometheusMetric {
    # Renders one metric family in Prometheus text format and returns the lines.
    #   Name    - metric name, prefixed to every sample line
    #   Help    - text for the "# HELP" header
    #   Type    - text for the "# TYPE" header
    #   Metrics - items exposing .Labels (pre-formatted "{...}" text) and .Value
    # If any item is null or lacks Labels/Value, nothing is returned and the
    # failure is reported through Write-Error.
    param(
        [ValidateNotNullOrEmpty()][string]$Name,
        [ValidateNotNullOrEmpty()][string]$Help,
        [ValidateNotNullOrEmpty()][string]$Type,
        [ValidateNotNull()][array]$Metrics
    )

    try {
        # Header lines first, then one sample line per metric
        $lines = @("# HELP $Name $Help", "# TYPE $Name $Type")

        foreach ($sample in $Metrics) {
            $isIncomplete = ($null -eq $sample) -or ($null -eq $sample.Labels) -or ($null -eq $sample.Value)
            if ($isIncomplete) {
                throw "Invalid metric data"
            }
            $lines += "$Name$($sample.Labels) $($sample.Value)"
        }

        $lines
    }
    catch {
        Write-Error "Failed to write metric: $($_.Exception.Message)"
    }
}
|
||||
|
||||
# Execute quser command and validate output format
|
||||
# Returns raw quser command output after basic validation
|
||||
function Get-QUserData {
    # Runs quser and returns its raw output lines (header line included).
    # Throws when quser produces no output, when the first line does not match
    # the configured header regex, or when the quser command is not available.
    try {
        # stderr is discarded to avoid noise
        $raw = quser 2>$null

        if (-not $raw -or $raw.Count -eq 0) {
            throw "No user sessions found or quser command failed"
        }

        # Need a header plus at least one session line, and the header must look right
        $looksValid = $raw.Count -ge 2 -and $raw[0] -match $script:Config.QUSER_HEADER_REGEX
        if (-not $looksValid) {
            throw "Unexpected quser output format"
        }

        return $raw
    }
    catch [System.Management.Automation.CommandNotFoundException] {
        throw "quser command not found. This script requires Windows with Terminal Services."
    }
}
|
||||
|
||||
# Get IP addresses for RDP sessions using qwinsta command
|
||||
# Attempts to correlate session IDs with client IP addresses for remote sessions
|
||||
function Get-SessionIPAddresses {
    # Builds a hashtable mapping session ID (string) to an IPv4-looking address
    # found on the same qwinsta output line. Lines without such an address are
    # ignored. Any failure is downgraded to a warning and an empty table.
    try {
        $ipBySession = @{}

        # Session-ID extraction attempts, tried in order; first match wins.
        $idPatterns = @(
            @{ Regex = '^\s*(\S+)\s+(\S+)?\s+(\d+)\s+'; Group = 3 }   # ID as 3rd column
            @{ Regex = 'rdp-tcp#\d+.*?\s(\d+)\s+'; Group = 1 }        # RDP session format
            @{ Regex = '\s(\d+)\s+\w+'; Group = 1 }                   # any number between spaces
        )

        $qwinstaLines = qwinsta 2>$null

        if ($qwinstaLines) {
            Write-Verbose "Raw qwinsta output:"
            $qwinstaLines | ForEach-Object { Write-Verbose " $_" }

            foreach ($line in $qwinstaLines) {
                # Header and blank lines carry no session data
                if ([string]::IsNullOrWhiteSpace($line) -or $line -match '^\s*SESSIONNAME') {
                    continue
                }

                Write-Verbose "Processing qwinsta line: '$line'"

                if ($line -notmatch '(\d+\.\d+\.\d+\.\d+)') {
                    continue
                }
                # Save the address before $matches is overwritten by the next -match
                $ipAddress = $matches[1]

                $sessionId = $null
                foreach ($candidate in $idPatterns) {
                    if ($line -match $candidate.Regex) {
                        $sessionId = $matches[$candidate.Group]
                        break
                    }
                }

                if ($sessionId) {
                    $ipBySession[$sessionId] = $ipAddress
                    Write-Verbose "Mapped session ID $sessionId to IP $ipAddress"
                }
                else {
                    Write-Verbose "Found IP $ipAddress but could not determine session ID"
                }
            }
        }

        Write-Verbose "Final session IP mapping: $($ipBySession | ConvertTo-Json -Compress)"
        return $ipBySession
    }
    catch {
        # IP detection is best effort; never fail the whole script over it
        Write-Warning "Failed to get session IP addresses: $($_.Exception.Message)"
        return @{}
    }
}
|
||||
|
||||
# Parses a single line of quser output into a structured object
|
||||
# Converts space-separated quser output into a PowerShell object with named properties
|
||||
function ConvertFrom-QUserLine {
    # Parses one data line of quser output into a session object.
    #   Line       - a single quser output line (not the header)
    #   SessionIPs - optional hashtable of session ID -> IP address
    # Returns a PSCustomObject with UserName, SessionName, ID, State, IdleTime,
    # LogonTime, ClientLocation and IPAddress, or $null when the line has too
    # few columns to be a session row.
    param(
        [ValidateNotNullOrEmpty()][string]$Line,
        [hashtable]$SessionIPs = @{}
    )

    # Split the line into tokens, normalizing whitespace
    $fields = @($Line.Trim() -replace '\s+', ' ' -split '\s')

    # quser leaves SESSIONNAME blank for disconnected sessions, so the second
    # token is then the numeric session ID. Re-insert an empty session name so
    # the configured column positions line up again.
    if ($fields.Length -ge 2 -and $fields[1] -match '^\d+$') {
        $fields = @($fields[0], '') + $fields[1..($fields.Length - 1)]
    }

    # Need at least: user, session, id, state, idle, first logon-time token
    if ($fields.Length -lt 6) { return $null }

    $cols = $script:Config.COLUMNS
    $sessionId = $fields[$cols.ID]
    $sessionName = $fields[$cols.SESSION]

    # Look up IP address for this session if available
    $ipAddress = if ($SessionIPs.ContainsKey($sessionId)) { $SessionIPs[$sessionId] } else { "unknown" }

    # LOGON TIME is the last quser column and spans every remaining token
    # (date, time and, on 12-hour locales, AM/PM).
    $logonTime = $fields[$cols.LOGON_START..($fields.Length - 1)] -join ' '

    # quser has no client-location column, so derive it from the session name:
    # the console session is local, a named (e.g. rdp-tcp#N) session is remote,
    # and a disconnected session with no name cannot be classified.
    $clientLocation = if ($sessionName -eq 'console') { "local" }
                      elseif ([string]::IsNullOrEmpty($sessionName)) { "unknown" }
                      else { "remote" }

    # A leading ">" marks the session that ran quser; it is not part of the name
    $cleanUserName = $fields[$cols.USERNAME] -replace '^>', ''

    return [PSCustomObject]@{
        UserName       = $cleanUserName
        SessionName    = $sessionName
        ID             = $sessionId
        State          = $fields[$cols.STATE]
        IdleTime       = $fields[$cols.IDLE]
        LogonTime      = $logonTime
        ClientLocation = $clientLocation
        IPAddress      = $ipAddress
    }
}
|
||||
|
||||
# Get command history for a specific user session
|
||||
# Retrieves recent PowerShell commands from the user's PSReadline history file
|
||||
function Get-UserCommandHistory {
    # Returns up to MaxCommands recent commands for a user.
    # Source is the PSReadline ConsoleHost_history.txt under C:\Users\<UserName>.
    # When no commands are found there and "query session <SessionId>" returns
    # output, a single placeholder string is returned instead. Any unexpected
    # failure yields the placeholder "Command history unavailable".
    param(
        [string]$UserName,
        [string]$SessionId,
        [int]$MaxCommands = 10
    )

    try {
        # Characters that are invalid in a file path are replaced
        $profileName = $UserName -replace '[<>:"|?*]', '_'
        $historyFile = "C:\Users\$profileName\AppData\Roaming\Microsoft\Windows\PowerShell\PSReadline\ConsoleHost_history.txt"
        $commands = @()

        if (Test-Path $historyFile) {
            $tailLines = Get-Content $historyFile -Tail $MaxCommands -ErrorAction SilentlyContinue
            if ($tailLines) {
                # Trim each line and drop the ones that end up empty
                $commands = $tailLines | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" }
            }
        }

        # No PowerShell history: fall back to a placeholder if the session exists
        if ($commands.Count -eq 0) {
            try {
                $sessionInfo = query session $SessionId 2>$null
                if ($sessionInfo) {
                    # CMD history is not read here; only a placeholder is produced
                    $commands = @("No recent command history available")
                }
            }
            catch {
                $commands = @("Unable to retrieve command history")
            }
        }

        return $commands | Select-Object -First $MaxCommands
    }
    catch {
        Write-Verbose "Failed to get command history for user $UserName (Session $SessionId): $($_.Exception.Message)"
        return @("Command history unavailable")
    }
}
|
||||
|
||||
# Get all active RDP user sessions with detailed information
|
||||
# Combines quser and qwinsta data to create comprehensive user session objects
|
||||
function Get-RDPUsers {
    # Collects quser sessions, enriches each with IP address and command history,
    # and returns only those whose session name matches the configured RDP
    # pattern and whose user name, session name and state are all non-empty.
    # Any failure is rethrown as "Failed to collect user data: ...".
    try {
        $quserLines = Get-QUserData
        $ipLookup = Get-SessionIPAddresses
        Write-Verbose "Found $($quserLines.Count) total user sessions"
        Write-Verbose "Found $($ipLookup.Count) session IP addresses"

        # First line is the quser header, so it is skipped
        $allUsers = $quserLines | Select-Object -Skip 1 | ForEach-Object {
            $session = ConvertFrom-QUserLine $_ $ipLookup
            if ($null -ne $session) {
                $history = Get-UserCommandHistory -UserName $session.UserName -SessionId $session.ID
                $session | Add-Member -NotePropertyName "CommandHistory" -NotePropertyValue $history
                $session
            }
            else {
                Write-Warning "Skipping malformed quser output: $_"
            }
        } | Where-Object { $_ }

        $rdpUsers = $allUsers | Where-Object {
            (-not [string]::IsNullOrEmpty($_.UserName)) -and
            (-not [string]::IsNullOrEmpty($_.SessionName)) -and
            (-not [string]::IsNullOrEmpty($_.State)) -and
            ($_.SessionName -match $script:Config.RDP_SESSION_PATTERN)
        }

        Write-Verbose "Processed $($allUsers.Count) valid user sessions"
        Write-Verbose "Filtered to $($rdpUsers.Count) RDP sessions"

        return $rdpUsers
    }
    catch {
        throw "Failed to collect user data: $($_.Exception.Message)"
    }
}
|
||||
|
||||
# Creates Prometheus metrics from user session data
|
||||
# Transforms user session objects into Prometheus-formatted metric data
|
||||
function New-UserMetrics {
    # Turns session objects into metric entries (hashtables with Labels/Value).
    #   Users - session objects exposing UserName, SessionName, State,
    #           ClientLocation, IPAddress and optionally CommandHistory
    # Returns, in order: four summary entries (total, active, disconnected,
    # users_list), one entry per session, then one entry per history command.
    # With no users the summary entries are still returned, carrying zeros, so
    # the gauges drop to 0 instead of disappearing from the output.
    param([array]$Users)

    $stateCount = @{ Active = 0; Disc = 0 }
    $usernames = @()
    $userMetrics = @()
    $commandMetrics = @()
    $sessionTotal = 0

    foreach ($user in $Users) {
        if ($null -eq $user) {
            Write-Warning "Found null user in collection"
            continue
        }

        $sessionTotal++

        # A null/empty state cannot be used as a hashtable key, so skip counting it
        if (-not [string]::IsNullOrEmpty($user.State)) {
            $stateCount[$user.State]++
        }
        $usernames += $user.UserName

        # Every label value goes through the sanitizer, including the state
        $nameLabel = ConvertTo-MetricLabel $user.UserName
        $sessionLabel = ConvertTo-MetricLabel $user.SessionName
        $stateLabel = ConvertTo-MetricLabel $user.State
        $locationLabel = ConvertTo-MetricLabel $user.ClientLocation
        $ipLabel = ConvertTo-MetricLabel $user.IPAddress

        $userMetrics += @{
            Labels = "{username=`"$nameLabel`",session=`"$sessionLabel`",state=`"$stateLabel`",location=`"$locationLabel`",ip=`"$ipLabel`"}"
            Value  = 1
        }

        # One entry per history command, numbered from 1
        if ($user.CommandHistory -and $user.CommandHistory.Count -gt 0) {
            for ($i = 0; $i -lt $user.CommandHistory.Count; $i++) {
                $command = ConvertTo-MetricLabel $user.CommandHistory[$i]
                $commandMetrics += @{
                    Labels = "{username=`"$nameLabel`",session=`"$sessionLabel`",command_index=`"$($i + 1)`",command=`"$command`"}"
                    Value  = 1
                }
            }
        }
    }

    $userList = ConvertTo-MetricLabel (($usernames | Sort-Object) -join ',')
    $summaryMetrics = @(
        @{ Labels = '{metric="total"}'; Value = $sessionTotal }
        @{ Labels = '{metric="active"}'; Value = $stateCount.Active }
        @{ Labels = '{metric="disconnected"}'; Value = $stateCount.Disc }
        @{ Labels = '{metric="users_list",users="' + $userList + '"}'; Value = 1 }
    )

    return $summaryMetrics + $userMetrics + $commandMetrics
}
|
||||
|
||||
# Write metrics content to file using atomic write operation
|
||||
function Write-MetricsFile {
    # Writes metrics text to Path by writing "<Path>.tmp" first and then moving
    # it over the destination. Array content is joined with line feeds.
    # With no Path the content is returned unchanged. The parent directory is
    # created when missing. Failures are reported through Write-Error and the
    # temporary file is removed.
    param(
        [ValidateNotNull()]$Content,
        [string]$Path
    )

    if (-not $Path) {
        return $Content
    }

    # Ensure the directory exists
    $parentDir = Split-Path $Path -Parent
    $dirMissing = $parentDir -and -not (Test-Path $parentDir)
    if ($dirMissing) {
        try {
            New-Item -Path $parentDir -ItemType Directory -Force | Out-Null
            Write-Verbose "Created directory: $parentDir"
        }
        catch {
            Write-Error "Failed to create directory '$parentDir': $($_.Exception.Message)"
            return
        }
    }

    $stagingPath = "$Path.tmp"
    try {
        if ($Content -isnot [array]) {
            $Content | Out-File -FilePath $stagingPath -Encoding UTF8
        }
        else {
            $Content -join "`n" | Out-File -FilePath $stagingPath -Encoding UTF8
        }
        Move-Item -Path $stagingPath -Destination $Path -Force -ErrorAction Stop
    }
    catch {
        Write-Error "Failed to write metrics file: $($_.Exception.Message)"
        if (Test-Path $stagingPath) { Remove-Item $stagingPath -Force }
    }
}
|
||||
|
||||
# Main function that orchestrates the complete metrics collection process
|
||||
# Coordinates all data collection, processing, and output generation
|
||||
function Invoke-MetricsCollection {
    # Runs one complete collection cycle: gathers RDP sessions and failed
    # logins, renders them as Prometheus text and either prints the result
    # (-DryRun) or emits it and writes it to $MetricsPath.
    # Having no RDP users is a normal condition, not an error: the remaining
    # metrics are still written so the file never goes stale.
    # Errors are reported through Write-Error; output rendered before the
    # failure is written to $MetricsPath on a best-effort basis.
    $startTime = Get-Date

    if ($DryRun) {
        Write-Host "=== DRY RUN MODE - Metrics that would be written to $MetricsPath ===" -ForegroundColor Yellow
    }

    # Defined before the try block so the catch block sees only this run's output
    $output = @()

    try {
        Write-VerboseLog "Collecting RDP user session data..."
        # @() keeps zero or one result as an array so .Count and += behave
        $rdpUsers = @(Get-RDPUsers)
        Write-VerboseLog "Found $($rdpUsers.Count) RDP users"

        $metrics = @(New-UserMetrics -Users $rdpUsers)

        Write-VerboseLog "Collecting failed login data..."
        $failedLoginMetrics = @(Get-FailedLogins)

        # Execution time is exported so the script's own cost can be monitored
        $executionTimeMs = [math]::Round(((Get-Date) - $startTime).TotalMilliseconds, 2)
        $metrics += @{
            Labels = '{metric="execution_time_ms"}'
            Value  = $executionTimeMs
        }

        # Command-history entries form their own metric family
        $userMetrics = @($metrics | Where-Object { $_.Labels -notmatch 'command=' })
        $commandMetrics = @($metrics | Where-Object { $_.Labels -match 'command=' })

        $output += Write-PrometheusMetric -Name $script:Config.METRIC_NAME -Help "Number of RDP users currently logged in" -Type "gauge" -Metrics $userMetrics

        if ($commandMetrics.Count -gt 0) {
            $output += Write-PrometheusMetric -Name "windows_rdp_user_command_history" -Help "Recent command history for RDP users" -Type "gauge" -Metrics $commandMetrics
        }

        if ($failedLoginMetrics.Count -gt 0) {
            $output += Write-PrometheusMetric -Name "windows_user_failed_logins" -Help "Failed login attempts from Windows Event Log" -Type "counter" -Metrics $failedLoginMetrics
        }

        Write-VerboseLog "Metrics collection completed (execution time: ${executionTimeMs}ms)"

        if ($DryRun) {
            # One line per entry; passing the array to Write-Host directly would
            # join everything with spaces on a single line
            $output | ForEach-Object { Write-Host $_ }
            Write-Host "=== END DRY RUN OUTPUT ===" -ForegroundColor Yellow
        }
        else {
            Write-Output $output
            Write-MetricsFile -Content $output -Path $MetricsPath
        }
    }
    catch {
        Write-Error "Failed to collect metrics: $($_.Exception.Message)"
        # Attempt to write partial results if available
        if ($MetricsPath -and $output -and -not $DryRun) {
            $output | Out-File -FilePath $MetricsPath -Encoding UTF8
        }
    }
}
|
||||
|
||||
# Register cleanup handler for graceful shutdown
|
||||
# Announce shutdown on the host when the PowerShell engine exits
$exitSubscription = @{
    SourceIdentifier = 'PowerShell.Exiting'
    Action           = { Write-Host "Shutting down gracefully..." }
}
Register-EngineEvent @exitSubscription
|
||||
|
||||
# Create scheduled task for periodic execution
|
||||
function New-MetricsScheduledTask {
    # Registers a scheduled task that re-runs this script with -RunOnce.
    #   IntervalSeconds - repetition interval of the task trigger
    #   TaskName        - name of the scheduled task
    # The task runs as SYSTEM and passes the current $MetricsPath through.
    # An existing task with the same name is left untouched. Failures are
    # reported through Write-Error and nothing is returned.
    param(
        [int]$IntervalSeconds = 60,
        [string]$TaskName = "PrometheusRDPMetrics"
    )

    try {
        if (Get-ScheduledTask -TaskName $TaskName -ErrorAction SilentlyContinue) {
            Write-InfoLog "Scheduled task '$TaskName' already exists. Skipping creation."
            return
        }

        # Inside a function $MyInvocation describes the function call and has
        # no Path; $PSCommandPath is the path of the script file itself.
        $scriptPath = $PSCommandPath
        if ([string]::IsNullOrEmpty($scriptPath)) {
            throw "Cannot determine the script path; run this script from a file."
        }

        $principal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest
        $action = New-ScheduledTaskAction -Execute "powershell.exe" -Argument "-NoProfile -ExecutionPolicy Bypass -File `"$scriptPath`" -MetricsPath `"$MetricsPath`" -RunOnce"
        $trigger = New-ScheduledTaskTrigger -Once -At (Get-Date) -RepetitionInterval (New-TimeSpan -Seconds $IntervalSeconds)
        $settings = New-ScheduledTaskSettingsSet -AllowStartIfOnBatteries -DontStopIfGoingOnBatteries -StartWhenAvailable

        # -ErrorAction Stop routes a failed registration into the catch block
        # instead of logging success; Out-Null keeps the task object out of
        # the script's output.
        Register-ScheduledTask -TaskName $TaskName -Action $action -Trigger $trigger -Principal $principal -Settings $settings -Force -ErrorAction Stop | Out-Null
        Write-InfoLog "Scheduled task '$TaskName' created successfully with $IntervalSeconds second interval"
    }
    catch {
        Write-Error "Failed to create scheduled task: $($_.Exception.Message)"
    }
}
|
||||
|
||||
# Debug function to test qwinsta parsing
|
||||
function Test-QwinstaOutput {
    # Diagnostic helper: prints raw qwinsta output, the session-to-IP mapping
    # produced by Get-SessionIPAddresses, and raw quser output to the host.
    # Failures are reported through Write-Error.
    Write-Host "=== Testing qwinsta output parsing ===" -ForegroundColor Cyan

    try {
        $rawQwinsta = qwinsta 2>$null
        Write-Host "Raw qwinsta output:" -ForegroundColor Yellow
        $rawQwinsta | ForEach-Object { Write-Host " $_" }

        Write-Host "`nTesting IP address extraction:" -ForegroundColor Yellow
        $ipLookup = Get-SessionIPAddresses
        $ipLookup.GetEnumerator() | ForEach-Object {
            Write-Host " Session ID $($_.Key) -> IP $($_.Value)" -ForegroundColor Green
        }

        Write-Host "`nTesting quser output:" -ForegroundColor Yellow
        $rawQuser = quser 2>$null
        $rawQuser | ForEach-Object { Write-Host " $_" }
    }
    catch {
        Write-Error "Test failed: $($_.Exception.Message)"
    }
}
|
||||
|
||||
# Get failed login attempts from Windows Event Log
|
||||
function Get-FailedLogins {
    # Reads up to 50 failed-logon events (Security log, ID 4625) from the last
    # 24 hours and returns metric entries (hashtables with Labels/Value).
    # Events sharing the same user, source address, workstation and failure
    # type are merged into one entry whose Value is the number of such events,
    # so no two entries carry identical labels.
    # Label values come from the event itself and are therefore untrusted; they
    # are passed through ConvertTo-MetricLabel before being embedded.
    # Returns an empty array (with a warning) if the query itself fails.
    try {
        $windowStart = (Get-Date).AddHours(-24)

        $failedLogonEvents = Get-WinEvent -FilterHashtable @{
            LogName   = 'Security'
            Id        = 4625 # Failed logon attempts
            StartTime = $windowStart
        } -ErrorAction SilentlyContinue | Select-Object -First 50

        # Label text -> occurrence count; ordered to keep output stable
        $countsByLabels = [ordered]@{}

        if ($failedLogonEvents) {
            # Not named $event: that is a PowerShell automatic variable
            foreach ($logonEvent in $failedLogonEvents) {
                try {
                    $eventXml = [xml]$logonEvent.ToXml()
                    $eventData = $eventXml.Event.EventData.Data

                    $targetUserName = ($eventData | Where-Object { $_.Name -eq 'TargetUserName' }).'#text'
                    $workstationName = ($eventData | Where-Object { $_.Name -eq 'WorkstationName' }).'#text'
                    $sourceNetworkAddress = ($eventData | Where-Object { $_.Name -eq 'IpAddress' }).'#text'
                    $failureReason = ($eventData | Where-Object { $_.Name -eq 'SubStatus' }).'#text'

                    # Substitute placeholders for missing values
                    if ([string]::IsNullOrWhiteSpace($targetUserName)) { $targetUserName = "unknown" }
                    if ([string]::IsNullOrWhiteSpace($sourceNetworkAddress) -or $sourceNetworkAddress -eq '-') { $sourceNetworkAddress = "local" }
                    if ([string]::IsNullOrWhiteSpace($workstationName)) { $workstationName = "unknown" }

                    # Determine failure type based on sub status
                    $failureType = switch ($failureReason) {
                        "0xC0000064" { "invalid_user" }
                        "0xC000006A" { "wrong_password" }
                        "0xC0000234" { "account_locked" }
                        "0xC0000072" { "account_disabled" }
                        "0xC000006F" { "logon_time_restriction" }
                        "0xC0000070" { "workstation_restriction" }
                        default { "other_failure" }
                    }

                    $userLabel = ConvertTo-MetricLabel $targetUserName
                    $ipLabel = ConvertTo-MetricLabel $sourceNetworkAddress
                    $workstationLabel = ConvertTo-MetricLabel $workstationName
                    $labels = "{username=`"$userLabel`",source_ip=`"$ipLabel`",workstation=`"$workstationLabel`",failure_type=`"$failureType`"}"

                    if ($countsByLabels.Contains($labels)) {
                        $countsByLabels[$labels] = $countsByLabels[$labels] + 1
                    }
                    else {
                        $countsByLabels[$labels] = 1
                    }
                }
                catch {
                    Write-VerboseLog "Failed to parse event: $($_.Exception.Message)"
                }
            }
        }

        $failedLogins = @()
        foreach ($entry in $countsByLabels.GetEnumerator()) {
            $failedLogins += @{
                Labels = $entry.Key
                Value  = $entry.Value
            }
        }
        return $failedLogins
    }
    catch {
        Write-Warning "Failed to get failed login events: $($_.Exception.Message)"
        return @()
    }
}
|
||||
|
||||
# Main execution logic - determines script behavior based on parameters
|
||||
if ($Debug) {
    # Debug mode: test qwinsta and quser output parsing
    Test-QwinstaOutput
}
elseif (-not ($RunOnce -or $DryRun)) {
    # Scheduled mode: create the scheduled task unless -NoSchedule was given,
    # then collect metrics immediately
    if ($NoSchedule) {
        Write-InfoLog "Skipping scheduled task creation (-NoSchedule specified)"
    }
    else {
        New-MetricsScheduledTask -IntervalSeconds $IntervalSeconds
    }

    Invoke-MetricsCollection
}
else {
    # Single execution mode: collect metrics once and exit
    Invoke-MetricsCollection
}
|
||||
Executable
+619
@@ -0,0 +1,619 @@
|
||||
#!/bin/bash
|
||||
|
||||
########################################################################################
|
||||
#### users_logged_in.sh ####
|
||||
#### ####
|
||||
#### This script monitors and reports information about users currently logged into ####
|
||||
#### a Linux system. It's designed to work with Prometheus monitoring system to ####
|
||||
#### track user activity on Amazon, Ubuntu, and RedHat Linux servers. ####
|
||||
#### ####
|
||||
#### Contact: Phil Connor contact@mylinux.work ####
|
||||
#### Version 3.3.1-20250923 ####
|
||||
########################################################################################
|
||||
|
||||
# Strict mode: abort on errors, on unset variables and on failing pipeline stages
set -o errexit -o nounset -o pipefail

# Release identifier printed by --version
SCRIPT_VERSION="3.3.1-20250923"

# Command-line switches; each starts disabled and is flipped by parse_arguments
DRY_RUN=false
VERBOSE=false
QUIET=false
NO_CRON=false
|
||||
|
||||
# Parse command line arguments
|
||||
parse_arguments() {
  # Applies command-line options to the global flag variables.
  # Globals written: DRY_RUN, VERBOSE, DEBUG, QUIET, NO_CRON
  # Globals read:    SCRIPT_VERSION
  # Arguments:       the script's command-line arguments
  # --version and --help print to stdout and exit 0; an unknown option prints
  # to stderr and exits 1.
  local option
  for option in "$@"; do
    case "$option" in
      --dry-run)
        DRY_RUN=true
        ;;
      -v | --verbose)
        VERBOSE=true
        DEBUG=1
        ;;
      -q | --quiet)
        QUIET=true
        ;;
      --no-cron)
        NO_CRON=true
        ;;
      --version)
        printf '%s\n' \
          "User Login Monitor" \
          "Version: $SCRIPT_VERSION" \
          "Author: Phil Connor contact@mylinux.work"
        exit 0
        ;;
      --help | -h)
        printf '%s\n' \
          "Usage: $0 [OPTIONS]" \
          "Monitor user login activity and export Prometheus metrics" \
          "" \
          "Options:" \
          " --dry-run Output metrics to console instead of file" \
          " --verbose Enable verbose debug output" \
          " --quiet Suppress non-error output" \
          " --no-cron Skip cron job installation" \
          " --version Show version and exit" \
          " --help Show this help message"
        exit 0
        ;;
      *)
        printf '%s\n' \
          "Unknown option: $option" \
          "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}
|
||||
|
||||
# Enhanced logging functions
|
||||
log_verbose() {
  # Prints a timestamped [VERBOSE] line to stdout when VERBOSE is "true".
  # Globals read: VERBOSE
  # Arguments:    $1 - message text
  # Returns:      0 whether or not anything was printed. An 'if' is used
  #               instead of '[[ ... ]] && echo' because the short-circuit form
  #               returns 1 when verbose is off, which aborts a caller running
  #               under 'set -e'.
  if [[ "$VERBOSE" == "true" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [VERBOSE] $1"
  fi
}
|
||||
|
||||
log_info() {
  # Prints a timestamped [INFO] line to stdout unless QUIET is set.
  # Globals read: QUIET
  # Arguments:    $1 - message text
  # Returns:      0 whether or not anything was printed. An 'if' is used
  #               instead of '[[ ... ]] && echo' because the short-circuit form
  #               returns 1 in quiet mode, which aborts a caller running under
  #               'set -e'.
  if [[ "$QUIET" == "false" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1"
  fi
}
|
||||
|
||||
# System Configuration - Define default values and paths
|
||||
# Directory where the Prometheus metric files are stored
NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}"
readonly NODE_EXPORTER_DIR

# User that owns the Prometheus files
PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}"
readonly PROMETHEUS_USER

# User whose crontab receives the scheduled entry
CRONTAB_USER="${CRONTAB_USER:-root}"
readonly CRONTAB_USER

# Full path to this script
readonly SCRIPT_PATH="$(readlink -f "$0")"

# Cron schedule (every 3 minutes unless overridden)
UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/3 * * * *}"
readonly UPDATE_INTERVAL

# Lockfile used to prevent multiple instances from running
readonly LOCKFILE="/var/run/users_logged_in.lock"

# Required commands, each mapped to the directory used when it is not on PATH
declare -A COMMANDS=(
  [awk]="/usr/bin"
  [cut]="/usr/bin"
  [grep]="/usr/bin"
  [sed]="/usr/bin"
  [sort]="/usr/bin"
  [uniq]="/usr/bin"
  [who]="/usr/bin"
)

# Resolved command paths (filled in by find_commands)
declare -A CMD_PATHS

# Refuse to run when a required setting ended up empty
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
  echo "ERROR: Required environment variables not set" >&2
  exit 1
fi
|
||||
|
||||
# Error handling function - Display error message and exit with specified code
|
||||
handle_error() {
  # Prints "ERROR: <message>" to stderr and terminates the shell.
  # Arguments: $1 - message text
  #            $2 - exit status (optional, defaults to 1)
  local -r message="$1"
  local -r status="${2:-1}"

  printf 'ERROR: %s\n' "$message" >&2
  exit "$status"
}
|
||||
|
||||
# Logging function - Output timestamped log messages
|
||||
log() {
  # Prints "[<timestamp>] [<level>] <message>" to stdout.
  # Arguments: $1 - level tag (for example WARNING)
  #            $2 - message text
  local -r severity="$1"
  local -r text="$2"
  local stamp

  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  printf '[%s] [%s] %s\n' "$stamp" "$severity" "$text"
}
|
||||
|
||||
# Find command location - Locate executable path or use fallback
|
||||
find_command() {
  # Prints the path of an executable.
  # Arguments: $1 - command name
  #            $2 - directory to fall back to when the lookup fails
  # The lookup uses 'command -v'; if that fails, "$2/$1" is used instead.
  # Calls handle_error when the resulting path is not executable.
  local command_name="$1"
  local fallback_dir="$2"
  local resolved

  if ! resolved=$(command -v "$command_name" 2>/dev/null); then
    resolved="$fallback_dir/$command_name"
  fi

  if [[ ! -x "$resolved" ]]; then
    handle_error "Cannot find or execute '$command_name'"
  fi
  echo "$resolved"
}
|
||||
|
||||
# Initialize command paths - Populate CMD_PATHS array with actual command locations
|
||||
find_commands() {
  # Resolves every command listed in COMMANDS and stores its path in CMD_PATHS.
  # Globals read:    COMMANDS (command name -> fallback directory)
  # Globals written: CMD_PATHS (command name -> resolved path)
  # The loop variable is local so it does not leak into the global scope.
  local cmd
  for cmd in "${!COMMANDS[@]}"; do
    CMD_PATHS[$cmd]=$(find_command "$cmd" "${COMMANDS[$cmd]}")
  done
}
|
||||
|
||||
# Cleanup function - Remove lockfile on script exit
|
||||
cleanup() {
  # Removes the lockfile; a missing file is not an error.
  # Globals read: LOCKFILE
  local lock_path="$LOCKFILE"
  rm -f "$lock_path"
}
|
||||
|
||||
# Setup Prometheus directory - Create and set permissions for metrics output directory
|
||||
setup_directory() {
  # Ensures the metrics directory exists and is writable.
  # Globals read: NODE_EXPORTER_DIR, PROMETHEUS_USER
  # When the directory is missing and the script runs as root, it is created
  # and handed to PROMETHEUS_USER (a failing chown is ignored).
  # Calls handle_error when the directory is not writable afterwards.
  local target="$NODE_EXPORTER_DIR"

  if [[ ! -d "$target" && $(id -u) -eq 0 ]]; then
    mkdir -p "$target"
    chown "$PROMETHEUS_USER": "$target" 2>/dev/null || true
  fi

  if [[ ! -w "$target" ]]; then
    handle_error "$NODE_EXPORTER_DIR is not writable"
  fi
}
|
||||
|
||||
# Setup lockfile - Prevent multiple script instances from running simultaneously
|
||||
setup_lockfile() {
  # Claims the lockfile so only one instance runs at a time.
  # Globals read: LOCKFILE
  # Calls handle_error when the lockfile already exists ("already running")
  # or when it cannot be created at all.
  # A lockfile older than 60 minutes is treated as stale and removed first.
  find "$LOCKFILE" -mmin +60 -delete 2>/dev/null || true

  # With noclobber set, '>' fails if the file already exists, so the existence
  # check and the creation happen in one step and two instances cannot both
  # succeed. umask 077 yields mode 600. The subshell keeps both settings from
  # leaking into the rest of the script.
  if ! (
    set -o noclobber
    umask 077
    : > "$LOCKFILE"
  ) 2>/dev/null; then
    if [[ -e "$LOCKFILE" ]]; then
      handle_error "Script is already running"
    fi
    handle_error "Cannot create lockfile $LOCKFILE"
  fi
}
|
||||
|
||||
# Install cron job - Automatically schedule this script to run periodically
|
||||
install_cron_job() {
  # Adds a crontab entry that runs this script on the configured schedule.
  # Globals read: NO_CRON, SCRIPT_PATH, CRONTAB_USER, UPDATE_INTERVAL,
  #               NODE_EXPORTER_DIR
  # Does nothing when --no-cron was given, when SCRIPT_PATH is not a regular
  # file, or when CRONTAB_USER's crontab already mentions SCRIPT_PATH.
  # Returns 0 in every case; a failed installation is logged as a warning.
  if [[ "$NO_CRON" == "true" ]]; then
    log_info "Skipping cron job installation (--no-cron specified)"
    return 0
  fi

  if [[ ! -f "$SCRIPT_PATH" ]]; then
    return 0
  fi

  # Read the crontab of the same user the entry is installed for; a missing
  # crontab is treated as empty.
  local existing
  existing=$(crontab -u "$CRONTAB_USER" -l 2>/dev/null) || existing=""

  # -F: the script path is matched literally, not as a regular expression
  if grep -qF -- "$SCRIPT_PATH" <<<"$existing"; then
    return 0
  fi

  local cron_entry="$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/usrlogins.prom 2>&1"

  # printf '%s' reproduces existing entries byte for byte; 'echo -e' would
  # rewrite backslash sequences such as \t or \n inside them.
  if {
    if [[ -n "$existing" ]]; then
      printf '%s\n' "$existing"
    fi
    printf '%s\n' "$cron_entry"
  } | crontab -u "$CRONTAB_USER" -; then
    log_info "Cron job installed successfully"
  else
    log "WARNING" "Failed to install cron job for user $CRONTAB_USER"
  fi
}
|
||||
|
||||
# Get logged users - Extract user information and format as Prometheus metrics
|
||||
get_logged_users() {
  # Prints one node_logged_in_usrs sample per distinct line of 'who' output.
  # Globals read: CMD_PATHS (who, sort, uniq, awk)
  # Labels: name (field 1), terminal (field 2), location (field 5).
  local format_sessions='{
    # name: drop the US\ prefix and any @us.<x>.net suffix
    gsub(/US\\|@us\.[^.]+\.net/, "", $1)
    # terminal: slashes become spaces, colons are dropped
    gsub(/\//, " ", $2)
    gsub(/:/, "", $2)
    # location: ":100" becomes aws_workspace, parentheses are dropped
    gsub(/:100/, "aws_workspace", $5)
    gsub(/\(|\)/, "", $5)
    print "node_logged_in_usrs{name=\""$1"\", terminal=\""$2"\", location=\""$5"\"}", 1
  }'

  "${CMD_PATHS[who]}" \
    | "${CMD_PATHS[sort]}" \
    | "${CMD_PATHS[uniq]}" \
    | "${CMD_PATHS[awk]}" "$format_sessions"
}
|
||||
|
||||
# Get user terminal count - Count open terminals per user
|
||||
get_user_terminal_count() {
  # Prints one node_logged_in_usr_terminals sample per user; the value is the
  # number of 'who' lines for that user.
  # Globals read: CMD_PATHS (who, sed, cut, sort, uniq, awk)
  # sed: strip everything up to a US\ prefix, and remove commas
  local strip_prefix='s/.*US\\[\t ]*//;s/,//g'
  # awk input is "<count> <user>" from uniq -c; an @us.<x>.net suffix is dropped
  local format_counts='{
    gsub(/@us\.[^.]+\.net/, "", $2)
    print "node_logged_in_usr_terminals{username=\""$2"\"}", $1
  }'

  "${CMD_PATHS[who]}" \
    | "${CMD_PATHS[sed]}" "$strip_prefix" \
    | "${CMD_PATHS[cut]}" -f1 -d' ' \
    | "${CMD_PATHS[sort]}" \
    | "${CMD_PATHS[uniq]}" -c \
    | "${CMD_PATHS[awk]}" "$format_counts"
}
|
||||
|
||||
# Get total user count - Count total logged in sessions
|
||||
get_total_user_count() {
  # Prints the session count reported by 'who -q'.
  # Globals read: CMD_PATHS (who, grep, awk, cut)
  # Keeps the lines containing "users", takes their second whitespace-separated
  # field and prints what follows the "=" in it.
  "${CMD_PATHS[who]}" -q \
    | "${CMD_PATHS[grep]}" users \
    | "${CMD_PATHS[awk]}" '{print $2}' \
    | "${CMD_PATHS[cut]}" -d "=" -f2
}
|
||||
|
||||
# Get last user commands - Extract recent bash history for each user
|
||||
get_last_user_commands() {
  # Prints the last 10 shell-history lines of a user as
  # node_user_last_commands samples.
  # Globals read: CMD_PATHS (awk)
  # Arguments:    $1 - username (required)
  #               $2 - history file to read (optional); when omitted,
  #                    /home/<user>/.bash_history and /home/<user>/.history are
  #                    tried, plus /root/.bash_history for the user "root"
  # Returns:      1 when no username is given, otherwise 0. A user without a
  #               readable history file produces no output.
  local username="${1:-}"
  # Initialised explicitly: a bare 'local history_file' stays unset, and
  # testing it under 'set -u' aborts the script when no history file is found.
  local history_file="${2:-}"
  local candidate
  local -a candidates

  if [[ -z "$username" ]]; then
    return 1
  fi

  if [[ -z "$history_file" ]]; then
    candidates=("/home/${username}/.bash_history" "/home/${username}/.history")
    # root's history is reported for root only, never attributed to other users
    if [[ "$username" == "root" ]]; then
      candidates+=("/root/.bash_history")
    fi
    for candidate in "${candidates[@]}"; do
      if [[ -r "$candidate" ]]; then
        history_file="$candidate"
        break
      fi
    done
  fi

  if [[ -z "$history_file" || ! -r "$history_file" ]]; then
    return 0
  fi

  tail -n 10 -- "$history_file" 2>/dev/null \
    | "${CMD_PATHS[awk]}" -v user="$username" 'NR <= 10 {
      # Double every backslash first. "&" stands for the matched text, so "&&"
      # writes it twice; a replacement of "\\\\" yields a single backslash and
      # would leave the text unescaped.
      gsub(/\\/, "&&", $0)
      gsub(/"/, "\\\"", $0)   # Escape double quotes in commands
      gsub(/'\''/, "", $0)    # Remove single quotes
      print "node_user_last_commands{username=\"" user "\", command_number=\"" NR "\", command=\"" $0 "\"} 1"
    }'
}
|
||||
|
||||
# Get sudo commands - Extract recent privileged commands from auth logs
#
# Arguments:
#   $1   - login name (a leading "US\" and anything from the last "@" on are
#          stripped before it is used as the username label)
#   $2.. - optional log files to scan instead of the defaults
#          (/var/log/secure, then /var/log/auth.log)
# Globals:
#   CMD_PATHS (read) - resolved paths of awk, sort and uniq
# Outputs:
#   De-duplicated node_user_sudo_commands{username, command} samples on
#   stdout, built from the last 10 matching log lines of the first log file
#   that yields any.
# Returns:
#   1 when no username is given.
get_sudo_commands() {
    local username="${1:-}"
    local clean_username log_file
    local commands_found=""
    local -a auth_logs=("/var/log/secure" "/var/log/auth.log")

    if [[ -z "$username" ]]; then
        return 1
    fi

    if (( $# > 1 )); then
        auth_logs=("${@:2}")
    fi

    # Strip domain prefixes for comparison
    clean_username="${username#US\\}"
    clean_username="${clean_username%@*}"

    for log_file in "${auth_logs[@]}"; do
        [[ -r "$log_file" ]] || continue

        # Try RHEL/Amazon Linux format first (TTY= pattern). The names are
        # matched as fixed strings (-F) so that regex metacharacters in a
        # login name, e.g. the dot in "john.doe", cannot match other users.
        commands_found=$(
            grep -F -e "TTY=" "$log_file" 2>/dev/null \
                | grep -F -e "$clean_username" -e "$username" \
                | grep -F -e "COMMAND=" \
                | tail -n 10 \
                | "${CMD_PATHS[awk]}" -F '; COMMAND=' -v user="$clean_username" '
                    NF >= 2 {
                        cmd = $2
                        gsub(/#040/, " ", cmd)             # Convert #040 to spaces
                        gsub(/^[ \t]+|[ \t]+$/, "", cmd)   # Trim whitespace
                        gsub(/\\/, "\\\\", cmd)            # Escape backslashes first
                        gsub(/"/, "\\\"", cmd)             # Escape double quotes
                        gsub(/'\''/, "", cmd)              # Remove single quotes
                        if (cmd != "") {
                            print user "|||" cmd           # Delimiter for deduplication
                        }
                    }'
        )

        # If RHEL format did not produce anything, try Ubuntu format
        if [[ -z "$commands_found" ]]; then
            commands_found=$(
                grep -F -e "COMMAND=" "$log_file" 2>/dev/null \
                    | grep -F -e "USER=$clean_username" -e "$clean_username :" \
                    | tail -n 10 \
                    | "${CMD_PATHS[awk]}" -F 'COMMAND=' -v user="$clean_username" '
                        NF >= 2 {
                            cmd = $2
                            gsub(/^[ \t]+|[ \t]+$/, "", cmd)   # Trim whitespace
                            gsub(/\\/, "\\\\", cmd)            # Escape backslashes first
                            gsub(/"/, "\\\"", cmd)             # Escape double quotes
                            gsub(/'\''/, "", cmd)              # Remove single quotes
                            if (cmd != "") {
                                print user "|||" cmd           # Delimiter for deduplication
                            }
                        }'
            )
        fi

        # Stop at the first log file that produced results
        # (prefer secure over auth.log for RHEL)
        if [[ -n "$commands_found" ]]; then
            break
        fi
    done

    if [[ -z "$commands_found" ]]; then
        return 0
    fi

    # Deduplicate and format as proper metrics. The record is split at the
    # FIRST "|||" only, so a command that itself contains "|||" is kept whole.
    printf '%s\n' "$commands_found" \
        | "${CMD_PATHS[sort]}" \
        | "${CMD_PATHS[uniq]}" \
        | "${CMD_PATHS[awk]}" '
            {
                cut_at = index($0, "|||")
                if (cut_at == 0) next
                print "node_user_sudo_commands{username=\"" substr($0, 1, cut_at - 1) "\", command=\"" substr($0, cut_at + 3) "\"} 1"
            }'
}
|
||||
|
||||
# Get session events - Extract login/logout events from auth logs
#
# Arguments:
#   $1   - login name (a leading "US\" and anything from the last "@" on are
#          stripped before it is used as the username label)
#   $2.. - optional log files to scan instead of the defaults
#          (/var/log/secure, then /var/log/auth.log)
# Globals:
#   CMD_PATHS (read) - resolved paths of awk, sort and uniq
# Outputs:
#   De-duplicated node_user_session_events{username, event, method} samples
#   on stdout, built from the last 20 matching lines of the first log file
#   that yields any.
# Returns:
#   1 when no username is given.
get_session_events() {
    local username="${1:-}"
    local clean_username log_file
    local session_events=""
    local -a auth_logs=("/var/log/secure" "/var/log/auth.log")

    if [[ -z "$username" ]]; then
        return 1
    fi

    if (( $# > 1 )); then
        auth_logs=("${@:2}")
    fi

    # Strip domain prefixes for comparison
    clean_username="${username#US\\}"
    clean_username="${clean_username%@*}"

    for log_file in "${auth_logs[@]}"; do
        [[ -r "$log_file" ]] || continue

        # Names are matched as fixed strings (-F) so that regex metacharacters
        # in a login name (e.g. the dot in "john.doe") cannot match other users.
        session_events=$(
            grep -E "(session opened|session closed|Accepted)" "$log_file" 2>/dev/null \
                | grep -F -e "$clean_username" -e "$username" \
                | tail -n 20 \
                | "${CMD_PATHS[awk]}" -v user="$clean_username" '
                    {
                        event = ""
                        method = "ssh"

                        if ($0 ~ /session opened/) {
                            event = "login"
                            if ($0 ~ /sudo/) method = "sudo"
                        } else if ($0 ~ /session closed/) {
                            event = "logout"
                            if ($0 ~ /sudo/) method = "sudo"
                        } else if ($0 ~ /Accepted/) {
                            event = "login"
                            if ($0 ~ /publickey/) method = "ssh-key"
                            else if ($0 ~ /password/) method = "ssh-password"
                        }

                        if (event != "") {
                            print user "|||" event "|||" method   # Delimiter for deduplication
                        }
                    }'
        )

        # Stop at the first log file that produced results
        if [[ -n "$session_events" ]]; then
            break
        fi
    done

    if [[ -z "$session_events" ]]; then
        return 0
    fi

    # Deduplicate and format as proper metrics
    printf '%s\n' "$session_events" \
        | "${CMD_PATHS[sort]}" \
        | "${CMD_PATHS[uniq]}" \
        | "${CMD_PATHS[awk]}" -F '[|][|][|]' '
            {
                print "node_user_session_events{username=\"" $1 "\", event=\"" $2 "\", method=\"" $3 "\"} 1"
            }'
}
|
||||
|
||||
# Get failed login attempts - Track security events
#
# Arguments:
#   $@ - optional log files to scan instead of the defaults
#        (/var/log/secure, then /var/log/auth.log)
# Globals:
#   CMD_PATHS (read) - resolved paths of awk, sort and uniq
# Outputs:
#   De-duplicated node_user_failed_logins{username, source_ip, failure_type}
#   samples on stdout, built from the last 50 matching lines of the first log
#   file that yields any.
#
# The parser uses only two-argument match() with RSTART/RLENGTH, so it does
# not depend on the gawk-only three-argument form.
get_failed_logins() {
    local log_file
    local failed_logins=""
    local -a auth_logs=("/var/log/secure" "/var/log/auth.log")

    if (( $# > 0 )); then
        auth_logs=("$@")
    fi

    for log_file in "${auth_logs[@]}"; do
        [[ -r "$log_file" ]] || continue

        failed_logins=$(
            grep -E "(Failed password|authentication failure|Invalid user)" "$log_file" 2>/dev/null \
                | tail -n 50 \
                | "${CMD_PATHS[awk]}" '
                    {
                        username = "unknown"
                        source_ip = "unknown"

                        # Username. The "invalid user" forms are checked first;
                        # otherwise "for invalid user bob" would yield the
                        # word "invalid" instead of the attempted name.
                        if (match($0, /(for invalid user|Invalid user) [-a-zA-Z0-9_.\\]+/)) {
                            username = substr($0, RSTART, RLENGTH)
                            sub(/^.* /, "", username)
                        } else if (match($0, /for [-a-zA-Z0-9_.\\]+/)) {
                            username = substr($0, RSTART + 4, RLENGTH - 4)
                            # Empty name: "for invalid user  from ..."
                            if (username == "invalid" && $0 ~ /for invalid user/) {
                                username = "unknown"
                            }
                        } else if (match($0, /[ ;]user=[-a-zA-Z0-9_.\\]+/)) {
                            # PAM style: "... rhost=1.2.3.4  user=bob"
                            username = substr($0, RSTART + 6, RLENGTH - 6)
                        }
                        gsub(/US\\/, "", username)   # Clean domain prefix

                        # Source IP: sshd "from <ip>" or PAM "rhost=<ip>"
                        if (match($0, /from [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) {
                            source_ip = substr($0, RSTART + 5, RLENGTH - 5)
                        } else if (match($0, /rhost=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) {
                            source_ip = substr($0, RSTART + 6, RLENGTH - 6)
                        }

                        failure_type = "password"
                        if ($0 ~ /Invalid user/) failure_type = "invalid_user"
                        else if ($0 ~ /authentication failure/) failure_type = "auth_failure"

                        print username "|||" source_ip "|||" failure_type   # Delimiter for deduplication
                    }'
        )

        # Stop at the first log file that produced results
        if [[ -n "$failed_logins" ]]; then
            break
        fi
    done

    if [[ -z "$failed_logins" ]]; then
        return 0
    fi

    # Deduplicate and format as proper metrics
    printf '%s\n' "$failed_logins" \
        | "${CMD_PATHS[sort]}" \
        | "${CMD_PATHS[uniq]}" \
        | "${CMD_PATHS[awk]}" -F '[|][|][|]' '
            {
                print "node_user_failed_logins{username=\"" $1 "\", source_ip=\"" $2 "\", failure_type=\"" $3 "\"} 1"
            }'
}
|
||||
|
||||
# Get active session durations - Calculate how long users have been logged in
#
# Parses the login timestamp of every `who -u` line and reports, per user,
# the longest time elapsed since login. Three timestamp layouts are handled:
#   YYYY-MM-DD HH:MM   fields 3 and 4
#   Mon DD HH:MM       fields 3, 4 and 5 (the year is taken to be the current
#                      one, or the previous one if that would be in the future)
#   HH:MM              field 3 only, taken to be today (or yesterday if that
#                      would be in the future)
# The login time is always taken from the field that actually holds it, and
# the date is included in the calculation so sessions older than a day are
# not reported modulo 24 hours.
#
# Globals:
#   CMD_PATHS (read) - resolved paths of who, awk and sort
# Outputs:
#   One node_user_session_duration_seconds{username} sample per user on
#   stdout, sorted.
# Requires an awk that provides strftime() and mktime().
get_session_durations() {
    local current_time
    current_time=$(date +%s)

    "${CMD_PATHS[who]}" -u \
        | "${CMD_PATHS[awk]}" -v now="$current_time" '
            BEGIN {
                month_count = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", month_names, " ")
                for (i = 1; i <= month_count; i++) {
                    month_no[month_names[i]] = i
                }
                split(strftime("%Y %m %d", now), today, " ")
            }

            NF >= 5 {
                username = $1
                gsub(/US\\|@us\.[^.]+\.net/, "", username)   # Clean username (US\ or @us.*.net)

                login_epoch = -1

                if ($3 ~ /^[0-9]+-[0-9]+-[0-9]+$/ && $4 ~ /^[0-9]+:[0-9]+$/) {
                    # ISO layout: 2025-10-15 14:30
                    split($3, d, "-")
                    split($4, t, ":")
                    login_epoch = mktime((d[1] + 0) " " (d[2] + 0) " " (d[3] + 0) " " (t[1] + 0) " " (t[2] + 0) " 0")
                } else if (($3 in month_no) && $4 ~ /^[0-9]+$/ && $5 ~ /^[0-9]+:[0-9]+$/) {
                    # Month-name layout: Oct 15 14:30
                    split($5, t, ":")
                    year = today[1] + 0
                    login_epoch = mktime(year " " month_no[$3] " " ($4 + 0) " " (t[1] + 0) " " (t[2] + 0) " 0")
                    if (login_epoch > now + 60) {
                        login_epoch = mktime((year - 1) " " month_no[$3] " " ($4 + 0) " " (t[1] + 0) " " (t[2] + 0) " 0")
                    }
                } else if ($3 ~ /^[0-9]+:[0-9]+$/) {
                    # Time-only layout: 14:30
                    split($3, t, ":")
                    login_epoch = mktime((today[1] + 0) " " (today[2] + 0) " " (today[3] + 0) " " (t[1] + 0) " " (t[2] + 0) " 0")
                    if (login_epoch > now + 60) {
                        login_epoch -= 86400   # Logged in before midnight
                    }
                }

                if (login_epoch < 0) next   # Unrecognised timestamp layout

                duration = now - login_epoch
                if (duration < 0) duration = 0

                # Keep the highest duration for each username
                if (!(username in longest) || duration > longest[username]) {
                    longest[username] = duration
                }
            }

            END {
                for (username in longest) {
                    print "node_user_session_duration_seconds{username=\"" username "\"} " longest[username]
                }
            }' \
        | "${CMD_PATHS[sort]}"
}
|
||||
|
||||
# Output metric - Print one Prometheus metric with its HELP and TYPE header
#
# Arguments:
#   $1 - metric name
#   $2 - help text
#   $3 - metric type (e.g. gauge, counter)
#   $4 - sample line(s) to print
#   $5 - sample line printed instead when $4 is empty
# Outputs:
#   "# HELP", "# TYPE" and the sample line(s) on stdout.
#
# printf is used rather than echo so that a value is always written verbatim:
# echo would treat a value such as "-n" or "-e" as an option.
output_metric() {
    local metric_name="${1:-}"
    local help_text="${2:-}"
    local metric_type="${3:-}"
    local metric_value="${4:-}"
    local default_value="${5:-}"

    printf '# HELP %s %s\n' "$metric_name" "$help_text"
    printf '# TYPE %s %s\n' "$metric_name" "$metric_type"
    printf '%s\n' "${metric_value:-$default_value}"
}
|
||||
|
||||
# Main function - Orchestrate the entire monitoring process
#
# Order of operations:
#   1. parse the command line, note the start time, install the EXIT trap
#   2. resolve command paths; outside dry-run mode also run the directory,
#      lockfile and cron-job setup steps
#   3. print metrics 1-9 on stdout through output_metric
# In dry-run mode a banner is written to stderr before and after the metrics.
#
# Arguments:
#   $@ - command-line arguments, handed to parse_arguments unchanged
main() {
    parse_arguments "$@"

    # Start of the window measured by the runtime metric.
    local script_start_time
    script_start_time=$(date +%s.%N)

    case "$DRY_RUN" in
        true)
            echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/usrlogins.prom ===" >&2
            ;;
    esac

    # Make sure cleanup runs however the script ends.
    trap cleanup EXIT

    find_commands

    # The setup steps are skipped in dry-run mode.
    case "$DRY_RUN" in
        false)
            setup_directory
            setup_lockfile
            install_cron_job
            ;;
    esac

    # ---- Metric 1: individual user sessions with details ----
    local session_lines
    session_lines=$(get_logged_users)
    output_metric "node_logged_in_usrs" "Currently Logged in Users" "gauge" \
        "$session_lines" 'node_logged_in_usrs{name="", location=""} 0'

    # ---- Metric 2: terminal count per user ----
    local terminal_lines
    terminal_lines=$(get_user_terminal_count)
    output_metric "node_logged_in_usr_terminals" "Total of open sessions per user" "gauge" \
        "$terminal_lines" 'node_logged_in_usr_terminals{username=""} 0'

    # ---- Metric 3: total user count system-wide ----
    local session_total
    session_total=$(get_total_user_count)
    output_metric "node_logged_in_total" "Total of open sessions on the system" "gauge" \
        "node_logged_in_total ${session_total:-0}" "node_logged_in_total 0"

    # Distinct, domain-stripped login names; shared by metrics 4 to 6.
    local logged_users
    logged_users=$("${CMD_PATHS[who]}" | "${CMD_PATHS[awk]}" '{gsub(/US\\|@us\.[^.]+\.net/, "", $1); print $1}' | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}")

    # ---- Metric 4: last 10 commands for each logged in user ----
    local user_commands=""
    local history_lines
    while IFS= read -r user; do
        [[ -n "$user" ]] || continue
        history_lines=$(get_last_user_commands "$user")
        [[ -n "$history_lines" ]] || continue
        user_commands+="$history_lines"$'\n'
    done <<< "$logged_users"

    output_metric "node_user_last_commands" "Last 10 commands executed by logged in users" "gauge" \
        "$user_commands" 'node_user_last_commands{username="", command_number="", command=""} 0'

    # ---- Metric 5: recent sudo commands for each logged in user ----
    local sudo_commands=""
    local sudo_lines
    while IFS= read -r user; do
        [[ -n "$user" ]] || continue
        sudo_lines=$(get_sudo_commands "$user")
        [[ -n "$sudo_lines" ]] || continue
        sudo_commands+="$sudo_lines"$'\n'
    done <<< "$logged_users"

    output_metric "node_user_sudo_commands" "Recent sudo commands executed by logged in users" "gauge" \
        "$sudo_commands" 'node_user_sudo_commands{username="", command=""} 0'

    # ---- Metric 6: session events (login/logout) for each logged in user ----
    local session_events=""
    local event_lines
    while IFS= read -r user; do
        [[ -n "$user" ]] || continue
        event_lines=$(get_session_events "$user")
        [[ -n "$event_lines" ]] || continue
        session_events+="$event_lines"$'\n'
    done <<< "$logged_users"

    output_metric "node_user_session_events" "Login and logout events for users" "gauge" \
        "$session_events" 'node_user_session_events{username="", event="", method=""} 0'

    # ---- Metric 7: active session durations ----
    local session_durations
    session_durations=$(get_session_durations)
    output_metric "node_user_session_duration_seconds" "Duration of active user sessions in seconds" "gauge" \
        "$session_durations" 'node_user_session_duration_seconds{username=""} 0'

    # ---- Metric 8: failed login attempts (security monitoring) ----
    local failed_logins
    failed_logins=$(get_failed_logins)
    output_metric "node_user_failed_logins" "Failed login attempts by username and source IP" "counter" \
        "$failed_logins" 'node_user_failed_logins{username="", source_ip="", failure_type=""} 0'

    # ---- Metric 9: script runtime ----
    # Falls back to 0 when the bc pipeline fails.
    local script_end_time
    local script_runtime
    script_end_time=$(date +%s.%N)
    script_runtime=$(echo "$script_end_time - $script_start_time" | bc -l 2>/dev/null || echo "0")
    output_metric "node_user_monitor_runtime_seconds" "Script execution time in seconds" "gauge" \
        "node_user_monitor_runtime_seconds $script_runtime" "node_user_monitor_runtime_seconds 0"

    case "$DRY_RUN" in
        true)
            echo "=== END DRY RUN OUTPUT ===" >&2
            ;;
    esac
}
|
||||
|
||||
# Script entry point
|
||||
# Run main with every command-line argument passed through unmodified.
main "$@"
|
||||
|
||||
# 2025-09-23
|
||||
# Fixed: Prometheus parsing errors with single quotes (\' sequences)
|
||||
# Fixed: Prometheus parsing errors with backslash escapes (\u, \x, etc.)
|
||||
# Improved: Domain regex pattern now handles any us.*.net domain instead of just us.calormen.net
|
||||
Reference in New Issue
Block a user