Add all 44 scripts, update CI: error severity baseline, PowerShell validation, multi-distro testing

Amp-Thread-ID: https://ampcode.com/threads/T-019cc404-c628-759e-a50b-f5eeea35b91f
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
root
2026-03-07 05:40:51 +01:00
parent db43b8a313
commit 88551536e6
43 changed files with 28906 additions and 23 deletions
+74 -23
View File
@@ -1,9 +1,11 @@
###############################################################################
# .gitlab-ci.yml — CI pipeline for bash script testing
# .gitlab-ci.yml — CI pipeline for linux-scripts repository
#
# Stages:
# 1. lint — ShellCheck static analysis + bash syntax check
# 2. test — Run --help and --dry-run in Ubuntu and RHEL containers
# All scripts are tested on every push:
# 1. lint — ShellCheck + bash syntax + PowerShell syntax
# 2. test — --help and --dry-run validation on Ubuntu and Rocky Linux
#
# On success on master, scripts are ready to sync to the website.
###############################################################################
stages:
@@ -11,7 +13,8 @@ stages:
- test
variables:
SHELLCHECK_SEVERITY: "warning"
# Start at "error" for a clean baseline, tighten to "warning" as scripts are cleaned up
SHELLCHECK_SEVERITY: "error"
# ─────────────────────────────────────────────
# Lint Stage
@@ -21,30 +24,57 @@ shellcheck:
stage: lint
image: koalaman/shellcheck-alpine:stable
script:
- echo "Running ShellCheck on all .sh files..."
- echo "Running ShellCheck on $(find . -name '*.sh' -not -path './.git/*' | wc -l) scripts..."
- find . -name "*.sh" -not -path "./.git/*" -print0 |
xargs -0 -r shellcheck --severity="$SHELLCHECK_SEVERITY" --format=tty
- echo "ShellCheck passed"
bash-syntax:
stage: lint
image: bash:5
script:
- echo "Checking bash syntax (bash -n)..."
- echo "Checking bash syntax..."
- |
errors=0
total=0
for script in $(find . -name "*.sh" -not -path "./.git/*"); do
total=$((total + 1))
if ! bash -n "$script" 2>&1; then
errors=$((errors + 1))
fi
done
if [ "$errors" -gt 0 ]; then
echo "FAILED: $errors script(s) have syntax errors"
echo "FAILED: $errors/$total script(s) have syntax errors"
exit 1
fi
echo "All scripts pass syntax check"
echo "All $total scripts pass syntax check"
# Lint job: parse every *.ps1 file with the PowerShell language parser.
# Parser::ParseFile does NOT throw on a syntax error; it returns the problems
# through its third (out) argument, so that collection has to be inspected.
powershell-syntax:
  stage: lint
  image: mcr.microsoft.com/powershell:lts-ubuntu-24.04
  script:
    - echo "Checking PowerShell syntax..."
    - |
      errors=0
      total=0
      for script in $(find . -name "*.ps1" -not -path "./.git/*"); do
        total=$((total + 1))
        echo "Checking: $script"
        # try/catch only covers I/O failures (unreadable or missing file);
        # syntax errors are detected through the $errs collection.
        if ! pwsh -NoProfile -Command "\$errs = \$null; try { \$null = [System.Management.Automation.Language.Parser]::ParseFile((Resolve-Path -LiteralPath '$script').ProviderPath, [ref]\$null, [ref]\$errs) } catch { Write-Error \$_; exit 1 }; if (\$errs.Count -gt 0) { \$errs | ForEach-Object { Write-Host 'line' \$_.Extent.StartLineNumber '-' \$_.Message }; exit 1 }; Write-Host 'OK: $script'" 2>&1; then
          errors=$((errors + 1))
        fi
      done
      if [ "$errors" -gt 0 ]; then
        echo "FAILED: $errors/$total PowerShell script(s) have syntax errors"
        exit 1
      fi
      echo "All $total PowerShell scripts pass syntax check"
  rules:
    - exists:
        - "*.ps1"
# ─────────────────────────────────────────────
# Test Stage — Ubuntu
# Test Stage — Ubuntu 24.04
# ─────────────────────────────────────────────
test-ubuntu:
@@ -54,19 +84,30 @@ test-ubuntu:
- apt-get update -qq
- apt-get install -y -qq procps iproute2 kmod >/dev/null 2>&1
script:
- echo "=== Testing on Ubuntu 24.04 ==="
- echo "=== Testing --help flags on Ubuntu 24.04 ==="
- |
for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*"); do
echo ""
echo "--- $(basename "$script") --help ---"
bash "$script" --help 2>&1 || true
passed=0
failed=0
for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*" | sort); do
name=$(basename "$script")
if bash "$script" --help >/dev/null 2>&1; then
echo "✓ $name --help"
passed=$((passed + 1))
elif bash "$script" -h >/dev/null 2>&1; then
echo "✓ $name -h"
passed=$((passed + 1))
else
echo "○ $name (no --help flag)"
fi
done
echo ""
echo "$passed scripts have working --help"
- echo ""
- echo "--- networktuning.sh --dry-run ---"
- echo "=== Testing networktuning.sh --dry-run ==="
- bash networktuning.sh --dry-run 2>&1 || true
# ─────────────────────────────────────────────
# Test Stage — RHEL
# Test Stage — Rocky Linux 9
# ─────────────────────────────────────────────
test-rhel:
@@ -75,13 +116,23 @@ test-rhel:
before_script:
- dnf install -y -q procps iproute kmod >/dev/null 2>&1
script:
- echo "=== Testing on Rocky Linux 9 ==="
- echo "=== Testing --help flags on Rocky Linux 9 ==="
- |
for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*"); do
echo ""
echo "--- $(basename "$script") --help ---"
bash "$script" --help 2>&1 || true
passed=0
for script in $(find . -maxdepth 1 -name "*.sh" -not -path "./.git/*" | sort); do
name=$(basename "$script")
if bash "$script" --help >/dev/null 2>&1; then
echo "✓ $name --help"
passed=$((passed + 1))
elif bash "$script" -h >/dev/null 2>&1; then
echo "✓ $name -h"
passed=$((passed + 1))
else
echo "○ $name (no --help flag)"
fi
done
echo ""
echo "$passed scripts have working --help"
- echo ""
- echo "--- networktuning.sh --dry-run ---"
- echo "=== Testing networktuning.sh --dry-run ==="
- bash networktuning.sh --dry-run 2>&1 || true
+287
View File
@@ -0,0 +1,287 @@
#!/bin/bash
######################################################################################
#### Version 2.2 ####
#### For questions or comments contact@mylinux.work ####
#### Author : Phil Connor ####
#### ####
#### Notes : ####
#### This script is a simple "helper" to install and configure Nagios 4 ####
#### behind Apache on Ubuntu or RedHat based servers. ####
#### There is no silver bullet. Don't expect the perfect setup, ####
#### review comments and adapt the parameters to your application usage. ####
#### ####
#### Use this script at your OWN risk. There is no guarantee whatsoever. ####
#### ####
#### Usage chmod 755 then ./PdnsInstall.sh or bash PdnsInstall.sh ####
######################################################################################
############################
#### User Configurables ####
############################
# HTTP=apache
# Credentials for the Nagios web UI account created by nagios_install.
# Both may be supplied through the environment so the secret does not have to
# be stored in this file; the values below are only fallbacks.
# NOTE: the fallback password is a placeholder - change it before real use.
NAGAD=${NAGAD:-nagiosadmin}
NAGADPASS=${NAGADPASS:-MyPaSsWoRd}
##########################
#### System Variables ####
##########################
# IPADD=$(ifconfig | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | grep -Eo '([0-9]*\.){3}[0-9]*' | grep -v '127.0.0.1')
# OS: first word of PRETTY_NAME from /etc/os-release, lower-cased
# (compared below against values such as "ubuntu", "rocky" or "red").
OS=$(awk '/PRETTY_NAME/ { gsub(/PRETTY_NAME=/, ""); gsub(/[="]/, ""); print tolower($1) }' /etc/os-release)
# OSVER: VERSION_ID up to the first dot, i.e. the major release number.
OSVER=$(awk -F. '/VERSION_ID/ { gsub(/VERSION_ID=/, ""); gsub(/[="]/, ""); print $1 }' /etc/os-release)
# SAEMAIL=
###########################################################
#### Detect Package Manager from OS and OSVer Variables ####
###########################################################
# Map the detected distribution (OS / OSVER) to a package manager command.
# OS is the lower-cased first word of PRETTY_NAME, which is "almalinux" on
# AlmaLinux; the shorter "alma" is kept for compatibility.
case "${OS}" in
  ubuntu)
    PAKMGR="apt -y"
    ;;
  centos | red | oracle | rocky | alma | almalinux)
    # dnf is used from major release 8 onwards.
    if [[ "${OSVER}" =~ ^[0-9]+$ ]] && ((10#${OSVER} >= 8)); then
      PAKMGR="dnf -y"
    fi
    ;;
esac
# With PAKMGR empty, every later "${PAKMGR} install ..." would expand to the
# coreutils "install" command, so refuse to continue instead.
if [ -z "${PAKMGR:-}" ]; then
  echo "ERROR: unsupported OS or release: '${OS:-unknown}' '${OSVER:-unknown}'" >&2
  exit 1
fi
###########################
#### Install Net-Utils ####
###########################
# Make sure ifconfig is available. It is shipped in the "net-tools" package on
# Ubuntu as well as on the RedHat family (there is no "net-utils" package).
if ! command -v ifconfig >/dev/null 2>&1; then
  if [ "${OS}" = ubuntu ]; then
    # Refresh the apt package index before installing.
    ${PAKMGR} update
  fi
  ${PAKMGR} install net-tools
fi
########################
#### Nagios Install ####
########################
#######################################
# Install Nagios 4 from packages, adjust the Apache access rules of the Nagios
# CGI config, create the digest-auth admin user and enable/restart services.
# Globals:   OS, PAKMGR, NAGAD, NAGADPASS (read)
# Arguments: none
# NOTE: the function is only defined here; it is invoked once from the main
#       section further down (a second call here ran the whole install twice).
#######################################
function nagios_install() {
  local htpath
  if [ "${OS}" = ubuntu ]; then
    htpath=/etc/apache2/conf-enabled/nagios4-cgi.conf
  else
    # Non-Ubuntu systems use the httpd service (restarted below), whose
    # configuration lives under /etc/httpd rather than /etc/apache2.
    htpath=/etc/httpd/conf.d/nagios.conf
  fi
  #if [ "${OS}" = ubuntu ]; then
  ${PAKMGR} update
  DEBIAN_FRONTEND=noninteractive ${PAKMGR} install nagios4 nagios-nrpe-server nagios-plugins nagios-plugins-contrib expect
  a2enmod authz_groupfile auth_digest
  # ${PAKMGR} install autoconf gcc libc6 make wget unzip apache2 php libapache2-mod-php libgd-dev libssl-dev expect
  # Order matters: comment out the IP allow-list ("# Require ip", with a
  # space), prefix "Require all" with "#", then strip "#" from every
  # "#Require " line (which does not touch the "# Require ip" line).
  sed -i 's/Require ip ::1\/128 fc00::\/7 fe80::\/10 10\.0\.0\.0\/8 127\.0\.0\.0\/8 169\.254\.0\.0\/16 172\.16\.0\.0\/12 192\.168\.0\.0\/16/# Require ip ::1\/128 fc00::\/7 fe80::\/10 10\.0\.0\.0\/8 127\.0\.0\.0\/8 169\.254\.0\.0\/16 172\.16\.0\.0\/12 192\.168\.0\.0\/16/g' "$htpath"
  #sed -i 's/<Files "cmd.cgi">/#<Files "cmd.cgi">/g' $htpath
  sed -i 's/Require all/#Require all/g' "$htpath"
  #sed -i 's/<//Files>/#<//Files>/g' $htpath
  sed -i 's/#Require /Require /g' "$htpath"
  # htdigest only reads the password interactively, so drive it with expect.
  expect -f - <<-EOF
set timeout 5
spawn htdigest -c /etc/nagios4/htdigest.users Nagios4 $NAGAD
expect "New password:"
send -- "$NAGADPASS\r"
expect "Re-type new password:"
send -- "$NAGADPASS\r"
expect eof
EOF
  systemctl enable --now nagios
  systemctl status nagios
  if [ "${OS}" = ubuntu ]; then
    systemctl enable apache2
    systemctl restart apache2
  else
    systemctl enable httpd
    systemctl restart httpd
  fi
}
# OUTFILE1="$nagdir/nrpe_rule.te"
# # TITLE="nrpe_rule"
# define NRPE_RULE << 'EOF'
# module nrpe_rule 1.0;
# require {
# type nrpe_t;
# type proc_net_t;
# class file { open read };
# class file { ioctl open read getattr };
# }
# #=================== nrpe_t =========================
# allow nrpe_t proc_net_t:file open;
# allow nrpe_t proc_net_t:file read;
# allow nrpe_t proc_net_t:file { getattr ioctl };
# EOF
# {
# printf "%s\n" "$NRPE_RULE" | cut -c 3-
# } > "$OUTFILE1"
# checkmodule -M -m -o $nagdir/nrpe_rule.mod $nagdir/nrpe_rule.te
# semodule_package -o $nagdir/nrpe_rule.pp -m $nagdir/nrpe_rule.mod
# semodule -i $nagdir/nrpe_rule.pp
# semanage permissive -a nrpe_t
# ${PAKMGR} install nrpe nrpe-selinux nagios-plugins nagios-plugins-all nagios-plugins-uptime nagios-plugins-oracle nagios-plugins-check-updates
# sed -i "/^allowed_hosts/c\allowed_hosts=127.0.0.1,::1,$IPADD" $nrpecfg
# sed -i "/^#command\[check_load\]/c\command[check_uptime]=$nagdir/check_uptime" $nrpecfg
# sed -i "/^command\[check_load\]/c\command\[check_load\]=$nagdir/check_load -r -w 6,4,2 -c 12,10,7" $nrpecfg
# sed -i "/^command\[check_hda1\]/c\command[check_hda1]=$nagdir/check_disk -w 15% -c 10% -p /dev/sda3" $nrpecfg
# sed -i "/^command\[check_zombie_procs\]/c\# command[check_zombie_procs]=$nagdir/check_procs -w 5 -c 10 -s Z" $nrpecfg
# sed -i "/^command\[check_total_procs\]/c\command[check_total_procs]=$nagdir/check_procs -w 250 -c 300 -s RSZDT" $nrpecfg
# sed -i "/^#command\[check_users\]/c\command[check_net]=$nagdir/check_net" $nrpecfg
# sed -i "/^#command\[check_swap\]/c\command[check_swap]=$nagdir/check_swap -w 20% -c 10%" $nrpecfg
# sed -i "/^#command\[check_mem\]/c\command[check_mem]=$nagdir/check_mem" $nrpecfg
# if [ "${OS}" = ubuntu ]; then
# sed -i "/^#command\[check_apt\]/c\command[check_apt]=$nagdir/check_apt/" $nrpecfg
# else
# sed -i "/^#command\[check_yum\]/c\command[check_yum]=$nagdir/check_updates" $nrpecfg
# fi
# sed -i "/^#command\[check_all_procs\]/c\command[check_logic]=$nagdir/check_http -p 7011" $nrpecfg
# sed -i "/^#command\[check_procs\]/c\command[check_oracle]=$nagdir/check_http -p 8010" $nrpecfg
# sed -i "/^#command\[check_disk\]/c\command[check_ping]=$nagdir/check_ping 127.0.0.1 -w 100.0,20% -c 500.0,60%" $nrpecfg
# sed -i "/^#command\[check_cpu_stats\]/c\command[check_ssh]=$nagdir/check_ssh" $nrpecfg
# }
# }
######################
#### HTTP Install ####
######################
# function install_http() {
# {
# if [ "${OS}" = ubuntu ]; then
# if [ $HTTP = apache ]; then
# echo "Apache"
# else
# echo "Nginx"
# fi
# echo "something"
# else
# if [ $HTTP = apache ]; then
# echo "Apache"
# else
# echo "Nginx"
# fi
# fi
# }
# }
# Main: run the Nagios installation.
nagios_install
# install_http
# # SAMPLE CONFIG SNIPPETS FOR APACHE WEB SERVER
# #
# # This file contains examples of entries that need
# # to be incorporated into your Apache web server
# # configuration file. Customize the paths, etc. as
# # needed to fit your system.
# ScriptAlias /nagios/cgi-bin "/usr/local/nagios/sbin"
# <Directory "/usr/local/nagios/sbin">
# # SSLRequireSSL
# Options ExecCGI
# AllowOverride None
# <IfVersion >= 2.3>
# <RequireAll>
# Require all granted
# # Require host 127.0.0.1
# AuthName "Nagios Access"
# AuthType Basic
# AuthUserFile /usr/local/nagios/etc/htpasswd.users
# Require valid-user
# </RequireAll>
# </IfVersion>
# <IfVersion < 2.3>
# Order allow,deny
# Allow from all
# # Order deny,allow
# # Deny from all
# # Allow from 127.0.0.1
# AuthName "Nagios Access"
# AuthType Basic
# AuthUserFile /usr/local/nagios/etc/htpasswd.users
# Require valid-user
# </IfVersion>
# </Directory>
# Alias /nagios "/usr/local/nagios/share"
# <Directory "/usr/local/nagios/share">
# # SSLRequireSSL
# Options None
# AllowOverride None
# <IfVersion >= 2.3>
# <RequireAll>
# Require all granted
# # Require host 127.0.0.1
# AuthName "Nagios Access"
# AuthType Basic
# AuthUserFile /usr/local/nagios/etc/htpasswd.users
# Require valid-user
# </RequireAll>
# </IfVersion>
# <IfVersion < 2.3>
# Order allow,deny
# Allow from all
# # Order deny,allow
# # Deny from all
# # Allow from 127.0.0.1
# AuthName "Nagios Access"
# AuthType Basic
# AuthUserFile /usr/local/nagios/etc/htpasswd.users
# Require valid-user
# </IfVersion>
# </Directory>
# wget https://assets.nagios.com/downloads/nagioscore/releases/nagios-4.4.7.tar.gz
# tar xzf nagios-4.4.7.tar.gz
# cd nagios-4.4.7 || exit
# if [ "${OS}" = ubuntu ]; then
# ./configure --with-httpd-conf=/etc/apache2/sites-enabled
# else
# ./configure --with-httpd-conf=/etc/httpd/conf.d
# fi
# make all
# make install-groups-users
# if [ "${OS}" = ubuntu ]; then
# usermod -aG nagios www-data
# else
# usermod -aG nagios apache
# fi
# make install
# make install-init
# make install-daemoninit
# make install-commandmode
# make install-config
# make install-webconf
# if [ "${OS}" = ubuntu ]; then
# a2enmod rewrite cgi
# fi
# fi
# if [ ! "$(command -v wget)" ]; then
# ${PAKMGR} install wget
# fi
# ndir1=/usr/lib/nagios/plugins
# ndir2=/usr/lib64/nagios/plugins
# #nrpecfg=/etc/nagios/nrpe.cfg
# if [ -d $ndir1 ]; then
# nagdir=$ndir1
# elif [ -d $ndir2 ]; then
# nagdir=$ndir2
# fi
# define () {
# IFS=$'\n' read -r -d '' "$1"
# }
+1298
View File
File diff suppressed because it is too large Load Diff
+3953
View File
File diff suppressed because it is too large Load Diff
+809
View File
@@ -0,0 +1,809 @@
#!/bin/bash
################################################################################
# Script Name: add-http-auth.sh
# Version: 3.0
# Description: Add HTTP Basic Auth to Prometheus stack reverse proxies
# Supports both nginx and Apache — auto-detects which is in use.
# Uses non-destructive include snippets to preserve existing
# HTTPS/certbot configs.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Supported Services:
# - Prometheus (port 9090)
# - Alertmanager (port 9093)
# - Mimir (port 9009) — optionally protects /api/v1/push
# - Loki (port 3100) — optionally protects /loki/api/v1/push
#
# Supported Web Servers:
# - nginx — inserts 'include' snippets into location blocks
# - Apache — inserts 'Include' snippets into <Location> blocks
#
# Usage:
# sudo ./add-http-auth.sh
# sudo ./add-http-auth.sh --remove
# sudo ./add-http-auth.sh --status
#
################################################################################
# Strict mode: abort on errors, on unset variables and on failing pipelines.
set -o errexit -o nounset -o pipefail
SCRIPT_VERSION='3.0'
BACKUP_DIR='/var/backups/http-auth'
# The following are filled in at runtime by detect_web_server/configure_paths.
WEB_SERVER=''   # "nginx" or "apache"
CONFIG_DIR=''   # where vhost configs live
SNIPPET_DIR=''  # where auth snippets go
AUTH_DIR=''     # where htpasswd files go
WEB_USER=''     # www-data, nginx, apache, etc.
SERVICE_NAME='' # systemd service name
# One entry per service, formatted as: name|nginx_config|apache_config|port
SERVICES=(
  'prometheus|prometheus.conf|prometheus.conf|9090'
  'alertmanager|alerts.conf|alerts.conf|9093'
  'mimir|mimir.conf|mimir.conf|9009'
  'loki|loki.conf|loki.conf|3100'
)
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print the command-line help to stdout and terminate the script with
# status 0. The text interpolates $0 and SCRIPT_VERSION.
show_usage() {
cat <<EOF
Usage: $0 [OPTIONS]
Add HTTP Basic Auth to Prometheus stack reverse proxies (v${SCRIPT_VERSION}).
Supports both nginx and Apache — auto-detects which is in use.
OPTIONS:
--remove Remove auth from all services (restore backups)
--status Show current auth status for each service
-h, --help Show this help
EXAMPLES:
$0 # Interactive setup
$0 --status # Check which services have auth enabled
$0 --remove # Remove auth and restore original configs
EOF
exit 0
}
# Report a fatal error on stderr and terminate the script with status 1.
# Arguments: $1 - message text
die() {
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}
# Print a non-fatal warning on stderr.
# Arguments: $1 - message text
warn() {
  printf 'WARNING: %s\n' "$1" >&2
}
# Print the vhost config filename of a SERVICES entry that applies to the
# active web server.
# Globals:   WEB_SERVER (read)
# Arguments: $1 - entry formatted as name|nginx_config|apache_config|port
get_config_file() {
  local entry="$1"
  local svc_name nginx_file apache_file svc_port
  IFS='|' read -r svc_name nginx_file apache_file svc_port <<< "$entry"
  case "$WEB_SERVER" in
    nginx) echo "$nginx_file" ;;
    *) echo "$apache_file" ;;
  esac
}
# Print the service name (first field) of a SERVICES entry.
# Arguments: $1 - entry formatted as name|nginx_config|apache_config|port
get_service_name() {
  local entry="$1"
  # Keep the scratch variable local so a caller's own "name" is not clobbered.
  local name
  IFS='|' read -r name _ _ _ <<< "$entry"
  echo "$name"
}
# Print the port (fourth field) of a SERVICES entry.
# Arguments: $1 - entry formatted as name|nginx_config|apache_config|port
get_service_port() {
  local entry="$1"
  # Keep the scratch variable local so a caller's own "port" is not clobbered.
  local port
  IFS='|' read -r _ _ _ port <<< "$entry"
  echo "$port"
}
# ============================================================================
# WEB SERVER DETECTION
# ============================================================================
# Decide which web server the reverse proxies run on and store the result in
# WEB_SERVER ("nginx" or "apache"). A server counts as present when its binary
# is on PATH and its systemd unit is active. If both are present the user is
# asked; if neither is, the script aborts through die.
# Globals: WEB_SERVER (written)
detect_web_server() {
  local has_nginx=false
  local has_apache=false
  # Local so the prompt answer does not leak into the global scope.
  local choice=""
  if command -v nginx &>/dev/null && systemctl is-active --quiet nginx 2>/dev/null; then
    has_nginx=true
  fi
  # Debian-style (apache2) and RedHat-style (httpd) installations.
  if command -v apache2ctl &>/dev/null && systemctl is-active --quiet apache2 2>/dev/null; then
    has_apache=true
  elif command -v httpd &>/dev/null && systemctl is-active --quiet httpd 2>/dev/null; then
    has_apache=true
  fi
  if [ "$has_nginx" = true ] && [ "$has_apache" = true ]; then
    echo ""
    echo "Both nginx and Apache detected. Which are you using for reverse proxies?"
    echo " 1) nginx"
    echo " 2) Apache"
    read -r -p "Select [1]: " choice
    # Anything other than "2" (including an empty answer) selects nginx.
    case "${choice:-1}" in
      2) WEB_SERVER="apache" ;;
      *) WEB_SERVER="nginx" ;;
    esac
  elif [ "$has_nginx" = true ]; then
    WEB_SERVER="nginx"
  elif [ "$has_apache" = true ]; then
    WEB_SERVER="apache"
  else
    die "Neither nginx nor Apache detected as running"
  fi
  echo " Detected web server: ${WEB_SERVER}"
}
# Derive all filesystem locations and the service/user names from WEB_SERVER.
# Globals: WEB_SERVER (read); CONFIG_DIR, SNIPPET_DIR, AUTH_DIR, SERVICE_NAME,
#          WEB_USER (written)
# Aborts through die when no known config directory exists.
configure_paths() {
  if [ "$WEB_SERVER" != "nginx" ]; then
    # Apache: try the Debian layout first, then the RedHat one.
    if [ -d "/etc/apache2/sites-available" ]; then
      CONFIG_DIR="/etc/apache2/sites-available"
      SNIPPET_DIR="/etc/apache2/conf-available"
      SERVICE_NAME="apache2"
    elif [ -d "/etc/httpd/conf.d" ]; then
      CONFIG_DIR="/etc/httpd/conf.d"
      SNIPPET_DIR="/etc/httpd/conf.d"
      SERVICE_NAME="httpd"
    else
      die "Apache config directory not found"
    fi
    if [ -d "/etc/apache2" ]; then
      AUTH_DIR="/etc/apache2/auth"
    else
      AUTH_DIR="/etc/httpd/auth"
    fi
    # First existing account wins; root is the fallback.
    WEB_USER="root"
    if id "www-data" &>/dev/null; then
      WEB_USER="www-data"
    elif id "apache" &>/dev/null; then
      WEB_USER="apache"
    fi
    return 0
  fi

  # nginx
  if [ -d "/etc/nginx/sites-available" ]; then
    CONFIG_DIR="/etc/nginx/sites-available"
  elif [ -d "/etc/nginx/conf.d" ]; then
    CONFIG_DIR="/etc/nginx/conf.d"
  else
    die "nginx config directory not found"
  fi
  SNIPPET_DIR="/etc/nginx/snippets"
  AUTH_DIR="/etc/nginx/auth"
  SERVICE_NAME="nginx"
  WEB_USER="root"
  if id "www-data" &>/dev/null; then
    WEB_USER="www-data"
  elif id "nginx" &>/dev/null; then
    WEB_USER="nginx"
  fi
}
# ============================================================================
# HTTPS DETECTION
# ============================================================================
# Succeed when the given vhost config contains a TLS directive for the active
# web server (an nginx "listen ... 443 ssl", or Apache "SSLEngine on" /
# "<VirtualHost *:443"). Unreadable files count as "no HTTPS".
# Globals:   WEB_SERVER (read)
# Arguments: $1 - path of the config file
has_https() {
  local config_file="$1"
  local tls_pattern='SSLEngine\s+on|<VirtualHost\s+\*:443'
  if [ "$WEB_SERVER" = "nginx" ]; then
    tls_pattern='listen\s+.*443\s+ssl'
  fi
  grep -qE "$tls_pattern" "$config_file" 2>/dev/null
}
# ============================================================================
# AUTH SNIPPET CHECK
# ============================================================================
# Succeed when the config already references the auth snippet of a service.
# nginx uses a lower-case "include", Apache an upper-case "Include".
# Globals:   WEB_SERVER, SNIPPET_DIR (read)
# Arguments: $1 - path of the config file, $2 - service name
has_auth_snippet() {
  local config_file="$1"
  local service="$2"
  local directive="Include"
  if [ "$WEB_SERVER" = "nginx" ]; then
    directive="include"
  fi
  grep -qF "${directive} ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" 2>/dev/null
}
# ============================================================================
# SETUP FUNCTIONS
# ============================================================================
# Ensure the htpasswd utility is available, installing the package that
# provides it with the first package manager found (apt-get, dnf, yum).
# Aborts through die when none of them exists.
install_htpasswd() {
  if command -v htpasswd &>/dev/null; then
    return 0
  fi
  echo "Installing htpasswd..."
  local pkg_tool
  for pkg_tool in apt-get dnf yum; do
    command -v "$pkg_tool" &>/dev/null || continue
    case "$pkg_tool" in
      apt-get) apt-get -y install apache2-utils ;;
      *) "$pkg_tool" -y install httpd-tools ;;
    esac
    return
  done
  die "Cannot install htpasswd — install apache2-utils or httpd-tools manually"
}
# Copy a config file into a timestamped directory below BACKUP_DIR and print
# where the copy was stored.
# Globals:   BACKUP_DIR (read)
# Arguments: $1 - path of the config file to back up
backup_config() {
  local config_file="$1"
  local file_name timestamp backup_path
  local attempt=0
  file_name=$(basename "$config_file")
  timestamp=$(date +%F_%H%M%S)
  backup_path="${BACKUP_DIR}/${timestamp}"
  # The same file can be backed up more than once within a second (e.g. before
  # the main edit and again before the push-endpoint edit). Never overwrite an
  # earlier copy: fall back to "<timestamp>-1", "<timestamp>-2", ...
  while [ -e "${backup_path}/${file_name}" ]; do
    attempt=$((attempt + 1))
    backup_path="${BACKUP_DIR}/${timestamp}-${attempt}"
  done
  mkdir -p "$backup_path"
  cp -- "$config_file" "$backup_path/"
  echo " Backed up to ${backup_path}/${file_name}"
}
# ============================================================================
# NGINX-SPECIFIC FUNCTIONS
# ============================================================================
# Write the nginx basic-auth snippet for one service.
# Globals:   SNIPPET_DIR, AUTH_DIR (read)
# Arguments: $1 - service name, $2 - display name used in the auth realm
nginx_create_snippet() {
  local service="$1"
  local display_name="$2"
  local snippet_file="${SNIPPET_DIR}/auth-${service}.conf"
  {
    printf '# Auth snippet for %s — managed by add-http-auth.sh\n' "$display_name"
    printf 'auth_basic "%s - Authentication Required";\n' "$display_name"
    printf 'auth_basic_user_file %s/.htpasswd-%s;\n' "$AUTH_DIR" "$service"
  } > "$snippet_file"
  echo " Created ${snippet_file}"
}
# Add the auth snippet include right after the first "location / {" line of an
# nginx vhost config. Does nothing when the include is already present.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - path of the config file, $2 - service name
nginx_insert_auth() {
  local config_file="$1"
  local service="$2"
  local include_line=" include ${SNIPPET_DIR}/auth-${service}.conf;"
  if has_auth_snippet "$config_file" "$service"; then
    echo " Auth already configured in $(basename "$config_file") — skipping"
    return 0
  fi
  backup_config "$config_file"
  local temp_file
  temp_file=$(mktemp)
  awk -v inc="$include_line" '
    /location \/ \{/ && !done {
      print
      print inc
      done = 1
      next
    }
    { print }
  ' "$config_file" > "$temp_file"
  # Do not claim success when there was no "location / {" line to attach to.
  if ! grep -qF -- "$include_line" "$temp_file"; then
    rm -f "$temp_file"
    echo " WARNING: no 'location / {' block in $(basename "$config_file") — auth NOT inserted" >&2
    return 0
  fi
  # Write the new content into the existing file instead of moving the temp
  # file over it: mv would replace the config with a 0600 file that carries
  # the temp file's owner and SELinux label.
  cat "$temp_file" > "$config_file"
  rm -f "$temp_file"
  echo " Inserted auth include into $(basename "$config_file")"
}
# Add the auth snippet include to the first location block whose line matches
# ".../api/v1/push", unless one of the two lines after such a location line
# already references the snippet.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - path of the config file, $2 - service name
nginx_insert_push_auth() {
  local config_file="$1"
  local service="$2"
  if grep -q "location.*/api/v1/push" "$config_file" && \
    ! grep -A2 "location.*/api/v1/push" "$config_file" | grep -qF "auth-${service}.conf"; then
    local temp_file
    temp_file=$(mktemp)
    local include_line=" include ${SNIPPET_DIR}/auth-${service}.conf;"
    awk -v inc="$include_line" '
      /location.*\/api\/v1\/push/ && !push_done {
        print
        print inc
        push_done = 1
        next
      }
      { print }
    ' "$config_file" > "$temp_file"
    # Rewrite in place rather than mv-ing the temp file over the config, so
    # the config keeps its mode, owner and SELinux label.
    cat "$temp_file" > "$config_file"
    rm -f "$temp_file"
    echo " Protected push endpoint with auth"
  fi
}
# Delete every line that includes the service's auth snippet from an nginx
# config file.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - path of the config file, $2 - service name
# Returns:   0 on success, grep's status (>1) if the file could not be read
nginx_remove_auth() {
  local config_file="$1"
  local service="$2"
  local temp_file
  local status=0
  temp_file=$(mktemp)
  # grep -v exits 1 when nothing is left to print; that is not an error here
  # and must not abort the script under "set -e".
  grep -vF "include ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" > "$temp_file" || status=$?
  if [ "$status" -gt 1 ]; then
    rm -f "$temp_file"
    return "$status"
  fi
  # Rewrite in place so the config keeps its mode, owner and SELinux label
  # (mv would give it those of the temp file).
  cat "$temp_file" > "$config_file"
  rm -f "$temp_file"
}
# Run nginx's configuration test ("nginx -t"); stderr is merged into stdout
# and the exit status of nginx is returned to the caller.
nginx_test_config() {
nginx -t 2>&1
}
# ============================================================================
# APACHE-SPECIFIC FUNCTIONS
# ============================================================================
# Write the Apache basic-auth snippet for one service.
# Globals:   SNIPPET_DIR, AUTH_DIR (read)
# Arguments: $1 - service name, $2 - display name used in the auth realm
apache_create_snippet() {
  local service="$1"
  local display_name="$2"
  local snippet_file="${SNIPPET_DIR}/auth-${service}.conf"
  {
    printf '# Auth snippet for %s — managed by add-http-auth.sh\n' "$display_name"
    printf 'AuthType Basic\n'
    printf 'AuthName "%s - Authentication Required"\n' "$display_name"
    printf 'AuthUserFile %s/.htpasswd-%s\n' "$AUTH_DIR" "$service"
    printf 'Require valid-user\n'
  } > "$snippet_file"
  echo " Created ${snippet_file}"
}
# Add the auth snippet Include to an Apache vhost config. Three layouts are
# handled, in this order:
#   1. an existing <Location /> block  -> Include goes right after its tag
#   2. a <Proxy ...> block              -> a new <Location /> block is put
#                                          before the first ProxyPass line
#   3. neither                          -> a new <Location /> block is put
#                                          before the first </VirtualHost>
# Does nothing when the Include is already present.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - path of the config file, $2 - service name
apache_insert_auth() {
  local config_file="$1"
  local service="$2"
  local include_line=" Include ${SNIPPET_DIR}/auth-${service}.conf"
  if has_auth_snippet "$config_file" "$service"; then
    echo " Auth already configured in $(basename "$config_file") — skipping"
    return 0
  fi
  backup_config "$config_file"
  local temp_file
  temp_file=$(mktemp)
  # The awk programs use [ \t] instead of \s: \s is a gawk extension and
  # silently never matches on awks that lack it.
  if grep -qE '<Location\s+/\s*>' "$config_file"; then
    # Insert after <Location /> opening tag
    awk -v inc="$include_line" '
      /<Location[ \t]+\/[ \t]*>/ && !done {
        print
        print inc
        done = 1
        next
      }
      { print }
    ' "$config_file" > "$temp_file"
  elif grep -qE '<Proxy\s+' "$config_file"; then
    # Insert inside <VirtualHost> before the first ProxyPass
    awk -v inc="$include_line" '
      /ProxyPass[ \t]/ && !done {
        # Add a Location block with auth before ProxyPass
        print " <Location />"
        print inc
        print " </Location>"
        print ""
        done = 1
      }
      { print }
    ' "$config_file" > "$temp_file"
  else
    # No Location or Proxy block found — add a Location block before </VirtualHost>
    awk -v inc="$include_line" '
      /<\/VirtualHost>/ && !done {
        print ""
        print " <Location />"
        print inc
        print " </Location>"
        print ""
        done = 1
      }
      { print }
    ' "$config_file" > "$temp_file"
  fi
  # Do not claim success when no insertion point was found.
  if ! grep -qF -- "$include_line" "$temp_file"; then
    rm -f "$temp_file"
    echo " WARNING: no insertion point found in $(basename "$config_file") — auth NOT inserted" >&2
    return 0
  fi
  # Rewrite in place so the config keeps its mode, owner and SELinux label
  # (mv would give it those of the temp file).
  cat "$temp_file" > "$config_file"
  rm -f "$temp_file"
  echo " Inserted auth into $(basename "$config_file")"
}
# Add the auth snippet Include to the Location block of the push endpoint
# (/api/v1/push for mimir, /loki/api/v1/push for loki). Other services are
# ignored. Nothing is changed when the push path does not occur in the file or
# the snippet is already referenced within three lines after it.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - path of the config file, $2 - service name
apache_insert_push_auth() {
  local config_file="$1"
  local service="$2"
  local push_path=""
  case "$service" in
    mimir) push_path="/api/v1/push" ;;
    loki) push_path="/loki/api/v1/push" ;;
    *) return 0 ;;
  esac
  # Check if there's already a Location block for the push path
  if grep -qF "$push_path" "$config_file" && \
    ! grep -A3 "$push_path" "$config_file" | grep -qF "auth-${service}.conf"; then
    backup_config "$config_file"
    local temp_file
    temp_file=$(mktemp)
    local include_line=" Include ${SNIPPET_DIR}/auth-${service}.conf"
    awk -v path="$push_path" -v inc="$include_line" '
      $0 ~ path && /Location/ && !push_done {
        print
        print inc
        push_done = 1
        next
      }
      { print }
    ' "$config_file" > "$temp_file"
    # Rewrite in place so the config keeps its mode, owner and SELinux label
    # (mv would give it those of the temp file).
    cat "$temp_file" > "$config_file"
    rm -f "$temp_file"
    echo " Protected push endpoint with auth"
  fi
}
# Delete every line that includes the service's auth snippet from an Apache
# config file.
# Globals:   SNIPPET_DIR (read)
# Arguments: $1 - path of the config file, $2 - service name
# Returns:   0 on success, grep's status (>1) if the file could not be read
apache_remove_auth() {
  local config_file="$1"
  local service="$2"
  local temp_file
  local status=0
  temp_file=$(mktemp)
  # grep -v exits 1 when nothing is left to print; that is not an error here
  # and must not abort the script under "set -e".
  grep -vF "Include ${SNIPPET_DIR}/auth-${service}.conf" "$config_file" > "$temp_file" || status=$?
  if [ "$status" -gt 1 ]; then
    rm -f "$temp_file"
    return "$status"
  fi
  # Rewrite in place so the config keeps its mode, owner and SELinux label
  # (mv would give it those of the temp file).
  cat "$temp_file" > "$config_file"
  rm -f "$temp_file"
}
# Run Apache's configuration test: "apache2ctl configtest" when apache2ctl is
# on PATH, "httpd -t" otherwise. stderr is merged into stdout and the exit
# status of the test command is returned.
apache_test_config() {
  local -a checker=(httpd -t)
  if command -v apache2ctl &>/dev/null; then
    checker=(apache2ctl configtest)
  fi
  "${checker[@]}" 2>&1
}
# ============================================================================
# GENERIC WRAPPERS (dispatch to nginx or apache functions)
# ============================================================================
# Create the auth snippet with the implementation matching WEB_SERVER.
# All arguments are forwarded unchanged.
create_snippet() {
  case "$WEB_SERVER" in
    nginx) nginx_create_snippet "$@" ;;
    *) apache_create_snippet "$@" ;;
  esac
}
# Insert the auth include with the implementation matching WEB_SERVER.
# All arguments are forwarded unchanged.
insert_auth() {
  case "$WEB_SERVER" in
    nginx) nginx_insert_auth "$@" ;;
    *) apache_insert_auth "$@" ;;
  esac
}
# Protect the push endpoint with the implementation matching WEB_SERVER.
# All arguments are forwarded unchanged.
insert_push_auth() {
  case "$WEB_SERVER" in
    nginx) nginx_insert_push_auth "$@" ;;
    *) apache_insert_push_auth "$@" ;;
  esac
}
# Strip the auth include with the implementation matching WEB_SERVER.
# All arguments are forwarded unchanged.
remove_auth_from_config() {
  case "$WEB_SERVER" in
    nginx) nginx_remove_auth "$@" ;;
    *) apache_remove_auth "$@" ;;
  esac
}
# Run the configuration test of the active web server and return its status.
test_config() {
  case "$WEB_SERVER" in
    nginx) nginx_test_config ;;
    *) apache_test_config ;;
  esac
}
# ============================================================================
# STATUS & REMOVE
# ============================================================================
# Print, for every entry in SERVICES, whether basic auth is configured:
#   no config found - the vhost config file does not exist
#   not configured  - the config does not include the auth snippet
#   ENABLED         - snippet included and htpasswd file present
#   BROKEN          - snippet included but htpasswd file missing
# followed by a summary of the paths in use.
show_status() {
  detect_web_server
  configure_paths
  local rule="=========================================="
  printf '%s\n' "" "$rule" "HTTP Basic Auth Status (${WEB_SERVER})" "$rule" ""
  for entry in "${SERVICES[@]}"; do
    local name config_file display_name full_path state
    name=$(get_service_name "$entry")
    config_file=$(get_config_file "$entry")
    display_name="${name^}"
    full_path="${CONFIG_DIR}/${config_file}"
    if [ ! -f "$full_path" ]; then
      state="no config found"
    elif ! has_auth_snippet "$full_path" "$name"; then
      state="not configured"
    elif [ -f "${AUTH_DIR}/.htpasswd-${name}" ]; then
      state="ENABLED (htpasswd + snippet)"
    else
      state="BROKEN (snippet exists but htpasswd file missing)"
    fi
    printf ' %-14s %s\n' "${display_name}:" "$state"
  done
  printf '%s\n' \
    "" \
    "Web server: ${WEB_SERVER}" \
    "Config dir: ${CONFIG_DIR}" \
    "Snippet dir: ${SNIPPET_DIR}" \
    "Auth dir: ${AUTH_DIR}" \
    "Backup dir: ${BACKUP_DIR}" \
    ""
}
# Remove basic auth from every service that has a vhost config: back up the
# config, strip the include lines, delete the snippet file, then test the web
# server configuration and reload the service if the test passes.
do_remove() {
  detect_web_server
  configure_paths
  printf '\n%s\n\n' "Removing HTTP Basic Auth from all services (${WEB_SERVER})..."
  for entry in "${SERVICES[@]}"; do
    local name config_file
    name=$(get_service_name "$entry")
    config_file=$(get_config_file "$entry")
    local full_path="${CONFIG_DIR}/${config_file}"
    # Services without a config file are skipped entirely.
    [ -f "$full_path" ] || continue
    if has_auth_snippet "$full_path" "$name"; then
      backup_config "$full_path"
      remove_auth_from_config "$full_path" "$name"
      echo " Removed auth from ${config_file}"
    fi
    rm -f "${SNIPPET_DIR}/auth-${name}.conf"
  done
  printf '\n%s\n' "Testing ${WEB_SERVER} configuration..."
  if ! test_config; then
    warn "${WEB_SERVER} config test failed — check your config manually"
  else
    systemctl reload "$SERVICE_NAME"
    printf '\n%s\n' "Auth removed and ${WEB_SERVER} reloaded."
  fi
}
# ============================================================================
# MAIN SETUP
# ============================================================================
# Interactive setup: detect the web server, warn when no vhost uses HTTPS, ask
# about push-endpoint protection and credential mode, then for every service
# with a vhost config create the htpasswd file, write the auth snippet and
# insert it into the config. Finally fix htpasswd permissions, test the web
# server configuration and reload the service.
# Globals: SERVICES, CONFIG_DIR, SNIPPET_DIR, AUTH_DIR, BACKUP_DIR, WEB_USER,
#          WEB_SERVER, SERVICE_NAME, SCRIPT_VERSION (read)
# Exits 0 when the user declines to continue without HTTPS, exits 1 when the
# configuration test fails.
# NOTE(review): the answers read from the prompts (confirm, push_confirm,
# cred_mode, shared_user, overwrite, recreate, username) and the loop variable
# "entry" are not declared local.
setup_auth() {
detect_web_server
configure_paths
echo ""
echo "=========================================="
echo "Add HTTP Basic Auth to Prometheus Stack"
echo "Version: ${SCRIPT_VERSION} (${WEB_SERVER})"
echo "=========================================="
# Check for HTTPS
local has_any_https=false
for entry in "${SERVICES[@]}"; do
local name config_file
name=$(get_service_name "$entry")
config_file=$(get_config_file "$entry")
local full_path="${CONFIG_DIR}/${config_file}"
# One HTTPS-enabled vhost is enough to skip the cleartext warning.
if [ -f "$full_path" ] && has_https "$full_path"; then
has_any_https=true
break
fi
done
if [ "$has_any_https" = false ]; then
echo ""
warn "No HTTPS configuration detected!"
echo " Basic Auth over HTTP sends credentials in cleartext."
echo " Strongly recommended: run certbot first to enable HTTPS."
echo ""
read -r -p "Continue without HTTPS? [y/N]: " confirm
# Only an explicit y/Y continues.
if [[ ! "$confirm" =~ ^[Yy]$ ]]; then
echo "Aborted. Run certbot first, then re-run this script."
exit 0
fi
fi
# Detect which services have configs
echo ""
echo "Detected services:"
local found_any=false
for entry in "${SERVICES[@]}"; do
local name config_file
name=$(get_service_name "$entry")
config_file=$(get_config_file "$entry")
local full_path="${CONFIG_DIR}/${config_file}"
if [ -f "$full_path" ]; then
local https_status="HTTP"
has_https "$full_path" && https_status="HTTPS"
echo "${name} (${config_file}) [${https_status}]"
found_any=true
fi
done
if [ "$found_any" = false ]; then
die "No service configs found in ${CONFIG_DIR}. Set up ${WEB_SERVER} reverse proxies first."
fi
echo ""
# Ask about push endpoint protection
local protect_push=false
echo "Mimir and Loki have push endpoints used by remote agents."
echo "Protecting them requires configuring credentials in Prometheus/Alloy."
read -r -p "Protect push endpoints with auth too? [y/N]: " push_confirm
if [[ "$push_confirm" =~ ^[Yy]$ ]]; then
protect_push=true
fi
# Ask about shared vs per-service credentials
local shared_creds=false
local shared_htpasswd=""
echo ""
echo "Credential mode:"
echo " 1) Same username/password for all services"
echo " 2) Different credentials per service"
read -r -p "Select [1]: " cred_mode
# Anything other than "2" (including an empty answer) means shared credentials.
if [[ "${cred_mode:-1}" != "2" ]]; then
shared_creds=true
read -r -p "Username for all services [admin]: " shared_user
shared_user=${shared_user:-admin}
# Create a temporary shared htpasswd file — will be copied per service
# NOTE(review): this temp file is only removed at the end of the loop below;
# an abort in between (set -e, die) leaves it behind.
shared_htpasswd=$(mktemp)
htpasswd -c "$shared_htpasswd" "$shared_user"
fi
# Create directories
mkdir -p "$AUTH_DIR" "$SNIPPET_DIR" "$BACKUP_DIR"
echo ""
# Set up auth for each detected service
for entry in "${SERVICES[@]}"; do
local name config_file port
name=$(get_service_name "$entry")
config_file=$(get_config_file "$entry")
# NOTE(review): port is assigned but not used anywhere in this function.
port=$(get_service_port "$entry")
local full_path="${CONFIG_DIR}/${config_file}"
if [ ! -f "$full_path" ]; then
continue
fi
local display_name
# Capitalise the first letter of the service name for display.
display_name="${name^}"
echo "--- ${display_name} ---"
# Create htpasswd file
if [ "$shared_creds" = true ]; then
if [ -f "${AUTH_DIR}/.htpasswd-${name}" ]; then
read -r -p " htpasswd file exists. Overwrite with shared credentials? [Y/n]: " overwrite
# Default is to overwrite; only an explicit n/N keeps the existing file.
if [[ "$overwrite" =~ ^[Nn]$ ]]; then
echo " Keeping existing htpasswd"
else
cp "$shared_htpasswd" "${AUTH_DIR}/.htpasswd-${name}"
echo " Using shared credentials"
fi
else
cp "$shared_htpasswd" "${AUTH_DIR}/.htpasswd-${name}"
echo " Using shared credentials"
fi
else
if [ -f "${AUTH_DIR}/.htpasswd-${name}" ]; then
read -r -p " htpasswd file exists. Recreate? [y/N]: " recreate
# Default is to keep; only an explicit y/Y recreates the file.
if [[ ! "$recreate" =~ ^[Yy]$ ]]; then
echo " Keeping existing htpasswd"
else
read -r -p " Username [admin]: " username
username=${username:-admin}
htpasswd -c "${AUTH_DIR}/.htpasswd-${name}" "$username"
fi
else
read -r -p " Username [admin]: " username
username=${username:-admin}
htpasswd -c "${AUTH_DIR}/.htpasswd-${name}" "$username"
fi
fi
# Create auth snippet
create_snippet "$name" "$display_name"
# Insert into main location/proxy block
insert_auth "$full_path" "$name"
# Handle push endpoints for Mimir and Loki
if [[ "$name" == "mimir" ]] || [[ "$name" == "loki" ]]; then
if [ "$protect_push" = true ]; then
insert_push_auth "$full_path" "$name"
else
echo " ⚠ Push endpoint left open — consider IP restrictions"
fi
fi
echo ""
done
# Clean up shared temp file
[ -n "$shared_htpasswd" ] && rm -f "$shared_htpasswd"
# Set permissions on htpasswd files
# Best effort: failures (e.g. no matching files) are deliberately ignored.
chmod 640 "${AUTH_DIR}"/.htpasswd-* 2>/dev/null || true
chown "root:${WEB_USER}" "${AUTH_DIR}"/.htpasswd-* 2>/dev/null || true
# Test and reload
echo "Testing ${WEB_SERVER} configuration..."
if test_config; then
systemctl reload "$SERVICE_NAME"
echo ""
echo "=========================================="
echo "HTTP Basic Auth Successfully Configured!"
echo "=========================================="
echo ""
echo "Web server: ${WEB_SERVER}"
echo "Backups: ${BACKUP_DIR}"
echo ""
echo "To remove auth later: $0 --remove"
echo "To check status: $0 --status"
else
echo ""
echo "${WEB_SERVER} configuration test FAILED!"
echo "Your backups are in ${BACKUP_DIR} — restore manually if needed."
exit 1
fi
}
# ============================================================================
# MAIN
# ============================================================================
# Entry point: dispatch on the first command-line argument.
#   -h | --help  print usage (works without root)
#   --remove     remove auth from all services
#   --status     show the auth status of each service
#   (other)      install htpasswd if needed and run the interactive setup
# Everything except the help text requires root.
main() {
  # Answer help requests before the privilege check so that unprivileged
  # users can read the usage text.
  case "${1:-}" in
    -h | --help)
      show_usage
      return 0
      ;;
  esac
  if [[ $EUID -ne 0 ]]; then
    die "This script must be run as root"
  fi
  case "${1:-}" in
    --remove) do_remove ;;
    --status) show_status ;;
    *)
      install_htpasswd
      setup_auth
      ;;
  esac
}
# Run the script, forwarding all command-line arguments to main.
main "$@"
+94
View File
@@ -0,0 +1,94 @@
#!/bin/bash
######################################################################################
#### Version 2.01 ####
#### For questions or comments contact@mylinux.work ####
#### Author : Phil Connor ####
#### ####
#### Notes : ####
#### This script is a simple "helper" to configure Auto Updates on linux ####
#### servers. ####
#### ####
#### Use this script at your OWN risk. There is no guarantee whatsoever. ####
#### ####
#### Usage: run the script without arguments (none are read)                      ####
######################################################################################
###########################
#### System Variables ####
###########################
# Distribution keyword: first word of PRETTY_NAME in /etc/os-release, lower-cased
# (e.g. "ubuntu", "centos", "red", "oracle", "rocky").
OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
# AlmaLinux writes its name as the single word "AlmaLinux", which would yield
# "almalinux"; normalise it to the "alma" keyword the rest of the script tests for.
if [ "${OS}" = almalinux ]; then
    OS=alma
fi
# Major release number only (text before the first dot of VERSION_ID).
OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}')
# Configuration file locations used by the install section.
aptcnf="/etc/apt/apt.conf.d"
dnfcnf="/etc/dnf/automatic.conf"
yum6cnf="/etc/sysconfig/yum-cron"
yum7cnf="/etc/yum/yum-cron.conf"
###################################
#### Copy to EOF file function ####
###################################
function no_show() {
{
expand | awk 'NR == 1 {match($0, /^ */); l = RLENGTH + 1}
{print substr($0, l)}'
}
}
###########################################################
#### Detect Package Manager from OS and OSVER Variables ####
###########################################################
# Choose the package-manager command (stored with its "-y" flag) from the
# distribution keyword in OS and the major release in OSVER.
# Releases 6 and 9 are covered as well because the install section below
# runs ${PAKMGR} for them; leaving PAKMGR unset there would execute "update"
# and "install" as if they were commands.
if [ "${OS}" = ubuntu ]; then
    PAKMGR="apt-get -y"
elif [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
    case "${OSVER}" in
        6|7) PAKMGR="yum -y" ;;
        8|9) PAKMGR="dnf -y" ;;
    esac
fi
#####################################
#### Install Auto Update Service ####
#####################################
# Install and enable the automatic-update service for the detected platform:
#   RHEL family 6/7 -> yum-cron
#   RHEL family 8/9 -> dnf-automatic
#   Ubuntu          -> unattended-upgrades + apticron
# ${PAKMGR} is expanded unquoted on purpose: it holds a command plus its "-y" flag.
if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
    if [ "${OSVER}" = 6 ] || [ "${OSVER}" = 7 ]; then
        ${PAKMGR} update
        ${PAKMGR} install yum-cron
        if [ "${OSVER}" = 6 ]; then
            chkconfig yum-cron on
            chkconfig yum-updatesd off
            service yum-updatesd stop
            #echo 'exclude= http php* kernel*' >> /etc/yum.conf  # <-- If you need to add exclude package from updating
            #sed -i 's/YUM_PARAMETER=""/YUM_PARAMETER="-x http -x php* -x kernel*"/g' >> $yum6cnf # <-- If you need to add exclude package from updating
            sed -i 's/CHECK_ONLY=yes/CHECK_ONLY=no/g' "$yum6cnf"
            sed -i 's/DOWNLOAD_ONLY=yes/DOWNLOAD_ONLY=no/g' "$yum6cnf"
            sed -i 's/MAILTO=/MAILTO=root/g' "$yum6cnf"
            service yum-cron start
        fi
        if [ "${OSVER}" = 7 ]; then
            sed -i 's/update_cmd = default/update_cmd = security/g' "$yum7cnf" #<-- comment this out for ALL available upgrades
            sed -i 's/apply_updates = no/apply_updates = yes/g' "$yum7cnf"
            sed -i 's/download_updates = no/download_updates = yes/g' "$yum7cnf"
            # "--now" enables and starts the unit in one step (was misspelled "--nom").
            systemctl enable --now yum-cron
        fi
    fi
    if [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
        ${PAKMGR} update
        ${PAKMGR} install dnf-automatic
        sed -i 's/upgrade_type = default/upgrade_type = security/g' "$dnfcnf" #<-- comment this out for ALL available upgrades
        sed -i 's/apply_updates = no/apply_updates = yes/g' "$dnfcnf"
        systemctl enable --now dnf-automatic.timer
    fi
elif [ "${OS}" = ubuntu ]; then
    ${PAKMGR} upgrade
    ${PAKMGR} install unattended-upgrades apticron
    touch "$aptcnf/20auto-upgrades"
    # no_show removes the here-document's indentation before it is written.
    no_show << EOF > "$aptcnf/20auto-upgrades"
        APT::Periodic::Update-Package-Lists "1";
        APT::Periodic::Download-Upgradeable-Packages "1";
        APT::Periodic::AutocleanInterval "7";
        APT::Periodic::Unattended-Upgrade "1";
EOF
    # Un-comment the Mail "root" line so upgrade reports are mailed to root.
    sed -i 's/\/\/Unattended-Upgrade\:\:Mail "root";/Unattended-Upgrade\:\:Mail "root";/g' "$aptcnf/50unattended-upgrades"
fi
+452
View File
@@ -0,0 +1,452 @@
#!/bin/bash
################################################################################
# Script Name: backup-status-exporter.sh
# Version: 1.0
# Description: Prometheus textfile collector exporter for backup job status
# Monitors backup age, size, and success/failure from multiple
# sources including timestamp files, log files, and directories
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Date: 2026-03-03
#
# Prerequisites:
# - node_exporter with textfile collector enabled
# - /var/lib/node_exporter directory exists
# - Config file at /etc/backup-status-exporter.conf
#
# Usage:
# # Run with default config
# sudo ./backup-status-exporter.sh
#
# # Dry run (output to stdout)
# ./backup-status-exporter.sh --dry-run
#
# # Debug mode
# DEBUG=1 sudo ./backup-status-exporter.sh
#
# Config Format (pipe-delimited, one job per line):
# job_name|type|path|max_age_hours
#
# Types:
# directory - find newest file in directory, report mtime and size
# statusfile - read unix timestamp of last success from a file
# logfile - grep for success/failure patterns in a log file
#
# Metrics Exported:
# - linux_backup_last_success_timestamp{job} - Unix timestamp of last backup
# - linux_backup_age_hours{job} - Hours since last backup
# - linux_backup_size_bytes{job} - Size of last backup in bytes
# - linux_backup_status{job} - 1=ok, 0=stale/failed
#
################################################################################
set -o pipefail
# ============================================================================
# CONFIGURATION
# ============================================================================
# Fixed values; TEXTFILE_DIR and CONFIG_FILE may be preset in the environment.
VERSION="1.0"
SCRIPT_NAME="${0##*/}"
TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
OUTPUT_FILE="${TEXTFILE_DIR}/backup_status.prom"
CONFIG_FILE="${CONFIG_FILE:-/etc/backup-status-exporter.conf}"
# Scratch file next to the output file, suffixed with this shell's PID.
TMP_FILE="${OUTPUT_FILE}.$$"
readonly VERSION SCRIPT_NAME TEXTFILE_DIR OUTPUT_FILE CONFIG_FILE TMP_FILE
# Runtime flags
DRY_RUN=false
DEBUG=${DEBUG:-}
# Extended regular expressions matched case-insensitively against log lines
SUCCESS_PATTERNS="(completed successfully|backup successful|backup finished|success|completed without error)"
FAILURE_PATTERNS="(failed|error|fatal|backup failed|aborted)"
readonly SUCCESS_PATTERNS FAILURE_PATTERNS
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print the arguments to stderr with a "[DEBUG] " prefix, but only when the
# DEBUG variable is non-empty; otherwise do nothing and succeed.
debug_echo() {
    [[ -n "$DEBUG" ]] || return 0
    echo "[DEBUG] $*" >&2
}
# Print the arguments, joined by spaces, to stderr with an "[ERROR] " prefix.
log_error() {
    printf '[ERROR] %s\n' "$*" >&2
}
# Delete the scratch file named by TMP_FILE; a missing file is not an error.
cleanup() {
    local scratch="$TMP_FILE"
    rm -f "$scratch"
}
trap cleanup EXIT
# Print the usage text (with SCRIPT_NAME substituted) to stdout, then
# terminate the script with status 0.
show_help() {
    cat <<HELP_TEXT
Usage: $SCRIPT_NAME [OPTIONS]
Prometheus textfile collector exporter for backup job status.
Monitors backup age, size, and success/failure from multiple sources.
OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version
CONFIGURATION:
Jobs are configured in /etc/backup-status-exporter.conf (or set CONFIG_FILE).
Each line defines a backup job in pipe-delimited format:
job_name|type|path|max_age_hours
Types:
directory Find the newest file in a directory, report mtime and size
statusfile Read a file containing a unix timestamp of last success
logfile Parse a log file for success/failure patterns
Example config:
daily_db|directory|/backups/db/|26
rsync_home|statusfile|/var/log/rsync-home.status|26
restic_full|logfile|/var/log/restic-backup.log|170
Lines starting with # are comments. Blank lines are ignored.
ENVIRONMENT VARIABLES:
CONFIG_FILE Path to config file (default: /etc/backup-status-exporter.conf)
TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter)
DEBUG Enable debug output when set to any value
EXAMPLES:
sudo $SCRIPT_NAME
$SCRIPT_NAME --dry-run
DEBUG=1 sudo $SCRIPT_NAME
CONFIG_FILE=/etc/my-backups.conf sudo $SCRIPT_NAME
HELP_TEXT
    exit 0
}
# Print "<script name> version <version>" to stdout and exit with status 0.
show_version() {
    printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
    exit 0
}
# ============================================================================
# JOB LOADING
# ============================================================================
# Print every job definition found in CONFIG_FILE to stdout, one per line.
# Text from the first '#' to the end of a line is dropped, surrounding
# whitespace is trimmed, and lines that end up empty are skipped.
# Exits with status 1 (after logging) when the file is missing or holds no jobs.
load_jobs() {
    if [[ ! -f "$CONFIG_FILE" ]]; then
        log_error "Config file not found: $CONFIG_FILE"
        exit 1
    fi
    local job_count=0
    local line
    # "|| [[ -n $line ]]" keeps the final line when the file does not end
    # with a newline; plain "read" returns non-zero there and would drop it.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Strip comments and whitespace
        line="${line%%#*}"
        line="${line#"${line%%[![:space:]]*}"}"
        line="${line%"${line##*[![:space:]]}"}"
        if [[ -z "$line" ]]; then
            continue
        fi
        echo "$line"
        job_count=$((job_count + 1))
    done < "$CONFIG_FILE"
    if [[ "$job_count" -eq 0 ]]; then
        log_error "No jobs found in config file: $CONFIG_FILE"
        exit 1
    fi
    debug_echo "Loaded $job_count backup jobs from $CONFIG_FILE"
}
# ============================================================================
# BACKUP CHECK FUNCTIONS
# ============================================================================
# Report on the most recently modified regular file below a directory.
# Arguments: $1 job name (debug output only), $2 directory, $3 max age in hours
# Outputs:   "<mtime epoch>|<age hours, 1 decimal>|<size bytes>|<status>" where
#            status is 1 while the age is within the limit and 0 otherwise;
#            "0|0|0|0" when the directory is missing or contains no files.
check_directory() {
    local job_name="$1"
    local path="$2"
    local max_age_hours="$3"
    if [[ ! -d "$path" ]]; then
        debug_echo "[$job_name] Directory not found: $path"
        echo "0|0|0|0"
        return
    fi
    # One "mtime size path" record per file; the numeric reverse sort puts
    # the newest first.
    local newest_record
    newest_record=$(find "$path" -type f -printf '%T@ %s %p\n' 2>/dev/null | sort -rn | head -1)
    if [[ -z "$newest_record" ]]; then
        debug_echo "[$job_name] No files found in: $path"
        echo "0|0|0|0"
        return
    fi
    local mtime_epoch size_bytes shown_path
    mtime_epoch=$(awk '{printf "%.0f", $1}' <<< "$newest_record")
    size_bytes=$(awk '{print $2}' <<< "$newest_record")
    shown_path=$(awk '{$1=""; $2=""; print}' <<< "$newest_record" | sed 's/^ //')
    local current_epoch
    current_epoch=$(date +%s)
    local age_seconds=$((current_epoch - mtime_epoch))
    local age_hours
    age_hours=$(awk -v secs="$age_seconds" 'BEGIN {printf "%.1f", secs / 3600}')
    local max_age_seconds=$((max_age_hours * 3600))
    local status=1
    [[ "$age_seconds" -gt "$max_age_seconds" ]] && status=0
    debug_echo "[$job_name] Newest file: $shown_path (age=${age_hours}h, size=${size_bytes}B, status=$status)"
    echo "${mtime_epoch}|${age_hours}|${size_bytes}|${status}"
}
# Report on a status file whose first line holds a unix timestamp.
# Arguments: $1 job name (debug output only), $2 file path, $3 max age in hours
# Outputs:   "<timestamp>|<age hours, 1 decimal>|<size of the status file>|<status>"
#            where status is 1 while the age is within the limit, else 0;
#            "0|0|0|0" when the file is missing or the first line, with all
#            whitespace removed, is not a string of digits.
check_statusfile() {
    local job_name="$1"
    local path="$2"
    local max_age_hours="$3"
    if [[ ! -f "$path" ]]; then
        debug_echo "[$job_name] Status file not found: $path"
        echo "0|0|0|0"
        return
    fi
    local stamp
    stamp=$(head -1 "$path" 2>/dev/null)
    stamp="${stamp//[[:space:]]/}"
    if ! [[ -n "$stamp" && "$stamp" =~ ^[0-9]+$ ]]; then
        debug_echo "[$job_name] Invalid timestamp in status file: $path"
        echo "0|0|0|0"
        return
    fi
    local current_epoch
    current_epoch=$(date +%s)
    local age_seconds=$((current_epoch - stamp))
    local age_hours
    age_hours=$(awk -v secs="$age_seconds" 'BEGIN {printf "%.1f", secs / 3600}')
    # The backup itself is not visible here, so the size reported is that of
    # the status file.
    local size_bytes
    size_bytes=$(stat -c '%s' "$path" 2>/dev/null) || size_bytes=0
    local max_age_seconds=$((max_age_hours * 3600))
    local status=1
    [[ "$age_seconds" -gt "$max_age_seconds" ]] && status=0
    debug_echo "[$job_name] Status timestamp: $stamp (age=${age_hours}h, status=$status)"
    echo "${stamp}|${age_hours}|${size_bytes}|${status}"
}
# Report on a backup log by comparing where the last success message and the
# last failure message occur.
# Arguments: $1 job name (debug output only), $2 log path, $3 max age in hours
# Globals:   SUCCESS_PATTERNS, FAILURE_PATTERNS (extended regexes, read)
# Outputs:   "<log mtime epoch>|<age hours, 1 decimal>|<log size bytes>|<status>"
#            status is 1 only when a success line comes after the last failure
#            line AND the log was modified within the limit; "0|0|0|0" when
#            the log file does not exist.
check_logfile() {
    local job_name="$1"
    local path="$2"
    local max_age_hours="$3"
    if [[ ! -f "$path" ]]; then
        debug_echo "[$job_name] Log file not found: $path"
        echo "0|0|0|0"
        return
    fi
    # Phrases that contain a failure keyword while stating that nothing went
    # wrong. They are blanked out before the failure search; otherwise a line
    # such as "completed without error" matches both pattern sets on the same
    # line number and can never count as a success.
    local benign_phrases='without (any )?errors?|\bno errors?\b|\b0 errors?\b'
    # Line number of the most recent failure. sed keeps the line count, so the
    # numbers still refer to the original file.
    local last_failure
    last_failure=$(sed -E "s/${benign_phrases}//Ig" "$path" 2>/dev/null | grep -inE "$FAILURE_PATTERNS" | tail -1) || true
    local last_success
    last_success=$(grep -inE "$SUCCESS_PATTERNS" "$path" 2>/dev/null | tail -1) || true
    local failure_line=0
    local success_line=0
    if [[ -n "$last_failure" ]]; then
        failure_line=$(echo "$last_failure" | cut -d: -f1)
    fi
    if [[ -n "$last_success" ]]; then
        success_line=$(echo "$last_success" | cut -d: -f1)
    fi
    # Use the log file's mtime as the timestamp
    local file_epoch
    file_epoch=$(stat -c '%Y' "$path" 2>/dev/null) || file_epoch=0
    local file_size
    file_size=$(stat -c '%s' "$path" 2>/dev/null) || file_size=0
    local now
    now=$(date +%s)
    local age_seconds=$((now - file_epoch))
    local age_hours
    age_hours=$(awk -v secs="$age_seconds" 'BEGIN {printf "%.1f", secs / 3600}')
    local max_age_seconds=$((max_age_hours * 3600))
    # Determine status: success if last success line is after last failure line
    # and the log is not stale
    local status=0
    if [[ "$success_line" -gt "$failure_line" ]] && [[ "$age_seconds" -le "$max_age_seconds" ]]; then
        status=1
    fi
    if [[ "$success_line" -eq 0 ]] && [[ "$failure_line" -eq 0 ]]; then
        debug_echo "[$job_name] No success or failure patterns found in: $path"
        status=0
    fi
    debug_echo "[$job_name] Log file: $path (age=${age_hours}h, success_line=$success_line, failure_line=$failure_line, status=$status)"
    echo "${file_epoch}|${age_hours}|${file_size}|${status}"
}
# ============================================================================
# METRICS COLLECTION
# ============================================================================
# Run every configured backup check and print the resulting metrics in
# Prometheus text exposition format on stdout, grouped by metric family.
# Jobs come from load_jobs as "job_name|type|path|max_age_hours" lines; each
# check function answers "timestamp|age_hours|size_bytes|status".
# Returns: 1 without printing anything when load_jobs fails, 0 otherwise.
#          Malformed job lines are logged and skipped.
collect_metrics() {
    # Command substitution (not process substitution) so that a failing
    # load_jobs is noticed instead of being treated as "no jobs".
    local job_list
    job_list=$(load_jobs) || return 1

    local timestamps="" ages="" sizes="" statuses=""
    local job_line job_name job_type job_path max_age_hours _extra
    local result ts age size st label
    local nl=$'\n'

    while IFS= read -r job_line; do
        [[ -n "$job_line" ]] || continue
        # _extra absorbs any fields beyond the fourth.
        IFS='|' read -r job_name job_type job_path max_age_hours _extra <<< "$job_line"
        if [[ -z "$job_name" ]] || [[ -z "$job_type" ]] || [[ -z "$job_path" ]] || [[ -z "$max_age_hours" ]]; then
            log_error "Invalid config line: $job_line (expected: job_name|type|path|max_age_hours)"
            continue
        fi
        # The check functions do integer arithmetic on this value.
        if ! [[ "$max_age_hours" =~ ^[0-9]+$ ]]; then
            log_error "Invalid max_age_hours '$max_age_hours' for job '$job_name' (expected a whole number of hours)"
            continue
        fi
        result=""
        case "$job_type" in
            directory)
                result=$(check_directory "$job_name" "$job_path" "$max_age_hours")
                ;;
            statusfile)
                result=$(check_statusfile "$job_name" "$job_path" "$max_age_hours")
                ;;
            logfile)
                result=$(check_logfile "$job_name" "$job_path" "$max_age_hours")
                ;;
            *)
                log_error "Unknown job type '$job_type' for job '$job_name' (expected: directory, statusfile, logfile)"
                continue
                ;;
        esac
        IFS='|' read -r ts age size st _extra <<< "$result"
        # Label values must have backslash and double quote escaped.
        label=${job_name//\\/\\\\}
        label=${label//\"/\\\"}
        timestamps+="linux_backup_last_success_timestamp{job=\"${label}\"} ${ts}${nl}"
        ages+="linux_backup_age_hours{job=\"${label}\"} ${age}${nl}"
        sizes+="linux_backup_size_bytes{job=\"${label}\"} ${size}${nl}"
        statuses+="linux_backup_status{job=\"${label}\"} ${st}${nl}"
    done <<< "$job_list"

    local output=""
    output+="# HELP linux_backup_last_success_timestamp Unix timestamp of the last successful backup${nl}"
    output+="# TYPE linux_backup_last_success_timestamp gauge${nl}"
    output+="$timestamps"
    output+="# HELP linux_backup_age_hours Hours since the last successful backup${nl}"
    output+="# TYPE linux_backup_age_hours gauge${nl}"
    output+="$ages"
    output+="# HELP linux_backup_size_bytes Size of the last backup in bytes${nl}"
    output+="# TYPE linux_backup_size_bytes gauge${nl}"
    output+="$sizes"
    output+="# HELP linux_backup_status Backup job status (1=ok, 0=stale or failed)${nl}"
    output+="# TYPE linux_backup_status gauge${nl}"
    output+="$statuses"
    # Plain %s: the text already holds real newlines, and job names must not
    # be run through backslash-escape processing.
    printf '%s' "$output"
}
# ============================================================================
# OUTPUT
# ============================================================================
# Collect the metrics and deliver them.
#   DRY_RUN=true : print them to stdout.
#   otherwise    : write them to TMP_FILE, then rename onto OUTPUT_FILE.
# Exits with status 1 when collection fails, TEXTFILE_DIR is missing, or the
# file cannot be written; OUTPUT_FILE is left untouched in those cases.
write_metrics() {
    local metrics
    if ! metrics=$(collect_metrics); then
        log_error "Metric collection failed; not updating $OUTPUT_FILE"
        exit 1
    fi
    if [[ "$DRY_RUN" == "true" ]]; then
        printf '%s\n' "$metrics"
        return
    fi
    if [[ ! -d "$TEXTFILE_DIR" ]]; then
        log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
        exit 1
    fi
    if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
        log_error "Could not write temporary metrics file: $TMP_FILE"
        exit 1
    fi
    if ! mv "$TMP_FILE" "$OUTPUT_FILE"; then
        log_error "Could not move $TMP_FILE to $OUTPUT_FILE"
        exit 1
    fi
    debug_echo "Metrics written to $OUTPUT_FILE"
}
# ============================================================================
# MAIN
# ============================================================================
# Entry point: apply the command-line flags in order, then write the metrics.
#   --dry-run       set DRY_RUN=true
#   --debug         set DEBUG=1
#   --help, -h      print help (show_help ends the script)
#   --version, -v   print version (show_version ends the script)
# Any other argument is reported on stderr and the script exits with status 1.
main() {
    local arg
    for arg in "$@"; do
        case "$arg" in
            --dry-run)    DRY_RUN=true ;;
            --debug)      DEBUG=1 ;;
            --help|-h)    show_help ;;
            --version|-v) show_version ;;
            *)
                log_error "Unknown option: $arg"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done
    write_metrics
}
main "$@"
Executable
+428
View File
@@ -0,0 +1,428 @@
#!/bin/bash
################################################
#### AD Certificate checker and renewal ####
#### for Amazon, Ubuntu and RedHat servers ####
#### ####
#### Author: Phil Connor ####
#### License: MIT ####
#### Contact: contact@mylinux.work ####
#### Version: 3.00-081425 ####
################################################
set -o pipefail
SCRIPT_NAME=$(basename "$0")
# Default configuration
DEFAULT_PEM_PATH="/etc/pki/ca-trust/source/anchors/ad-cert.pem"
DEFAULT_DAYS_THRESHOLD=30
DEFAULT_DOMAIN="example"
DEFAULT_NODE_DIR="/var/lib/node_exporter"
readonly SCRIPT_NAME DEFAULT_PEM_PATH DEFAULT_DAYS_THRESHOLD DEFAULT_DOMAIN DEFAULT_NODE_DIR
# Configuration variables: a non-empty value from the environment wins,
# otherwise the default above is assigned.
: "${PEM_PATH:=$DEFAULT_PEM_PATH}"
: "${DAYS_THRESHOLD:=$DEFAULT_DAYS_THRESHOLD}"
: "${DOMAIN:=$DEFAULT_DOMAIN}"
: "${NODE_DIR:=$DEFAULT_NODE_DIR}"
: "${SERVER_TYPE:=}"
: "${DEBUG:=}"
# Runtime flags
MONITOR_ONLY=false
RENEW_ONLY=false
# Report a failure on stderr and end the script with the same status.
# Arguments: $1 exit status of the failed command, $2 line number
handle_error() {
    local status=$1
    local failed_line=$2
    printf 'Error: %s failed at line %s with exit code %s\n' \
        "$SCRIPT_NAME" "$failed_line" "$status" >&2
    exit "$status"
}
trap 'handle_error $? $LINENO' ERR
# Write "[DEBUG] <arguments>" to stderr when DEBUG is non-empty; do nothing
# (and succeed) otherwise.
debug_echo() {
    if [[ -z "$DEBUG" ]]; then
        return 0
    fi
    echo "[DEBUG] $*" >&2
}
# Print the usage text to stdout, with the script name and the DEFAULT_*
# values substituted. Does not exit; callers decide the exit status.
show_help() {
    cat << HELP_TEXT
Usage: $SCRIPT_NAME [OPTIONS]
SSL certificate checker and renewal script for Prometheus monitoring.
OPTIONS:
--monitor Only generate Prometheus metrics (no renewal)
--renew Only handle certificate renewal (no monitoring)
--all Run both monitoring and renewal (default)
--help, -h Show this help message
ENVIRONMENT VARIABLES:
PEM_PATH Path to certificate file (default: $DEFAULT_PEM_PATH)
DAYS_THRESHOLD Days before expiry to trigger renewal (default: $DEFAULT_DAYS_THRESHOLD)
DOMAIN Domain name (default: $DEFAULT_DOMAIN)
NODE_DIR Node exporter directory (default: $DEFAULT_NODE_DIR)
SERVER_TYPE Server type (artifactory, bitbucket, cloudaccess, jira)
DEBUG Enable debug output
EXAMPLES:
$SCRIPT_NAME --monitor
SERVER_TYPE=bitbucket $SCRIPT_NAME --renew
DEBUG=1 $SCRIPT_NAME --all
HELP_TEXT
}
# Check that a file exists and that "openssl x509" can parse it.
# Arguments: $1 path of the certificate file
# Returns:   0 when parseable; 1 when the file is missing (debug message only)
#            or cannot be parsed (error message on stderr).
validate_certificate_file() {
    local candidate="$1"
    [[ -f "$candidate" ]] || {
        debug_echo "Certificate file not found: $candidate"
        return 1
    }
    if openssl x509 -noout -text -in "$candidate" >/dev/null 2>&1; then
        return 0
    fi
    echo "Error: Invalid certificate file: $candidate" >&2
    return 1
}
# Fetch the certificate(s) presented by us.<domain>.net:636 and store the
# PEM block(s) in a file.
# Arguments: $1 domain label (host is us.<domain>.net), $2 destination file
# Returns:   0 when at least one certificate block was stored, 1 otherwise.
# The download goes to a temporary file first, so a failed or empty download
# leaves an existing destination file exactly as it was.
download_certificate() {
    local domain="$1"
    local output_file="$2"
    local server_url="us.${domain}.net:636"
    local tmp_file
    debug_echo "Downloading certificate from $server_url"
    if ! tmp_file=$(mktemp "${output_file}.XXXXXX"); then
        echo "Error: Could not create temporary file for $output_file" >&2
        return 1
    fi
    # The size test also catches the case where the connection succeeded but
    # no certificate block was printed.
    if ! timeout 30 openssl s_client -connect "$server_url" -servername "us.${domain}.net" < /dev/null 2>/dev/null | \
        sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' > "$tmp_file" || [[ ! -s "$tmp_file" ]]; then
        rm -f "$tmp_file"
        echo "Error: Failed to download certificate from $server_url" >&2
        return 1
    fi
    # Copy the content instead of renaming so the destination keeps its
    # existing ownership and permissions.
    if ! cat "$tmp_file" > "$output_file"; then
        rm -f "$tmp_file"
        echo "Error: Failed to write certificate to $output_file" >&2
        return 1
    fi
    rm -f "$tmp_file"
    return 0
}
# Work out how many whole days a certificate has been valid and how many
# remain, from the notBefore/notAfter dates reported by openssl.
# Arguments: $1 certificate file
#            $2 NAME of the caller's variable receiving the days left
#            $3 NAME of the caller's variable receiving the days elapsed
# Days left is negative once the notAfter date lies in the past.
calculate_certificate_dates() {
    local cert_file="$1"
    local -n days_left_ref=$2
    local -n days_gone_ref=$3
    local not_before not_after
    not_before=$(openssl x509 -noout -startdate -in "$cert_file")
    not_after=$(openssl x509 -noout -enddate -in "$cert_file")
    # openssl prints "notBefore=<date>"; keep only the part after the "=".
    local start_epoch expiry_epoch current_epoch
    start_epoch=$(date --date="${not_before##*=}" +%s)
    expiry_epoch=$(date --date="${not_after##*=}" +%s)
    current_epoch=$(date +%s)
    days_gone_ref=$(( (current_epoch - start_epoch) / 86400 ))
    days_left_ref=$(( (expiry_epoch - current_epoch) / 86400 ))
    debug_echo "Certificate valid from $(date -d @"$start_epoch") to $(date -d @"$expiry_epoch")"
    debug_echo "Days gone: $days_gone_ref, Days left: $days_left_ref"
}
# Write the certificate-expiry gauge to $NODE_DIR/adcert_check.prom.
# Arguments: $1 days until expiry (negative when already expired)
#            $2 days since the certificate became valid
# Returns:   0 on success, 1 when the directory or file cannot be written.
# The content is written to a temporary name and then renamed, so a reader
# of the .prom file never sees it half-written.
generate_prometheus_metrics() {
    local days_left="$1"
    local days_gone="$2"
    local output_file="$NODE_DIR/adcert_check.prom"
    # Same directory as the target so the rename stays on one filesystem.
    local tmp_file="${output_file}.$$"
    debug_echo "Generating Prometheus metrics to $output_file"
    mkdir -p "$NODE_DIR" || return 1
    if ! {
        echo '# HELP linux_ad_cert_expire AD Certificate expiration days'
        echo '# TYPE linux_ad_cert_expire gauge'
        if [[ $days_left -lt 0 ]]; then
            echo "linux_ad_cert_expire{status=\"expired\",days_gone=\"$days_gone\"} 0"
        else
            echo "linux_ad_cert_expire{status=\"valid\"} $days_left"
        fi
    } > "$tmp_file"; then
        rm -f "$tmp_file"
        return 1
    fi
    if ! mv -f "$tmp_file" "$output_file"; then
        rm -f "$tmp_file"
        return 1
    fi
}
# Print the keystore password on stdout, trying three sources in order:
#   1. HTTP GET of the given URL, reading .data.password from the JSON reply
#      (skipped when the URL is empty)
#   2. vault kv get -field=password secret/keystore
#   3. the literal fallback "changeit"
# Arguments: $1 URL to query (may be empty)
get_keystore_password() {
    local source_url="$1"
    local secret=""
    if [[ -n "$source_url" ]]; then
        debug_echo "Retrieving keystore password from $source_url"
        secret=$(curl -sf -X GET "$source_url" 2>/dev/null | jq -r '.data.password // empty' 2>/dev/null || true)
    fi
    if [[ -n "$secret" ]]; then
        echo "$secret"
        return
    fi
    debug_echo "Falling back to Vault CLI for keystore password"
    secret=$(vault kv get -field=password secret/keystore 2>/dev/null || true)
    if [[ -z "$secret" ]]; then
        debug_echo "Using default keystore password"
        secret="changeit"
    fi
    echo "$secret"
}
# Run keytool against a keystore for the certificate alias "ad".
# Arguments: $1 directory containing the keytool binary
#            $2 keystore path
#            $3 action: "delete" (failure tolerated) or "import"
#            $4 certificate file (used by "import")
#            $5 URL handed to get_keystore_password
# Returns:   keytool's status for "import", 0 for "delete", 1 for any other
#            action.
# The password is handed over in an environment variable via keytool's
# "-storepass:env" form instead of on the command line, where it would be
# visible in process listings.
execute_keytool_command() {
    local java_bin="$1"
    local keystore="$2"
    local action="$3"
    local cert_file="$4"
    local password_url="$5"
    local storepass
    storepass=$(get_keystore_password "$password_url")
    case "$action" in
        "delete")
            # The alias may not exist yet, so errors are suppressed here.
            KEYTOOL_STOREPASS="$storepass" "$java_bin/keytool" -delete -alias ad -keystore "$keystore" -storepass:env KEYTOOL_STOREPASS 2>/dev/null || true
            ;;
        "import")
            KEYTOOL_STOREPASS="$storepass" "$java_bin/keytool" -import -noprompt -alias ad -keystore "$keystore" -file "$cert_file" -storepass:env KEYTOOL_STOREPASS
            ;;
        *)
            echo "Error: Unknown keytool action: $action" >&2
            return 1
            ;;
    esac
}
# Replace the "ad" certificate in Artifactory's Java keystore with PEM_PATH
# and restart the artifactory service.
# The bundled Java location is tried first; when keytool or cacerts is not
# there, find_java_keystore is asked to locate them.
# Returns: 1 when no keytool/keystore pair is found, else the status of the
#          service restart.
handle_artifactory_renewal() {
    local jre_bin="/opt/jfrog/artifactory/app/third-party/java/bin"
    local trust_store="/opt/jfrog/artifactory/app/third-party/java/lib/security/cacerts"
    local secret_url="http://vault.${DOMAIN}.net/v1/secret/secret/artifactory/keytool"
    if ! [[ -x "$jre_bin/keytool" && -f "$trust_store" ]]; then
        debug_echo "Artifactory default paths not found, searching for Java"
        if ! find_java_keystore jre_bin trust_store; then
            echo "Error: Could not find Java keytool or keystore for Artifactory" >&2
            return 1
        fi
    fi
    # Remove the old alias first, then import the new certificate.
    local step
    for step in delete import; do
        execute_keytool_command "$jre_bin" "$trust_store" "$step" "$PEM_PATH" "$secret_url"
    done
    systemctl restart artifactory
}
# Replace the "ad" certificate in Bitbucket's Java keystore with PEM_PATH and
# restart the atlbitbucket service.
# The bundled JRE of the 8.19.3 install is tried first; when keytool or
# cacerts is not there, find_java_keystore is asked to locate them.
# NOTE: when DEBUG is non-empty this handler only prints what it would run;
# nothing is changed and nothing is restarted.
# Returns: 1 when no keytool/keystore pair is found.
handle_bitbucket_renewal() {
    local jre_bin="/mnt/ebs/bitbucket/8.19.3/jre/bin"
    local trust_store="/mnt/ebs/bitbucket/8.19.3/jre/lib/security/cacerts"
    local secret_url="http://vault.${DOMAIN}.net/v1/secret/secret/bitbucket/keytool"
    if ! [[ -x "$jre_bin/keytool" && -f "$trust_store" ]]; then
        debug_echo "Bitbucket default paths not found, searching for Java"
        if ! find_java_keystore jre_bin trust_store; then
            echo "Error: Could not find Java keytool or keystore for Bitbucket" >&2
            return 1
        fi
    fi
    if [[ -z "$DEBUG" ]]; then
        local step
        for step in delete import; do
            execute_keytool_command "$jre_bin" "$trust_store" "$step" "$PEM_PATH" "$secret_url"
        done
        systemctl restart atlbitbucket
    else
        debug_echo "Would execute: $jre_bin/keytool -delete -alias ad -keystore $trust_store"
        debug_echo "Would execute: curl -X GET $secret_url"
        debug_echo "Would execute: $jre_bin/keytool -import -alias ad -keystore $trust_store -file $PEM_PATH"
        debug_echo "Would execute: systemctl restart atlbitbucket"
    fi
}
# Restart the Docker container named "cloudaccess_server_".
# NOTE(review): the name ends with an underscore — confirm it is the complete
# container name.
handle_cloudaccess_renewal() {
    local container="cloudaccess_server_"
    docker restart "$container"
}
# Replace the "ad" certificate in Jira's Java keystore with PEM_PATH and
# restart the jira service.
# The JRE under /mnt/ebs/jira is tried first; when keytool or cacerts is not
# there, find_java_keystore is asked to locate them.
# Returns: 1 when no keytool/keystore pair is found, else the status of the
#          service restart.
handle_jira_renewal() {
    local jre_bin="/mnt/ebs/jira/jre/bin"
    local trust_store="/mnt/ebs/jira/jre/lib/security/cacerts"
    local secret_url="http://vault.${DOMAIN}.net/v1/secret/secret/jira/keytool"
    if ! [[ -x "$jre_bin/keytool" && -f "$trust_store" ]]; then
        debug_echo "Jira default paths not found, searching for Java"
        if ! find_java_keystore jre_bin trust_store; then
            echo "Error: Could not find Java keytool or keystore for Jira" >&2
            return 1
        fi
    fi
    # Remove the old alias first, then import the new certificate.
    local step
    for step in delete import; do
        execute_keytool_command "$jre_bin" "$trust_store" "$step" "$PEM_PATH" "$secret_url"
    done
    systemctl restart jira
}
# Locate a Java installation that has both an executable keytool and a
# cacerts keystore.
# Arguments: $1 NAME of the caller's variable receiving the keytool directory
#            $2 NAME of the caller's variable receiving the keystore path
# Returns:   0 when a pair was found (both variables set), 1 otherwise (the
#            caller's variables are left untouched).
# Search order: $JAVA_HOME, a list of known install locations (globs
# allowed), then the keytool found on PATH paired with a system keystore.
# Every working variable is local and carries a "_fjk_" prefix so it cannot
# shadow or overwrite the caller variables the two namerefs point at.
find_java_keystore() {
    local -n java_bin_ref=$1
    local -n keystore_ref=$2
    local _fjk_pattern _fjk_dir _fjk_store _fjk_keytool
    # Common Java installation paths
    local _fjk_paths=(
        "/opt/jfrog/artifactory/app/third-party/java"
        "/mnt/ebs/bitbucket/*/jre"
        "/mnt/ebs/jira/jre"
        "/usr/lib/jvm/java-*-openjdk"
        "/usr/lib/jvm/default-java"
        "/opt/java"
        "/usr/java/latest"
    )
    # Check JAVA_HOME first
    if [[ -n "${JAVA_HOME:-}" && -x "$JAVA_HOME/bin/keytool" && -f "$JAVA_HOME/lib/security/cacerts" ]]; then
        java_bin_ref="$JAVA_HOME/bin"
        keystore_ref="$JAVA_HOME/lib/security/cacerts"
        debug_echo "Found Java via JAVA_HOME: $java_bin_ref"
        return 0
    fi
    # Search common paths with glob expansion
    for _fjk_pattern in "${_fjk_paths[@]}"; do
        # Unquoted on purpose: the entry may be a glob that the shell expands.
        for _fjk_dir in $_fjk_pattern; do
            if [[ -x "$_fjk_dir/bin/keytool" && -f "$_fjk_dir/lib/security/cacerts" ]]; then
                java_bin_ref="$_fjk_dir/bin"
                keystore_ref="$_fjk_dir/lib/security/cacerts"
                debug_echo "Found Java at: $_fjk_dir"
                return 0
            fi
        done
    done
    # Fallback: try system keytool
    if _fjk_keytool=$(command -v keytool 2>/dev/null); then
        # Try common system keystore locations
        local _fjk_system_stores=(
            "/etc/ssl/certs/java/cacerts"
            "/usr/lib/jvm/default-java/lib/security/cacerts"
            "/etc/pki/ca-trust/extracted/java/cacerts"
        )
        for _fjk_store in "${_fjk_system_stores[@]}"; do
            if [[ -f "$_fjk_store" ]]; then
                java_bin_ref="$(dirname "$_fjk_keytool")"
                keystore_ref="$_fjk_store"
                debug_echo "Found system Java at: $java_bin_ref"
                return 0
            fi
        done
    fi
    return 1
}
# Run the renewal handler that matches SERVER_TYPE.
# Returns: 1 (with a message on stderr) when SERVER_TYPE is empty or not one
#          of artifactory, bitbucket, cloudaccess, jira; otherwise the
#          status of the handler that ran.
handle_server_renewal() {
    case "$SERVER_TYPE" in
        "")
            echo "Error: SERVER_TYPE environment variable must be set for renewal" >&2
            echo "Valid values: artifactory, bitbucket, cloudaccess, jira" >&2
            return 1
            ;;
    esac
    debug_echo "Handling renewal for server type: $SERVER_TYPE"
    case "$SERVER_TYPE" in
        artifactory|bitbucket|cloudaccess|jira)
            # The handlers are named handle_<type>_renewal.
            "handle_${SERVER_TYPE}_renewal"
            ;;
        *)
            echo "Error: Unknown server type: $SERVER_TYPE" >&2
            echo "Valid values: artifactory, bitbucket, cloudaccess, jira" >&2
            return 1
            ;;
    esac
}
# Apply the command-line options, in order, to MONITOR_ONLY and RENEW_ONLY.
#   --monitor   MONITOR_ONLY=true
#   --renew     RENEW_ONLY=true
#   --all       both flags back to false
#   --help, -h  print help and exit 0
# Any other argument prints an error plus the help text on stderr and exits 1.
parse_arguments() {
    local opt
    for opt in "$@"; do
        case $opt in
            --monitor) MONITOR_ONLY=true ;;
            --renew)   RENEW_ONLY=true ;;
            --all)
                MONITOR_ONLY=false
                RENEW_ONLY=false
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                echo "Error: Unknown option: $opt" >&2
                show_help >&2
                exit 1
                ;;
        esac
    done
}
# Entry point.
#   1. Parse the options.
#   2. Exit 0 quietly when PEM_PATH does not exist.
#   3. Download a fresh certificate into PEM_PATH and validate it (exit 1 on
#      failure of either step).
#   4. Unless RENEW_ONLY is true, write the Prometheus metrics.
#   5. Unless MONITOR_ONLY is true, run the renewal handler when the days
#      left are at or below DAYS_THRESHOLD (exit 1 when it fails).
main() {
    parse_arguments "$@"
    # Nothing to do when the certificate file is absent
    if [[ ! -f "$PEM_PATH" ]]; then
        debug_echo "Certificate file not found: $PEM_PATH"
        exit 0
    fi
    # Fetch the current certificate, then make sure it parses
    download_certificate "$DOMAIN" "$PEM_PATH" || exit 1
    validate_certificate_file "$PEM_PATH" || exit 1
    # Both values are filled in by name
    local remaining_days elapsed_days
    calculate_certificate_dates "$PEM_PATH" remaining_days elapsed_days
    # Monitoring
    if [[ "$RENEW_ONLY" != true ]]; then
        generate_prometheus_metrics "$remaining_days" "$elapsed_days"
        debug_echo "Generated Prometheus metrics"
    fi
    # Renewal
    if [[ "$MONITOR_ONLY" != true ]]; then
        if [[ $remaining_days -le $DAYS_THRESHOLD ]]; then
            debug_echo "Certificate expires in $remaining_days days (threshold: $DAYS_THRESHOLD)"
            handle_server_renewal || exit 1
            debug_echo "Certificate renewal completed"
        fi
    fi
    debug_echo "Script completed successfully"
}
# Execute main function if script is run directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi
+300
View File
@@ -0,0 +1,300 @@
#!/bin/bash
##############################################
#### Create Swap for all Linux Servers ####
#### ####
#### Author: Phil Connor ####
#### Contact: pconnor@ara.com ####
#### Version 3.50.20250729 ####
#### ####
#### Created 06/01/2023 ####
##############################################
# Abort on command failure, on use of an unset variable, and when any stage
# of a pipeline fails
set -o errexit -o nounset -o pipefail
# Script configuration constants
SCRIPT_NAME="$(basename "$0")"
SWAPFILE_PATH="/.swapfile"   # Location of the swap file
SWAPPINESS_VALUE=80          # Value written to vm.swappiness
readonly SCRIPT_NAME SWAPFILE_PATH SWAPPINESS_VALUE
# Logging function - outputs to stderr with script name prefix
# Write "[<script name>] <arguments>" to stderr.
log() {
    printf '[%s] %s\n' "$SCRIPT_NAME" "$*" >&2
}
# Error function - logs error message and exits with status 1
# Log the arguments prefixed with "ERROR: " and end the script with status 1.
error() {
    local message="ERROR: $*"
    log "$message"
    exit 1
}
# Display usage information
# Print the usage text to stdout and end the script with status 0.
usage() {
    cat <<USAGE_TEXT
Usage: $SCRIPT_NAME [-h|--help]
Creates and configures a swap file sized 1:1 with system RAM.
Handles creation, resizing, and removal of existing swap files.
Options:
-h, --help Show this help message and exit
Must be run as root.
USAGE_TEXT
    exit 0
}
# Clean up partial swap file on unexpected failure
# Undo a half-finished swap file: log a notice, try to swap it off (failure
# is ignored), then delete the file.
cleanup_on_error() {
    log "Error detected, cleaning up partial swap file"
    if ! swapoff "$SWAPFILE_PATH" 2>/dev/null; then
        : # the file may never have been activated
    fi
    rm -f "$SWAPFILE_PATH"
}
# Detect the operating system distribution (ubuntu, centos, etc.)
# Print the distribution identifier in lower case.
# Uses the third field of "lsb_release -i" when lsb_release is available;
# otherwise sources /etc/os-release and prints ID ("unknown" when ID is unset).
detect_os() {
    if ! command -v lsb_release >/dev/null 2>&1; then
        # No lsb_release: fall back to /etc/os-release
        # shellcheck source=/dev/null
        . /etc/os-release 2>/dev/null && echo "${ID:-unknown}" | tr '[:upper:]' '[:lower:]'
        return
    fi
    lsb_release -i | awk '{print $3}' | tr '[:upper:]' '[:lower:]'
}
# Get total system memory in GB, rounded to nearest whole number
# Print the total system memory in GB, rounded to a whole number, never less
# than 1. The value comes from the MemTotal line of /proc/meminfo (in KB).
# Calls error (which exits) when the value is empty or zero.
get_memory_gb() {
    local total_kb
    total_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
    if [[ -z "$total_kb" || "$total_kb" -eq 0 ]]; then
        error "Unable to determine system memory"
    fi
    # KB -> GB, rounded by printf's %.0f
    local total_gb
    total_gb=$(awk "BEGIN {printf \"%.0f\", ($total_kb/1024/1024)}")
    # Machines with under half a GB would round to 0; report 1 instead
    if [[ "$total_gb" -eq 0 ]]; then
        total_gb=1
    fi
    echo "$total_gb"
}
# Calculate swap size needed in MB (1:1 ratio with RAM)
# Print the swap size in MB for a RAM size in GB (same size as RAM).
# Arguments: $1 memory in GB
get_swap_needed_mb() {
    local mem_gb="$1"
    local -r mb_per_gb=1024
    echo $((mem_gb * mb_per_gb))
}
# Get the current swap file size in MB, or 0 if no swap file exists
# Print the size of the file at SWAPFILE_PATH in whole MB, or 0 when no such
# regular file exists.
get_current_swap_size() {
    if [[ ! -f "$SWAPFILE_PATH" ]]; then
        echo 0
        return
    fi
    local bytes
    bytes=$(stat -c%s "$SWAPFILE_PATH" 2>/dev/null || echo 0)
    echo $((bytes / 1048576))
}
# Check if our swap file is currently active
# Succeed when SWAPFILE_PATH is listed by swapon as an active swap area.
# The path is compared as a fixed string against whole lines (grep -Fx): used
# as a regular expression, the "." in "/.swapfile" would match any character.
# The swapon output is captured first so that an early exit of grep -q cannot
# turn into a pipeline failure under "set -o pipefail".
is_swap_active() {
    local active_areas
    active_areas=$(swapon --show=NAME --noheadings 2>/dev/null) || true
    grep -Fxq -- "$SWAPFILE_PATH" <<< "$active_areas"
}
# Check if there's enough disk space for the swap file (with 10% buffer)
# Make sure the root filesystem has room for the swap file plus a 10% margin.
# Arguments: $1 swap size in MB
# Calls error (which exits) when the space reported by df is too small.
check_disk_space() {
    local needed_mb="$1"
    local filesystem="/"
    log "Checking available disk space for ${needed_mb}MB swap file"
    # Last line of df's "avail" column, taken as KB
    local free_kb
    free_kb=$(df --output=avail "$filesystem" | tail -n 1)
    local free_mb=$((free_kb / 1024))
    local margin_mb=$((needed_mb / 10))
    local required_mb=$((needed_mb + margin_mb))
    if [[ "$free_mb" -lt "$required_mb" ]]; then
        error "Insufficient disk space. Need ${required_mb}MB (${needed_mb}MB + 10% buffer), but only ${free_mb}MB available on $filesystem"
    fi
    log "Disk space check passed: ${free_mb}MB available, ${required_mb}MB required"
}
# Verify script is running with root privileges
# Stop (through error) unless the effective user ID is 0.
check_permissions() {
    (( EUID == 0 )) || error "This script must be run as root! Login as root, or use sudo."
}
# Configure system swappiness (how aggressively to use swap)
# Set vm.swappiness to SWAPPINESS_VALUE both for the running kernel
# (/proc/sys/vm/swappiness) and persistently (/etc/sysctl.conf).
# Nothing is written when sysctl.conf already holds exactly
# "vm.swappiness = <value>"; a differing entry is removed before the new one
# is appended.
setup_swappiness() {
    local sysconf="/etc/sysctl.conf"
    local procswap="/proc/sys/vm/swappiness"
    local wanted_entry="vm.swappiness = $SWAPPINESS_VALUE"
    log "Configuring swappiness to $SWAPPINESS_VALUE"
    local has_entry=false
    if grep -q "vm.swappiness" "$sysconf"; then
        has_entry=true
    fi
    if [[ "$has_entry" == true ]]; then
        # Already correct: leave everything alone
        if grep -q "$wanted_entry" "$sysconf"; then
            return 0
        fi
        # Present with another value: drop the old line(s)
        sed -i "/vm.swappiness/d" "$sysconf"
    fi
    echo "$SWAPPINESS_VALUE" > "$procswap"
    echo "$wanted_entry" >> "$sysconf"
}
# Set up automated cache clearing cron job (every 5 minutes)
# Make sure root's crontab has a job that runs sync and writes 1 to
# /proc/sys/vm/drop_caches every 5 minutes.
# Arguments: $1 distribution id; "ubuntu" selects
#            /var/spool/cron/crontabs/root, anything else /var/spool/cron/root
# An existing "sync; echo 3" job is deleted from that spool file first
# (failure to edit the file is ignored).
setup_cache_clearing() {
    local os="$1"
    local spool_file
    case "$os" in
        ubuntu) spool_file="/var/spool/cron/crontabs/root" ;;
        *)      spool_file="/var/spool/cron/root" ;;
    esac
    local cache_job="*/5 * * * * /usr/bin/sync; echo 1 > /proc/sys/vm/drop_caches"
    log "Setting up cache clearing cron job"
    # Drop any job that uses 'echo 3'
    if crontab -l 2>/dev/null | grep -q '/usr/bin/sync; echo 3'; then
        sed -i "/\/usr\/bin\/sync.*echo 3/d" "$spool_file" 2>/dev/null || true
    fi
    # Append the 'echo 1' job unless one is already listed
    if crontab -l 2>/dev/null | grep -q '/usr/bin/sync; echo 1'; then
        return 0
    fi
    { crontab -u root -l 2>/dev/null; echo "$cache_job"; } | crontab -u root -
}
# Remove existing swap file and clean up fstab entries
# Take the swap file out of service and delete it:
#   swapoff (failure ignored), copy /etc/fstab to a timestamped backup,
#   delete every fstab line containing SWAPFILE_PATH, remove the file.
remove_swap() {
    local stamp
    stamp=$(date +%y-%m-%d--%H-%M-%S)
    local fstab="/etc/fstab"
    log "Removing existing swap file: $SWAPFILE_PATH"
    # The file may already be inactive
    swapoff "$SWAPFILE_PATH" 2>/dev/null || true
    # Keep a copy of fstab before editing it
    cp "$fstab" "/etc/fstab.$stamp"
    # "|" is the sed address delimiter because the path contains "/"
    sed -i "\|${SWAPFILE_PATH}|d" "$fstab"
    rm -f "$SWAPFILE_PATH"
}
# Create and configure a new swap file
# Create, format and enable a swap file of the given size at SWAPFILE_PATH,
# and add it to /etc/fstab when no line there mentions it yet.
# Arguments: $1 size in MB (0 is rejected through error)
# While the steps run, an ERR trap pointing at cleanup_on_error is installed;
# it is cleared again once every step has succeeded.
create_swap() {
    local size_mb="$1"
    if [[ "$size_mb" -eq 0 ]]; then
        error "Cannot create swap: swap size cannot be 0 MB"
    fi
    log "Creating swap file of size ${size_mb}MB at $SWAPFILE_PATH"
    trap cleanup_on_error ERR
    # Zero-filled file written with direct I/O and a progress display
    dd if=/dev/zero of="$SWAPFILE_PATH" bs=1M count="$size_mb" oflag=direct status=progress
    # Owner-only access
    chmod 600 "$SWAPFILE_PATH"
    mkswap "$SWAPFILE_PATH"
    swapon "$SWAPFILE_PATH"
    # Persist across reboots
    grep -q "$SWAPFILE_PATH" /etc/fstab || echo "$SWAPFILE_PATH swap swap defaults 0 0" >> /etc/fstab
    trap - ERR
    log "Swap file created and enabled successfully"
}
# Main function - orchestrates the entire swap setup process
# Entry point: size the swap file 1:1 with RAM and bring it into service.
#   -h | --help as first argument prints usage (usage ends the script).
# Flow: root check, gather OS / memory / sizes, apply swappiness and the
# cache-clearing cron job, then
#   - right size and active      -> nothing more to do
#   - right size but not active  -> re-format and enable the existing file
#   - any other size             -> remove an existing file (if any), check
#                                   disk space, create a new file
main() {
    case "${1:-}" in
        -h|--help) usage ;;
    esac
    check_permissions
    local os mem_gb wanted_mb existing_mb
    os=$(detect_os)
    mem_gb=$(get_memory_gb)
    wanted_mb=$(get_swap_needed_mb "$mem_gb")
    existing_mb=$(get_current_swap_size)
    # System settings are applied regardless of the swap file's state
    setup_swappiness
    setup_cache_clearing "$os"
    if [[ "$existing_mb" -eq "$wanted_mb" ]]; then
        if is_swap_active; then
            log "Swap size is already correct and active"
            log "Swap setup completed successfully"
            return 0
        fi
        # Correct size, just not in use
        log "Swap file exists at correct size but is not active, enabling"
        chmod 600 "$SWAPFILE_PATH"
        mkswap "$SWAPFILE_PATH"
        swapon "$SWAPFILE_PATH"
        if ! grep -q "$SWAPFILE_PATH" /etc/fstab; then
            echo "$SWAPFILE_PATH swap swap defaults 0 0" >> /etc/fstab
        fi
    else
        # Remove a wrongly sized file first so the space check sees the
        # space it frees
        if [[ "$existing_mb" -ne 0 ]]; then
            remove_swap
        fi
        check_disk_space "$wanted_mb"
        create_swap "$wanted_mb"
    fi
    log "Swap setup completed successfully"
}
# Execute main function with all script arguments
main "$@"
+267
View File
@@ -0,0 +1,267 @@
#!/usr/bin/env bash
# directory-size-exporter.sh — Prometheus exporter for directory sizes
#
# Monitors directory disk usage that node_exporter can't see.
# Node exporter only reports mounted filesystem totals — this script
# tracks individual directories like /var/log, /home, /opt, or any
# path you care about.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# License: MIT
# Version: 1.0.0
# Strict mode: abort on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Prefix of the exporter's own metrics (duration, last run, success) and
# base name of the file written in --textfile mode.
EXPORTER_NAME="directory_size"
# Port used for --http mode when neither --port nor DIRECTORY_SIZE_PORT is given.
DEFAULT_PORT=9101
# Where metrics go: "stdout", "textfile", "file" or "http" (set by the options below).
OUTPUT_MODE="stdout"
# Destination path for "file" mode (-o FILE); computed for "textfile" mode.
OUTPUT_FILE=""
# HTTP listen port; the environment variable overrides the default, --port overrides both.
PORT="${DIRECTORY_SIZE_PORT:-$DEFAULT_PORT}"
# Value handed to timeout(1) for each du invocation; --timeout overrides it.
TIMEOUT="${DIRECTORY_SIZE_TIMEOUT:-300}"
# Logging / behaviour switches toggled by --verbose, --quiet and --dry-run.
VERBOSE=false
QUIET=false
DRY_RUN=false
# Directories to measure, filled from the positional arguments.
TARGET_DIRECTORIES=()
# ── Metrics Collection ──────────────────────────────────────────────
# Print a timestamped diagnostic line to stderr when --verbose is active.
#
# Globals:   VERBOSE (read)
# Arguments: message words, joined with spaces
# Returns:   always 0. An `if` is used instead of `[[ … ]] && echo` because
#            the short-circuit form makes the function return 1 whenever
#            verbose output is off, which aborts the script under `set -e`.
log_verbose() {
    if [[ "$VERBOSE" == true ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2
    fi
}
# Print a timestamped informational line to stderr unless --quiet is active.
#
# Globals:   QUIET (read)
# Arguments: message words, joined with spaces
# Returns:   always 0. An `if` is used instead of `[[ … ]] && echo` because
#            the short-circuit form makes the function return 1 in quiet
#            mode, which aborts the script under `set -e`.
log_info() {
    if [[ "$QUIET" == false ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >&2
    fi
}
# Emit directory-size metrics in Prometheus text exposition format on stdout.
#
# Globals: TARGET_DIRECTORIES (read), TIMEOUT (read), EXPORTER_NAME (read)
# Outputs: node_directory_size_bytes and
#          node_directory_filesystem_usage_percent for every directory,
#          followed by the exporter's duration / last-run / success gauges.
#
# Samples are buffered per metric family and printed after the loop so that
# all lines of one family form a single group, as the exposition format
# requires. Label values are escaped (backslash, double quote, newline).
# A directory whose du call fails is skipped and flips the success gauge to 0.
collect_metrics() {
    local start_time end_time runtime
    start_time=$(date +%s%N)

    local success=1
    local size_samples="" pct_samples=""
    local directory label du_output size_bytes pct

    for directory in "${TARGET_DIRECTORIES[@]}"; do
        log_verbose "Running du for: $directory"

        # Escape the characters that are special inside a label value.
        label=${directory//\\/\\\\}
        label=${label//\"/\\\"}
        label=${label//$'\n'/\\n}

        # Get directory size in bytes
        du_output=$(timeout "$TIMEOUT" du --block-size=1 --summarize "$directory" 2>/dev/null) || {
            log_info "WARNING: du failed for $directory"
            success=0
            continue
        }
        # du prints "<bytes><TAB><path>"; keep only the leading number.
        size_bytes=${du_output%%[[:space:]]*}
        size_samples+="node_directory_size_bytes{directory=\"${label}\"} ${size_bytes}"$'\n'

        # Filesystem usage percentage of the mount point; best effort, so a
        # df failure must not abort the run under set -e / pipefail.
        pct=$(df --output=pcent "$directory" 2>/dev/null | tail -n 1 | tr -d ' %') || pct=""
        if [[ "$pct" =~ ^[0-9]+$ ]]; then
            pct_samples+="node_directory_filesystem_usage_percent{directory=\"${label}\"} ${pct}"$'\n'
        fi
    done

    echo "# HELP node_directory_size_bytes Disk space used by directory"
    echo "# TYPE node_directory_size_bytes gauge"
    printf '%s' "$size_samples"
    echo "# HELP node_directory_filesystem_usage_percent Filesystem usage percentage for the directory mount point"
    echo "# TYPE node_directory_filesystem_usage_percent gauge"
    printf '%s' "$pct_samples"

    # ── Script runtime ──
    end_time=$(date +%s%N)
    # Subtract in integer arithmetic first; awk only does the final division.
    runtime=$(awk -v ns="$(( end_time - start_time ))" 'BEGIN {printf "%.3f", ns / 1000000000}')
    echo ""
    echo "# HELP ${EXPORTER_NAME}_duration_seconds Script execution time"
    echo "# TYPE ${EXPORTER_NAME}_duration_seconds gauge"
    echo "${EXPORTER_NAME}_duration_seconds ${runtime}"
    echo "# HELP ${EXPORTER_NAME}_last_run_timestamp Last successful run"
    echo "# TYPE ${EXPORTER_NAME}_last_run_timestamp gauge"
    echo "${EXPORTER_NAME}_last_run_timestamp $(date +%s)"
    echo "# HELP ${EXPORTER_NAME}_success Whether the exporter ran successfully"
    echo "# TYPE ${EXPORTER_NAME}_success gauge"
    echo "${EXPORTER_NAME}_success ${success}"
}
# ── HTTP Request Handler ────────────────────────────────────────────
# Serve one HTTP request read from stdin and write the response to stdout.
#
# Answers "/metrics" with the output of collect_metrics and every other
# path with a 404.
#
# Outputs: a complete HTTP/1.1 response (status line, headers, body)
# Returns: 0
handle_request() {
    local method="" path="" version="" header

    # Request line, e.g. "GET /metrics HTTP/1.1". A client that disconnects
    # without sending anything must not abort the handler under set -e.
    read -r method path version || true
    # Drop a trailing CR in case the request line carries no version field.
    path=${path%$'\r'}

    # Consume the header block up to the empty line that terminates it.
    while IFS= read -r header; do
        if [[ "$header" == $'\r' || -z "$header" ]]; then
            break
        fi
    done

    if [[ "$path" == "/metrics" ]]; then
        local metrics length
        # Command substitution strips trailing newlines, but the exposition
        # format requires the last line to end with a line feed: re-add it.
        metrics="$(collect_metrics)"$'\n'
        # Content-Length is a byte count; ${#var} counts characters in the
        # current locale, so measure under the C locale.
        local LC_ALL=C
        length=${#metrics}
        printf "HTTP/1.1 200 OK\r\n"
        printf "Content-Type: text/plain; version=0.0.4; charset=utf-8\r\n"
        printf "Content-Length: %d\r\n" "$length"
        printf "Connection: close\r\n"
        printf "\r\n"
        printf "%s" "$metrics"
    else
        local body="404 Not Found"
        printf "HTTP/1.1 404 Not Found\r\n"
        printf "Content-Type: text/plain\r\n"
        printf "Content-Length: %d\r\n" "${#body}"
        printf "Connection: close\r\n"
        printf "\r\n"
        printf "%s" "$body"
    fi
}
# ── Help ─────────────────────────────────────────────────────────────
# Print the usage text to stdout.
# The here-document delimiter is unquoted, so $0 and ${DEFAULT_PORT} are
# expanded when the text is printed. The function does not exit by itself;
# the option parser does that after calling it.
show_help() {
cat <<EOF
Usage: $0 [OPTIONS] <directory> [directory2 ...]
Monitor directory sizes for Prometheus. Node exporter only reports
mounted filesystem totals — this script tracks individual directories.
Output modes:
(default) Print metrics to stdout
--textfile Write to node_exporter textfile collector
-o FILE Write to a specific file
--http Run as HTTP server (default port: ${DEFAULT_PORT})
Options:
--port PORT HTTP listen port (default: ${DEFAULT_PORT})
--timeout SECS du command timeout (default: 300)
--dry-run Show what would be written without writing
--verbose, -v Enable verbose debug output
--quiet, -q Suppress non-error output
-h, --help Show this help message
Environment variables:
DIRECTORY_SIZE_PORT HTTP listen port (default: ${DEFAULT_PORT})
DIRECTORY_SIZE_TIMEOUT du command timeout in seconds (default: 300)
Examples:
$0 /var/log /home /opt
$0 --textfile /var/log /var/lib/mysql
$0 --http --port 9101 /var/log /home
$0 -o /tmp/dir_sizes.prom /var/log
EOF
}
# ── Argument Parsing ────────────────────────────────────────────────
# Temp file currently being written; removed on exit so a failed run
# (e.g. collect_metrics aborting under set -e) leaves no stray file behind.
temp_file=""
cleanup_temp_file() {
    if [[ -n "$temp_file" ]]; then
        rm -f -- "$temp_file"
    fi
}
trap cleanup_temp_file EXIT

while [[ $# -gt 0 ]]; do
    case "$1" in
        --textfile)
            OUTPUT_MODE="textfile"
            shift
            ;;
        -o|--port|--timeout)
            # Options that take a value: fail with a clear message instead
            # of an "unbound variable" error when the value is missing.
            if [[ $# -lt 2 ]]; then
                echo "Error: option $1 requires an argument" >&2
                exit 1
            fi
            case "$1" in
                -o)
                    OUTPUT_MODE="file"
                    OUTPUT_FILE="$2"
                    ;;
                --port)
                    PORT="$2"
                    ;;
                --timeout)
                    TIMEOUT="$2"
                    ;;
            esac
            shift 2
            ;;
        --http)
            OUTPUT_MODE="http"
            shift
            ;;
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --verbose|-v)
            VERBOSE=true
            shift
            ;;
        --quiet|-q)
            QUIET=true
            shift
            ;;
        --handle-request)
            # Internal option: socat starts a fresh copy of this script for
            # every connection. The listening process hands over the
            # directory list through the environment (see the http mode
            # below); without it the handler would report no directories.
            if [[ ${#TARGET_DIRECTORIES[@]} -eq 0 && -n "${DIRECTORY_SIZE_TARGETS:-}" ]]; then
                mapfile -t TARGET_DIRECTORIES <<< "$DIRECTORY_SIZE_TARGETS"
            fi
            handle_request
            exit 0
            ;;
        -h|--help)
            show_help
            exit 0
            ;;
        -*)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
        *)
            TARGET_DIRECTORIES+=("$1")
            shift
            ;;
    esac
done

# Validate directories
if [[ ${#TARGET_DIRECTORIES[@]} -eq 0 ]]; then
    echo "Error: at least one directory argument is required" >&2
    echo "Run with --help for usage" >&2
    exit 1
fi
for dir in "${TARGET_DIRECTORIES[@]}"; do
    if [[ ! -d "$dir" ]]; then
        echo "Error: directory does not exist: $dir" >&2
        exit 1
    fi
    if [[ ! -r "$dir" ]]; then
        echo "Error: directory is not readable: $dir" >&2
        exit 1
    fi
done

# ── Output ──────────────────────────────────────────────────────────
if [[ "$DRY_RUN" == true ]]; then
    log_info "DRY RUN — metrics that would be written:"
    collect_metrics
    exit 0
fi

case "$OUTPUT_MODE" in
    stdout)
        collect_metrics
        ;;
    textfile)
        output_dir="/var/lib/node_exporter"
        OUTPUT_FILE="${output_dir}/${EXPORTER_NAME}.prom"
        mkdir -p "$output_dir"
        # Write to a temp file in the same directory and rename it, so the
        # collector never reads a half-written file.
        temp_file=$(mktemp "${output_dir}/.${EXPORTER_NAME}.XXXXXX")
        collect_metrics > "$temp_file"
        chmod 644 "$temp_file"
        mv -f "$temp_file" "$OUTPUT_FILE"
        temp_file=""
        ;;
    file)
        temp_file=$(mktemp "${OUTPUT_FILE}.XXXXXX")
        collect_metrics > "$temp_file"
        chmod 644 "$temp_file"
        mv -f "$temp_file" "$OUTPUT_FILE"
        temp_file=""
        ;;
    http)
        if ! command -v socat &>/dev/null; then
            echo "ERROR: socat is required for --http mode" >&2
            echo "Install it: apt install socat or dnf install socat" >&2
            exit 1
        fi
        echo "${EXPORTER_NAME} listening on port ${PORT}..."
        echo "Monitoring directories: ${TARGET_DIRECTORIES[*]}"
        # The per-connection handler is a separate process that only gets
        # "--handle-request" on its command line. Pass the configuration
        # through the environment: one directory per line (paths containing
        # a newline are therefore not supported in this mode) plus the du
        # timeout, which the handler reads back via DIRECTORY_SIZE_TIMEOUT.
        DIRECTORY_SIZE_TARGETS=$(printf '%s\n' "${TARGET_DIRECTORIES[@]}")
        export DIRECTORY_SIZE_TARGETS
        export DIRECTORY_SIZE_TIMEOUT="$TIMEOUT"
        handler_cmd="$0"
        if [[ "$VERBOSE" == true ]]; then
            handler_cmd+=" --verbose"
        fi
        if [[ "$QUIET" == true ]]; then
            handler_cmd+=" --quiet"
        fi
        handler_cmd+=" --handle-request"
        socat TCP-LISTEN:"$PORT",reuseaddr,fork EXEC:"$handler_cmd"
        ;;
esac
+354
View File
@@ -0,0 +1,354 @@
#!/usr/bin/env bash
# disk-io-exporter.sh — Prometheus exporter for per-disk I/O performance
#
# Reads /proc/diskstats and calculates per-disk IOPS, throughput,
# latency, utilization, and queue depth. Takes two samples with a
# configurable interval to compute rates from the cumulative counters.
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# License: MIT
# Date: 2026-03-03
# Version: 1.0.0
# Strict mode: abort on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# ── Configuration ───────────────────────────────────────────────────
# Version string printed by --version.
readonly VERSION="1.0.0"
# Script file name without its directory part, used in help/version output.
readonly SCRIPT_NAME="${0##*/}"
# Textfile collector directory; may be overridden from the environment.
readonly NODE_DIR="${NODE_DIR:-/var/lib/node_exporter}"
# Final metrics file.
readonly OUTPUT_FILE="${NODE_DIR}/disk_io.prom"
# Staging file (suffix is this shell's PID); renamed onto OUTPUT_FILE when complete.
readonly TMP_FILE="${OUTPUT_FILE}.$$"
# Seconds between the two /proc/diskstats samples.
readonly SAMPLE_INTERVAL="${SAMPLE_INTERVAL:-1}"
# Optional regex; when non-empty only matching device names are reported.
readonly DISK_FILTER="${DISK_FILTER:-}"
# Runtime flags
# Set to true by --dry-run: print metrics to stdout instead of writing the file.
DRY_RUN=false
# Any non-empty value (or --debug) enables debug_echo output.
DEBUG=${DEBUG:-}
# ── Helpers ─────────────────────────────────────────────────────────
# Write "[DEBUG] <message>" to stderr when DEBUG is non-empty; otherwise do
# nothing. Always returns 0.
debug_echo() {
    [[ -n "$DEBUG" ]] || return 0
    printf '[DEBUG] %s\n' "$*" >&2
}
# Write "[ERROR] <message>" to stderr; all arguments are joined with spaces.
log_error() {
    printf '[ERROR] %s\n' "$*" >&2
}
# Remove the staging file. Installed as the EXIT handler below, so it runs
# on every exit path; rm -f makes it a no-op when the file is already gone.
cleanup() { rm -f "$TMP_FILE"; }

trap cleanup EXIT
# Print the usage text to stdout and exit with status 0.
# The here-document delimiter is unquoted, so $SCRIPT_NAME and
# ${SAMPLE_INTERVAL} are expanded when the text is printed.
show_help() {
cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]
Prometheus textfile collector exporter for per-disk I/O performance.
Reads /proc/diskstats, takes two samples ${SAMPLE_INTERVAL}s apart, and
calculates rates per disk.
OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version
ENVIRONMENT VARIABLES:
DISK_FILTER Regex of disk names to include (default: all real disks)
Example: DISK_FILTER="^sd[a-z]+$|^nvme[0-9]+n[0-9]+$"
NODE_DIR Textfile collector directory (default: /var/lib/node_exporter)
SAMPLE_INTERVAL Seconds between the two samples (default: 1)
DEBUG Enable debug output when set to any value
EXAMPLES:
$SCRIPT_NAME --dry-run
DISK_FILTER="^sda$" $SCRIPT_NAME
SAMPLE_INTERVAL=2 $SCRIPT_NAME
DEBUG=1 $SCRIPT_NAME --dry-run
FILTERED DEVICES:
loop*, ram* devices are excluded by default. Use DISK_FILTER to
restrict to specific disks (e.g. only sd* or nvme* devices).
EOF
exit 0
}
# Print "<script name> version <version>" to stdout and exit with status 0.
show_version() {
    printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
    exit 0
}
# ── Snapshot /proc/diskstats ────────────────────────────────────────
#
# Fields from /proc/diskstats (kernel 4.18+):
# $1 major
# $2 minor
# $3 device name
# $4 reads completed
# $5 reads merged
# $6 sectors read
# $7 time reading (ms)
# $8 writes completed
# $9 writes merged
# $10 sectors written
# $11 time writing (ms)
# $12 I/Os in progress (instantaneous)
# $13 time doing I/Os (ms)
# $14 weighted time doing I/Os (ms)
# Read the per-device I/O counters into an associative array.
#
# Usage:     take_snapshot ARRAY_NAME [DISKSTATS_FILE [SYSFS_BLOCK_DIR]]
# Arguments: $1 - name of an associative array in the caller; filled with
#                 keys "<dev>_reads", "<dev>_sectors_read", "<dev>_read_ms",
#                 "<dev>_writes", "<dev>_sectors_written", "<dev>_write_ms",
#                 "<dev>_inflight", "<dev>_io_ms" and "<dev>_weighted_ms"
#            $2 - stats file to parse (default: /proc/diskstats)
#            $3 - sysfs block directory (default: /sys/class/block)
# Globals:   DISK_FILTER (read)
#
# loop* and ram* devices and partitions are skipped. A partition is
# recognised by the "partition" attribute sysfs exposes for it. The previous
# name-only heuristic treated every name ending in a digit as a partition
# and so dropped whole disks such as md0 or mmcblk0; it is kept only as a
# fallback for devices that have no sysfs entry.
take_snapshot() {
    local -n _snapshot=$1
    local _stats_file="${2:-/proc/diskstats}"
    local _sysfs_dir="${3:-/sys/class/block}"
    local dev reads sectors_read read_ms writes sectors_written write_ms
    local inflight io_ms weighted_ms

    # Columns: major minor name reads merged sectors ms writes merged
    # sectors ms inflight io_ms weighted_ms [newer kernel fields...]
    while read -r _ _ dev reads _ sectors_read read_ms writes _ sectors_written write_ms inflight io_ms weighted_ms _; do
        # Skip loop and ram devices
        if [[ "$dev" =~ ^loop[0-9] || "$dev" =~ ^ram[0-9] ]]; then
            continue
        fi

        # Skip partition devices (e.g. sda1, nvme0n1p1) — report whole disks only
        if [[ -d "${_sysfs_dir}/${dev}" ]]; then
            if [[ -e "${_sysfs_dir}/${dev}/partition" ]]; then
                continue
            fi
        elif [[ "$dev" =~ [0-9]+p[0-9]+$ ]] \
            || [[ "$dev" =~ ^[a-z]+[0-9]+$ && ! "$dev" =~ ^nvme ]]; then
            continue
        fi

        # Apply user filter if set
        if [[ -n "$DISK_FILTER" ]] && ! [[ "$dev" =~ $DISK_FILTER ]]; then
            continue
        fi

        _snapshot["${dev}_reads"]="$reads"
        _snapshot["${dev}_sectors_read"]="$sectors_read"
        _snapshot["${dev}_read_ms"]="$read_ms"
        _snapshot["${dev}_writes"]="$writes"
        _snapshot["${dev}_sectors_written"]="$sectors_written"
        _snapshot["${dev}_write_ms"]="$write_ms"
        _snapshot["${dev}_inflight"]="$inflight"
        _snapshot["${dev}_io_ms"]="$io_ms"
        _snapshot["${dev}_weighted_ms"]="$weighted_ms"
    done < "$_stats_file"
}
# ── Collect device list from a snapshot ─────────────────────────────
# Print the device names contained in a snapshot, one per line, sorted.
#
# Arguments: $1 - name of a snapshot associative array
# Every device owns exactly one "<dev>_reads" key, so the names are obtained
# by taking the keys with that suffix and stripping it.
get_devices() {
    local -n _snap=$1
    local entry
    for entry in "${!_snap[@]}"; do
        if [[ "$entry" == *_reads ]]; then
            echo "${entry%_reads}"
        fi
    done | sort
}
# ── Metrics Collection ─────────────────────────────────────────────
# Store in REPLY the increase of one counter between two snapshots.
#   $1 / $2 - names of the "before" / "after" snapshot arrays
#   $3      - key, e.g. "sda_reads"
# A missing key counts as 0. A negative difference (counter reset or
# wrap-around between the samples) is clamped to 0 so no negative rate is
# ever exported.
_disk_io_delta() {
    local -n _d_before=$1 _d_after=$2
    local _d_key=$3
    REPLY=$(( ${_d_after[$_d_key]:-0} - ${_d_before[$_d_key]:-0} ))
    if (( REPLY < 0 )); then
        REPLY=0
    fi
}

# Print one gauge family computed as (delta * mult) / (interval * div).
#   $1 / $2 - names of the "before" / "after" snapshot arrays
#   $3      - newline-separated device list
#   $4      - sampling interval in seconds
#   $5      - metric suffix (full name is linux_disk_io_<suffix>)
#   $6      - HELP text
#   $7      - snapshot field, e.g. "reads"
#   $8 / $9 - multiplier applied to the delta / to the interval
_disk_io_emit_rate() {
    local _b_name=$1 _a_name=$2 _devices=$3 _interval=$4
    local _suffix=$5 _help=$6 _field=$7 _mult=$8 _div=$9
    local _dev _value

    echo "# HELP linux_disk_io_${_suffix} ${_help}"
    echo "# TYPE linux_disk_io_${_suffix} gauge"
    while read -r _dev; do
        _disk_io_delta "$_b_name" "$_a_name" "${_dev}_${_field}"
        _value=$(awk -v d="$REPLY" -v m="$_mult" -v i="$_interval" -v s="$_div" \
            'BEGIN {printf "%.2f", (d * m) / (i * s)}')
        echo "linux_disk_io_${_suffix}{disk=\"${_dev}\"} ${_value}"
        debug_echo "$_dev ${_suffix}=$_value"
    done <<< "$_devices"
}

# Sample the disk counters twice, SAMPLE_INTERVAL seconds apart, and print
# per-disk IOPS, throughput, latency, utilisation and queue depth in
# Prometheus text exposition format, followed by the exporter's own gauges.
#
# Globals: SAMPLE_INTERVAL (read)
# Outputs: metrics on stdout
# Returns: 0 on success (also when no disk passes the filters, in which
#          case only a comment line is printed); 1 when SAMPLE_INTERVAL is
#          not a positive number.
collect_metrics() {
    local start_time end_time runtime
    start_time=$(date +%s%N)

    local interval="$SAMPLE_INTERVAL"
    # Every rate divides by the interval, so reject zero / negative /
    # non-numeric values up front with a clear message.
    if ! awk -v i="$interval" 'BEGIN {exit !(i + 0 > 0)}'; then
        log_error "SAMPLE_INTERVAL must be a positive number (got: $interval)"
        return 1
    fi

    # First snapshot
    declare -A snap1
    take_snapshot snap1
    debug_echo "First snapshot taken"
    sleep "$SAMPLE_INTERVAL"
    # Second snapshot
    declare -A snap2
    take_snapshot snap2
    debug_echo "Second snapshot taken after ${SAMPLE_INTERVAL}s interval"

    local devices
    devices=$(get_devices snap2)
    if [[ -z "$devices" ]]; then
        log_error "No disks found after filtering"
        echo "# No disks found"
        return
    fi

    local dev value

    # ── Counter-delta rates ──
    _disk_io_emit_rate snap1 snap2 "$devices" "$interval" \
        read_iops "Read operations per second" reads 1 1
    _disk_io_emit_rate snap1 snap2 "$devices" "$interval" \
        write_iops "Write operations per second" writes 1 1
    # Each sector is 512 bytes
    _disk_io_emit_rate snap1 snap2 "$devices" "$interval" \
        read_bytes_per_sec "Bytes read per second" sectors_read 512 1
    _disk_io_emit_rate snap1 snap2 "$devices" "$interval" \
        write_bytes_per_sec "Bytes written per second" sectors_written 512 1

    # ── Average latency: time spent on I/O divided by completed operations ──
    echo "# HELP linux_disk_io_await_ms Average I/O latency in milliseconds"
    echo "# TYPE linux_disk_io_await_ms gauge"
    local total_ops total_ms
    while read -r dev; do
        _disk_io_delta snap1 snap2 "${dev}_reads"
        total_ops=$REPLY
        _disk_io_delta snap1 snap2 "${dev}_writes"
        total_ops=$(( total_ops + REPLY ))
        _disk_io_delta snap1 snap2 "${dev}_read_ms"
        total_ms=$REPLY
        _disk_io_delta snap1 snap2 "${dev}_write_ms"
        total_ms=$(( total_ms + REPLY ))
        if (( total_ops > 0 )); then
            value=$(awk -v ms="$total_ms" -v ops="$total_ops" 'BEGIN {printf "%.2f", ms / ops}')
        else
            # No completed I/O in the interval: report zero latency.
            value="0.00"
        fi
        echo "linux_disk_io_await_ms{disk=\"${dev}\"} ${value}"
        debug_echo "$dev await_ms=$value"
    done <<< "$devices"

    # ── Utilisation: share of the interval the device was busy ──
    echo "# HELP linux_disk_io_util_percent Disk utilization percentage"
    echo "# TYPE linux_disk_io_util_percent gauge"
    while read -r dev; do
        _disk_io_delta snap1 snap2 "${dev}_io_ms"
        # io_ms is milliseconds spent doing I/O; interval is in seconds.
        # Capped at 100.
        value=$(awk -v d="$REPLY" -v i="$interval" \
            'BEGIN {v = (d / (i * 1000)) * 100; if (v > 100) v = 100; printf "%.2f", v}')
        echo "linux_disk_io_util_percent{disk=\"${dev}\"} ${value}"
        debug_echo "$dev util_percent=$value"
    done <<< "$devices"

    _disk_io_emit_rate snap1 snap2 "$devices" "$interval" \
        queue_depth "Weighted number of I/Os in progress (avgqu-sz)" weighted_ms 1 1000

    # ── Script metadata metrics ──
    end_time=$(date +%s%N)
    # Subtract in integer arithmetic first; awk only does the final division.
    runtime=$(awk -v ns="$(( end_time - start_time ))" 'BEGIN {printf "%.3f", ns / 1000000000}')
    echo ""
    echo "# HELP linux_disk_io_exporter_duration_seconds Script execution time"
    echo "# TYPE linux_disk_io_exporter_duration_seconds gauge"
    echo "linux_disk_io_exporter_duration_seconds ${runtime}"
    echo "# HELP linux_disk_io_exporter_last_run_timestamp Last successful run"
    echo "# TYPE linux_disk_io_exporter_last_run_timestamp gauge"
    echo "linux_disk_io_exporter_last_run_timestamp $(date +%s)"
    echo "# HELP linux_disk_io_exporter_success Whether the exporter ran successfully"
    echo "# TYPE linux_disk_io_exporter_success gauge"
    echo "linux_disk_io_exporter_success 1"
}
# ── Main ────────────────────────────────────────────────────────────
# Entry point: parse the command-line flags, then either print the metrics
# (--dry-run) or write them to OUTPUT_FILE via TMP_FILE and a rename.
#
# Arguments: the script's command-line arguments
# Globals:   DRY_RUN, DEBUG (written); NODE_DIR, TMP_FILE, OUTPUT_FILE (read)
# Exits:     1 on an unknown option, when /proc/diskstats is missing, or
#            when the textfile directory does not exist
main() {
    local arg
    # No option takes a value, so the arguments can be walked one by one.
    for arg in "$@"; do
        case "$arg" in
            --dry-run)
                DRY_RUN=true
                ;;
            --debug)
                DEBUG=1
                ;;
            --help|-h)
                show_help
                ;;
            --version|-v)
                show_version
                ;;
            *)
                log_error "Unknown option: $arg"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done

    [[ -f /proc/diskstats ]] || {
        log_error "/proc/diskstats not found — this script requires a Linux system"
        exit 1
    }

    if [[ "$DRY_RUN" == true ]]; then
        collect_metrics
        exit 0
    fi

    [[ -d "$NODE_DIR" ]] || {
        log_error "Textfile collector directory does not exist: $NODE_DIR"
        exit 1
    }

    # Stage the output and rename it so the collector never sees a partial file.
    collect_metrics > "$TMP_FILE"
    chmod 644 "$TMP_FILE"
    mv -f "$TMP_FILE" "$OUTPUT_FILE"
    debug_echo "Metrics written to $OUTPUT_FILE"
}
# Run the exporter, forwarding all command-line arguments unchanged.
main "$@"
+383
View File
@@ -0,0 +1,383 @@
#!/bin/bash
################################################################################
# Script Name: dns-health-check.sh
# Version: 1.0
# Description: Prometheus textfile collector exporter for DNS resolution health
# Queries configurable DNS records and reports resolution status
# and latency via node_exporter textfile collector
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Date: 2026-03-03
#
# Prerequisites:
# - dig (bind-utils / dnsutils)
# - node_exporter with textfile collector enabled
# - /var/lib/node_exporter directory exists
#
# Usage:
# # Run with default config
# sudo ./dns-health-check.sh
#
# # Dry run (output to stdout)
# ./dns-health-check.sh --dry-run
#
# # Debug mode
# DEBUG=1 sudo ./dns-health-check.sh
#
# Config Format (pipe-delimited, one record per line):
# record_name|record_type|dns_server|expected_value(optional)
#
# Examples:
# example.com|A|8.8.8.8|
# mail.example.com|MX|8.8.8.8|
# _ldap._tcp.example.com|SRV|10.0.0.1|
# example.com|A|8.8.8.8|93.184.216.34
#
# Metrics Exported:
# - linux_dns_query_success{record,type,server} - 1=resolved, 0=failed
# - linux_dns_query_time_seconds{record,type,server} - Resolution time
# - linux_dns_query_answer_match{record,type,server,expected} - 1=match, 0=mismatch
#
################################################################################
# Only pipefail is enabled here (neither -e nor -u): a pipeline reports
# failure when any of its stages fails, but the script does not abort
# automatically on errors.
set -o pipefail
# ============================================================================
# CONFIGURATION
# ============================================================================
# Version string printed by --version.
readonly VERSION="1.0"
# Script file name without its directory part, used in help/version output.
readonly SCRIPT_NAME="${0##*/}"
# Textfile collector directory; may be overridden from the environment.
readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
# Final metrics file.
readonly OUTPUT_FILE="${TEXTFILE_DIR}/dns_health.prom"
# File listing the records to check; may be overridden from the environment.
readonly CONFIG_FILE="${CONFIG_FILE:-/etc/dns-health-check.conf}"
# Staging file (suffix is this shell's PID); renamed onto OUTPUT_FILE when complete.
readonly TMP_FILE="${OUTPUT_FILE}.$$"
# Runtime flags
# Set to true by --dry-run: print metrics to stdout instead of writing the file.
DRY_RUN=false
# Any non-empty value (or --debug) enables debug_echo output.
DEBUG=${DEBUG:-}
# Default DNS records to check if no config file and no env var
readonly DEFAULT_RECORDS="localhost|A|127.0.0.1|"
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Write "[DEBUG] <message>" to stderr when DEBUG is non-empty; otherwise do
# nothing. Always returns 0.
debug_echo() {
    [[ -z "$DEBUG" ]] && return 0
    printf '[DEBUG] %s\n' "$*" >&2
}
# Write "[ERROR] <message>" to stderr; all arguments are joined with spaces.
log_error() {
    printf '[ERROR] %s\n' "$*" >&2
}
# Remove the staging file. Installed as the EXIT handler below, so it runs
# on every exit path; rm -f makes it a no-op when the file is already gone.
cleanup() { rm -f "$TMP_FILE"; }

trap cleanup EXIT
# Print the usage text to stdout and exit with status 0.
# The here-document delimiter is unquoted, so $SCRIPT_NAME is expanded when
# the text is printed.
show_help() {
cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]
Prometheus textfile collector exporter for DNS resolution health.
Queries DNS records and reports resolution status and latency.
OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version
CONFIGURATION:
DNS records are configured in /etc/dns-health-check.conf (or set CONFIG_FILE).
Each line defines a DNS check in pipe-delimited format:
record_name|record_type|dns_server|expected_value(optional)
Example config:
example.com|A|8.8.8.8|
mail.example.com|MX|8.8.8.8|
_ldap._tcp.example.com|SRV|10.0.0.1|
example.com|A|8.8.8.8|93.184.216.34
Lines starting with # are comments. Blank lines are ignored.
If expected_value is set, the script checks whether the DNS answer matches.
Records can also be supplied via the DNS_RECORDS environment variable
as a semicolon-separated list using the same pipe-delimited format:
DNS_RECORDS="example.com|A|8.8.8.8|;google.com|A|8.8.4.4|"
If neither a config file nor DNS_RECORDS is found, a default check
queries localhost via 127.0.0.1 as a basic resolution test.
ENVIRONMENT VARIABLES:
CONFIG_FILE Path to config file (default: /etc/dns-health-check.conf)
TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter)
DNS_RECORDS Semicolon-separated DNS records (overrides config file)
DEBUG Enable debug output when set to any value
EXAMPLES:
sudo $SCRIPT_NAME
$SCRIPT_NAME --dry-run
DEBUG=1 sudo $SCRIPT_NAME
DNS_RECORDS="example.com|A|8.8.8.8|93.184.216.34" $SCRIPT_NAME --dry-run
EOF
exit 0
}
# Print "<script name> version <version>" to stdout and exit with status 0.
show_version() {
    printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
    exit 0
}
# ============================================================================
# DEPENDENCY CHECK
# ============================================================================
# Verify that dig is available; log an installation hint and exit with
# status 1 when it is not. Returns 0 otherwise.
check_dependencies() {
    if command -v dig >/dev/null 2>&1; then
        return 0
    fi
    log_error "'dig' is not installed. Install bind-utils (RHEL/Rocky) or dnsutils (Debian/Ubuntu)."
    exit 1
}
# ============================================================================
# RECORD LOADING
# ============================================================================
# Print the DNS checks to run, one "name|type|server|expected" record per line.
#
# Source priority: DNS_RECORDS env var > config file > defaults
# Globals: DNS_RECORDS, CONFIG_FILE, DEFAULT_RECORDS (read)
# Outputs: records on stdout; a summary line via debug_echo
#
# Leading/trailing whitespace is trimmed and empty entries are dropped. In
# the config file everything from a "#" to the end of the line is a comment.
load_records() {
    local record_count=0
    local source=""
    local entry line
    local -a entries=()

    if [[ -n "${DNS_RECORDS:-}" ]]; then
        source="DNS_RECORDS environment variable"
        # Split on ";" with read instead of an unquoted expansion: entries
        # containing glob characters (e.g. a wildcard name) must not undergo
        # pathname expansion, and IFS stays unchanged for the rest of the
        # function. read returns non-zero at end of input, hence "|| true".
        IFS=';' read -r -d '' -a entries <<< "$DNS_RECORDS" || true
        for entry in "${entries[@]}"; do
            entry="${entry#"${entry%%[![:space:]]*}"}"
            entry="${entry%"${entry##*[![:space:]]}"}"
            if [[ -n "$entry" ]]; then
                echo "$entry"
                record_count=$((record_count + 1))
            fi
        done
    elif [[ -f "$CONFIG_FILE" ]]; then
        source="$CONFIG_FILE"
        # "|| [[ -n $line ]]" keeps the last record when the file does not
        # end with a newline.
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Strip comments and whitespace
            line="${line%%#*}"
            line="${line#"${line%%[![:space:]]*}"}"
            line="${line%"${line##*[![:space:]]}"}"
            if [[ -z "$line" ]]; then
                continue
            fi
            echo "$line"
            record_count=$((record_count + 1))
        done < "$CONFIG_FILE"
    else
        source="defaults"
        echo "$DEFAULT_RECORDS"
        record_count=1
    fi

    debug_echo "Loaded $record_count DNS record(s) from $source"
}
# ============================================================================
# DNS QUERY
# ============================================================================
# Run one DNS query with dig and report the outcome.
#
# Arguments: $1 - record name
#            $2 - record type (A, MX, SRV, ...)
#            $3 - DNS server to ask
#            $4 - expected value; may be empty
# Outputs:   "success|query_time|match" on stdout, where
#              success    is 1 when dig succeeded with a non-empty answer,
#              query_time is the wall-clock duration in seconds (6 decimals),
#              match      is 1/0 when an expected value was given (1 if it
#                         occurs as a substring of the answer), else empty.
query_dns() {
    local record="$1"
    local rtype="$2"
    local server="$3"
    local expected="$4"
    debug_echo "Querying $rtype record for $record via $server"

    local dig_output query_start query_end query_time
    local dig_status=0
    local success=0
    local answer=""
    local match=""

    # Nanosecond timestamps; falls back to whole seconds if date rejects %N.
    query_start=$(date +%s%N 2>/dev/null) || query_start=$(date +%s)000000000
    dig_output=$(dig +short +time=5 +tries=2 "$record" "$rtype" "@${server}" 2>/dev/null) || dig_status=$?
    # Stop the clock right after dig so the logging below is not measured.
    query_end=$(date +%s%N 2>/dev/null) || query_end=$(date +%s)000000000

    if (( dig_status == 0 )); then
        answer="$dig_output"
        if [[ -n "$answer" ]]; then
            success=1
            debug_echo "  Answer: $(echo "$answer" | tr '\n' ' ')"
        else
            debug_echo "  Empty answer (NXDOMAIN or no records)"
        fi
    else
        debug_echo "  Query failed"
    fi

    # Calculate query time in seconds
    local elapsed_ns=$((query_end - query_start))
    query_time=$(awk -v ns="$elapsed_ns" 'BEGIN {printf "%.6f", ns / 1000000000}')

    # Check expected value if provided.
    # "--" ends option parsing: an expected value that starts with "-"
    # (for example "-all" from an SPF record) is a pattern, not grep options.
    if [[ -n "$expected" ]]; then
        if grep -qF -- "$expected" <<< "$answer"; then
            match=1
            debug_echo "  Expected value matched: $expected"
        else
            match=0
            debug_echo "  Expected value NOT matched: $expected (got: $(echo "$answer" | tr '\n' ' '))"
        fi
    fi

    echo "${success}|${query_time}|${match}"
}
# ============================================================================
# METRICS COLLECTION
# ============================================================================
# Store in REPLY the argument escaped for use as a label value
# (backslash, double quote and newline).
_dns_escape_label() {
    local v=$1
    v=${v//\\/\\\\}
    v=${v//\"/\\\"}
    v=${v//$'\n'/\\n}
    REPLY=$v
}

# Query every configured record and print the result as Prometheus metrics.
#
# Outputs: linux_dns_query_success and linux_dns_query_time_seconds for each
#          valid record, plus linux_dns_query_answer_match for records that
#          carry an expected value. Lines are grouped per metric family.
#
# Records lacking a name, type or server are reported through log_error and
# skipped. The text is assembled with real newlines and printed verbatim, so
# backslashes in a record or expected value are never interpreted as escape
# sequences.
collect_metrics() {
    local -a records=()
    mapfile -t records < <(load_records)

    local nl=$'\n'
    local success_metrics="" time_metrics="" match_metrics=""
    local has_match_metric=false
    local record_line record rtype server expected
    local result qsuccess qtime qmatch labels

    for record_line in "${records[@]}"; do
        # Split on "|" without forking; "_" swallows any extra fields so
        # "expected" is exactly the fourth field.
        IFS='|' read -r record rtype server expected _ <<< "$record_line"
        if [[ -z "$record" ]] || [[ -z "$rtype" ]] || [[ -z "$server" ]]; then
            log_error "Invalid config line: $record_line (expected: record_name|record_type|dns_server|expected_value)"
            continue
        fi

        result=$(query_dns "$record" "$rtype" "$server" "$expected")
        IFS='|' read -r qsuccess qtime qmatch _ <<< "$result"

        _dns_escape_label "$record"
        labels="record=\"${REPLY}\""
        _dns_escape_label "$rtype"
        labels+=",type=\"${REPLY}\""
        _dns_escape_label "$server"
        labels+=",server=\"${REPLY}\""

        success_metrics+="linux_dns_query_success{${labels}} ${qsuccess}${nl}"
        time_metrics+="linux_dns_query_time_seconds{${labels}} ${qtime}${nl}"
        if [[ -n "$expected" ]]; then
            has_match_metric=true
            _dns_escape_label "$expected"
            match_metrics+="linux_dns_query_answer_match{${labels},expected=\"${REPLY}\"} ${qmatch}${nl}"
        fi
    done

    local output=""
    output+="# HELP linux_dns_query_success DNS query resolved successfully (1=resolved, 0=failed)${nl}"
    output+="# TYPE linux_dns_query_success gauge${nl}"
    output+="$success_metrics"
    output+="# HELP linux_dns_query_time_seconds DNS query resolution time in seconds${nl}"
    output+="# TYPE linux_dns_query_time_seconds gauge${nl}"
    output+="$time_metrics"
    if [[ "$has_match_metric" == "true" ]]; then
        output+="# HELP linux_dns_query_answer_match DNS answer matches expected value (1=match, 0=mismatch)${nl}"
        output+="# TYPE linux_dns_query_answer_match gauge${nl}"
        output+="$match_metrics"
    fi
    printf '%s' "$output"
}
# ============================================================================
# OUTPUT
# ============================================================================
# Collect the metrics and deliver them.
#
# With DRY_RUN=true the metrics are printed to stdout. Otherwise they are
# written to TMP_FILE and renamed onto OUTPUT_FILE, so the collector never
# reads a partial file.
#
# Globals: DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (read)
# Exits:   1 when the textfile directory is missing or the file cannot be
#          written or renamed. This script does not run under `set -e`, so
#          those failures are checked explicitly.
write_metrics() {
    local metrics
    metrics=$(collect_metrics)

    if [[ "$DRY_RUN" == "true" ]]; then
        printf '%s\n' "$metrics"
        return
    fi

    if [[ ! -d "$TEXTFILE_DIR" ]]; then
        log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
        exit 1
    fi

    if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
        log_error "Failed to write temporary metrics file: $TMP_FILE"
        exit 1
    fi
    # Make the file world-readable regardless of the caller's umask, so the
    # collector can read it even when it runs as a different user.
    chmod 644 "$TMP_FILE"
    if ! mv -f "$TMP_FILE" "$OUTPUT_FILE"; then
        log_error "Failed to move metrics file into place: $OUTPUT_FILE"
        exit 1
    fi
    debug_echo "Metrics written to $OUTPUT_FILE"
}
# ============================================================================
# MAIN
# ============================================================================
# Entry point: parse the command-line flags, make sure dig is available and
# produce the metrics.
#
# Arguments: the script's command-line arguments
# Globals:   DRY_RUN, DEBUG (written)
# Exits:     1 on an unknown option
main() {
    local arg
    # No option takes a value, so the arguments can be walked one by one.
    for arg in "$@"; do
        case "$arg" in
            --dry-run)
                DRY_RUN=true
                ;;
            --debug)
                DEBUG=1
                ;;
            --help|-h)
                show_help
                ;;
            --version|-v)
                show_version
                ;;
            *)
                log_error "Unknown option: $arg"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done

    check_dependencies
    write_metrics
}
# Run the exporter, forwarding all command-line arguments unchanged.
main "$@"
+1256
View File
File diff suppressed because it is too large Load Diff
+319
View File
@@ -0,0 +1,319 @@
#!/bin/bash
#############################################################
#### Expand Drive ####
#### Auto-expand partitions and filesystems ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 2.3 ####
#### ####
#### Usage: sudo ./expand-drive.sh ####
#############################################################
# Set strict error handling:
# -e: Exit immediately if a command exits with a non-zero status
# -u: Treat unset variables as an error when substituting
# -o pipefail: The return value of a pipeline is the status of the last command to exit with a non-zero status
set -euo pipefail
# Constants - external tools and log location.
# Despite the _PATH suffix these hold bare command names, so the binaries
# are resolved through PATH when they are invoked.
readonly BLKID_PATH="blkid" # Tool to locate/print block device attributes
readonly LSBLK_PATH="lsblk" # Tool to list block devices
readonly LOG_FILE="/var/log/expand_drive.log" # Location for script log output
# Configuration - Runtime behavior settings
# DRY_RUN is taken from the environment (DRY_RUN=true ./expand-drive.sh) and defaults to false.
readonly DRY_RUN=${DRY_RUN:-false} # If true, show what would be done without making changes
# Checked one by one in validate_prerequisites; the script exits if any is missing.
readonly REQUIRED_COMMANDS=("growpart" "xfs_growfs" "resize2fs") # Commands that must be available
readonly SUPPORTED_FILESYSTEMS=("xfs" "ext2" "ext3" "ext4") # Filesystem types we can expand
# Exit codes - Standardized exit status values
readonly EXIT_SUCCESS=0 # Script completed successfully
readonly EXIT_ERROR=1 # General error occurred
readonly EXIT_ROOT_REQUIRED=2 # Script must be run as root user
readonly EXIT_MISSING_DEPS=3 # Required dependencies are missing
# Log an informational message: "<date>: <message>" is printed to stdout and
# appended to LOG_FILE.
# Arguments: $1 - message text
log_message() {
    printf '%s: %s\n' "$(date)" "$1" | tee -a "$LOG_FILE"
}
# Log an error: "<date>: ERROR: <message>" is appended to LOG_FILE and
# printed to stderr (tee's stdout is redirected there).
# Arguments: $1 - message text
log_error() {
    printf '%s: ERROR: %s\n' "$(date)" "$1" | tee -a "$LOG_FILE" >&2
}
# Succeed (status 0) when the shell can resolve the given command name,
# fail otherwise. Prints nothing.
# Arguments: $1 - command name
command_exists() {
    command -v "$1" &>/dev/null
}
# Signal handler: log that the run was interrupted and terminate with
# EXIT_ERROR.
# The body only runs from a trap, which ShellCheck reports as unreachable
# code; the directive below silences that for the whole function.
# shellcheck disable=SC2317
cleanup() {
    log_message "Script interrupted, cleaning up..."
    exit "$EXIT_ERROR"
}
# Validate prerequisites before any partition or filesystem work starts.
#
# Globals: LOG_FILE, REQUIRED_COMMANDS, EXIT_ROOT_REQUIRED, EXIT_ERROR,
#          EXIT_MISSING_DEPS (read)
# Exits:   EXIT_ROOT_REQUIRED when not run as root,
#          EXIT_ERROR         when the log directory cannot be created,
#          EXIT_MISSING_DEPS  when a required command is missing.
# Fatal messages issued before logging is usable go to stderr, so they do
# not mix with regular output.
validate_prerequisites() {
    # Check if script is run as root (required for partition/filesystem operations)
    if [ "$(id -u)" -ne 0 ]; then
        echo "Error: This script must be run as root" >&2
        exit "$EXIT_ROOT_REQUIRED"
    fi

    # Ensure the log directory exists, creating it when necessary
    local log_dir
    log_dir=$(dirname "$LOG_FILE")
    if [ ! -d "$log_dir" ]; then
        mkdir -p "$log_dir" || {
            echo "Error: Cannot create log directory $log_dir" >&2
            exit "$EXIT_ERROR"
        }
    fi

    # Verify all required system commands are available
    local cmd
    for cmd in "${REQUIRED_COMMANDS[@]}"; do
        if ! command_exists "$cmd"; then
            log_error "Required command '$cmd' not found. Please install it."
            exit "$EXIT_MISSING_DEPS"
        fi
    done
}
# Tell whether this script can expand the given filesystem type.
# Arguments: $1 - filesystem type, e.g. "ext4"
# Returns:   0 when the type is listed in SUPPORTED_FILESYSTEMS, 1 otherwise
is_supported_filesystem() {
    local wanted="$1"
    local candidate
    for candidate in "${SUPPORTED_FILESYSTEMS[@]}"; do
        [[ "$candidate" == "$wanted" ]] && return 0
    done
    return 1
}
# Grow a mounted filesystem so it fills its (already enlarged) block device.
#
# Arguments: $1 - block device path (e.g. /dev/sda1)
#            $2 - filesystem type (xfs, ext2, ext3, ext4)
#            $3 - mount point of the filesystem
# Globals:   DRY_RUN (read)
# Returns:   0 on success or in dry-run mode; 1 for an unsupported type or
#            when the resize tool fails
#
# The resize tool's output is captured rather than discarded; on failure it
# is written to the error log so the reason is not lost.
expand_filesystem() {
    local partition="$1"
    local fs_type="$2"
    local mount_point="$3"
    local fs_label tool_output
    local -a resize_cmd

    # Validate filesystem type is one we support
    if ! is_supported_filesystem "$fs_type"; then
        log_error "Unsupported filesystem type $fs_type on $partition"
        return 1
    fi

    # Pick the resize command for the filesystem family
    case "$fs_type" in
        "xfs")
            fs_label="XFS"
            # XFS uses xfs_growfs and requires the mount point as argument
            resize_cmd=(xfs_growfs "$mount_point")
            ;;
        "ext2" | "ext3" | "ext4")
            fs_label="EXT"
            # EXT filesystems use resize2fs and require the device path as argument
            resize_cmd=(resize2fs "$partition")
            ;;
        *)
            # A type listed as supported but without a handler must not be
            # reported as a silent success.
            log_error "Unsupported filesystem type $fs_type on $partition"
            return 1
            ;;
    esac

    log_message "Expanding ${fs_label} filesystem on $partition"
    if [ "$DRY_RUN" = "true" ]; then
        log_message "DRY RUN: Would expand ${fs_label} filesystem on $partition"
        return 0
    fi

    if tool_output=$("${resize_cmd[@]}" 2>&1); then
        log_message "Successfully expanded ${fs_label} filesystem on $partition"
        return 0
    fi

    log_error "Failed to expand ${fs_label} filesystem on $partition"
    if [[ -n "$tool_output" ]]; then
        log_error "${resize_cmd[0]} output: $tool_output"
    fi
    return 1
}
# Grow a partition so it uses the free space that follows it on the disk.
#
# Arguments: $1 - parent disk device (e.g. /dev/sda)
#            $2 - partition device (e.g. /dev/sda1), used in messages
#            $3 - partition number (e.g. 1)
# Globals:   DRY_RUN (read)
# Returns:   0 when the partition was grown (or would be, in dry-run mode);
#            1 when growpart reports nothing to do or the resize failed
#
# growpart's output from the real run is captured; on failure it is written
# to the error log so the reason is not lost.
expand_partition() {
    local disk="$1"
    local partition="$2"
    local part_num="$3"
    local growpart_output

    # Check if partition can be expanded using growpart dry-run
    if ! growpart "$disk" "$part_num" --dry-run 2>/dev/null; then
        log_message "Partition $partition doesn't need expansion or cannot be expanded, skipping..."
        return 1 # Not an error, just nothing to do
    fi

    # Perform the actual partition expansion
    if [ "$DRY_RUN" = "true" ]; then
        log_message "DRY RUN: Would expand partition $partition"
        return 0
    fi

    if growpart_output=$(growpart "$disk" "$part_num" 2>&1); then
        log_message "Successfully expanded partition $partition"
        return 0
    fi

    log_error "Failed to expand partition $partition"
    if [[ -n "$growpart_output" ]]; then
        log_error "growpart output: $growpart_output"
    fi
    return 1
}
# Set up signal trap to handle interruptions gracefully:
# on SIGINT / SIGTERM the cleanup handler logs the interruption and exits.
trap cleanup INT TERM
# Initialize script by validating prerequisites (root, log directory,
# required commands); the call exits the script if any check fails.
validate_prerequisites
# Process a single partition: grow the partition, then the filesystem on it.
#
# Arguments: $1 - partition device path (e.g. /dev/sda1)
#            $2 - parent disk device path (e.g. /dev/sda)
# Globals:   BLKID_PATH (read)
# Returns:   0 when the partition was expanded or legitimately skipped (not
#            mounted, nothing to grow, unknown filesystem); 1 when the
#            partition number cannot be determined or the filesystem resize
#            fails
#
# The script runs under `set -euo pipefail`, so probes that are expected to
# fail (findmnt on an unmounted device, blkid on a device without a
# filesystem signature) are guarded; otherwise the script would abort before
# the warning branches below are reached.
process_partition() {
    local partition="$1"
    local disk="$2"
    log_message "Processing partition $partition"

    # Check if the filesystem is currently mounted (required for filesystem expansion)
    local mount_point
    mount_point=$(findmnt -n -o TARGET "$partition" 2>/dev/null) || true
    # A device mounted more than once yields several lines; use the first.
    mount_point=${mount_point%%$'\n'*}
    if [ -z "$mount_point" ]; then
        log_message "Warning: $partition is not mounted, skipping filesystem resize"
        return 0
    fi

    # Extract partition number from device path (e.g., extract "1" from "/dev/sda1")
    local part_num=""
    if [[ "$partition" =~ ([0-9]+)$ ]]; then
        part_num="${BASH_REMATCH[1]}"
    fi
    if [ -z "$part_num" ]; then
        log_error "Could not extract partition number from $partition"
        return 1
    fi

    # First expand the partition to use available disk space
    if ! expand_partition "$disk" "$partition" "$part_num"; then
        return 0 # Not an error if partition doesn't need expansion
    fi

    # Detect the filesystem type using blkid
    local fs_type
    fs_type=$($BLKID_PATH -s TYPE -o value "$partition") || true
    if [ -z "$fs_type" ]; then
        log_message "Warning: Could not detect filesystem type for $partition, skipping..."
        return 0
    fi

    # Get current filesystem size before expansion
    local current_size
    current_size=$(df -h "$mount_point" | awk 'NR==2 {print $2}')
    log_message "Current filesystem size on $partition: $current_size"

    # Expand the filesystem to use the newly available partition space
    if ! expand_filesystem "$partition" "$fs_type" "$mount_point"; then
        return 1
    fi

    # Show new size after expansion
    local new_size
    new_size=$(df -h "$mount_point" | awk 'NR==2 {print $2}')
    log_message "New filesystem size on $partition: $new_size"
}
# Expand a filesystem that sits directly on a disk (no partition table).
# Arguments:
#   $1 - disk device path (e.g., /dev/nvme3n1)
#   $2 - mount point of the filesystem
# Globals:
#   BLKID_PATH (read) - blkid command used for filesystem detection
# Returns:
#   0 when the filesystem type cannot be detected (skip);
#   otherwise the status of the final log_message
process_direct_filesystem() {
  local disk="$1"
  local mount_point="$2"
  local fs_type size_before size_after

  log_message "Processing direct filesystem on $disk mounted at $mount_point"

  # Filesystem type decides which resize tool expand_filesystem uses
  # shellcheck disable=SC2086  # BLKID_PATH is expanded unquoted, as before
  fs_type=$($BLKID_PATH -s TYPE -o value "$disk")
  [ -n "$fs_type" ] || {
    log_message "Warning: Could not detect filesystem type for $disk, skipping..."
    return 0
  }

  # Size as reported by df before the resize
  size_before=$(df -h "$mount_point" | awk 'NR==2 {print $2}')
  log_message "Current filesystem size on $disk: $size_before"

  # The whole disk is the block device handed to the resize step
  expand_filesystem "$disk" "$fs_type" "$mount_point"

  # Size as reported by df after the resize
  size_after=$(df -h "$mount_point" | awk 'NR==2 {print $2}')
  log_message "New filesystem size on $disk: $size_after"
}
# Process every partition on one disk, or the disk itself when it carries a
# filesystem directly (no partition table).
# Arguments:
#   $1 - disk device path (e.g., /dev/sda)
# Globals:
#   LSBLK_PATH (read) - lsblk command
# Returns:
#   1 when lsblk fails; 0 when the disk has no partitions;
#   otherwise the status of the last process_partition call
process_disk() {
  local disk="$1" # Disk device path (e.g., /dev/sda)
  local lsblk_output partitions partition mount_point

  log_message "Checking partitions on $disk..."

  # List the disk and its children as "NAME TYPE" pairs
  # shellcheck disable=SC2086  # LSBLK_PATH is expanded unquoted, as elsewhere in this script
  lsblk_output=$($LSBLK_PATH -pln -o NAME,TYPE "$disk" 2>&1) || {
    log_error "lsblk command failed for $disk: $lsblk_output"
    return 1
  }

  # Match the TYPE column exactly. A substring match on "part" would also
  # select devices whose *name* contains "part" (e.g. /dev/mapper/vg-part_data).
  partitions=$(printf '%s\n' "$lsblk_output" | awk '$2 == "part" { print $1 }')

  if [ -z "$partitions" ]; then
    # Check if the disk itself has a filesystem (no partition table)
    mount_point=$(findmnt -n -o TARGET "$disk" 2>/dev/null)
    if [ -n "$mount_point" ]; then
      log_message "No partitions found on $disk, but disk has direct filesystem. Processing disk directly..."
      process_direct_filesystem "$disk" "$mount_point"
    else
      log_message "No partitions found on $disk, skipping..."
    fi
    return 0
  fi

  # Process each partition found on this disk
  # (device paths contain no whitespace, so word-splitting the list is intended)
  for partition in $partitions; do
    process_partition "$partition" "$disk"
  done
}
# Main execution function - orchestrates the entire drive expansion process.
# Enumerates all devices lsblk reports with TYPE "disk" and hands each one to
# process_disk.
# Globals:
#   LSBLK_PATH, EXIT_ERROR, EXIT_SUCCESS (read)
# Exits:
#   EXIT_ERROR when no disk is found, EXIT_SUCCESS otherwise (never returns)
main() {
  local devices disk

  log_message "Starting drive expansion process..."

  # Match the TYPE column exactly. A substring match on "disk" would also
  # pick up devices whose *name* contains "disk" (e.g. /dev/mapper/vg-disk1).
  # shellcheck disable=SC2086  # LSBLK_PATH is expanded unquoted, as elsewhere in this script
  devices=$($LSBLK_PATH -pln -o NAME,TYPE | awk '$2 == "disk" { print $1 }')

  # Verify we found at least one disk device
  if [ -z "$devices" ]; then
    log_error "No disk devices found"
    exit "$EXIT_ERROR"
  fi

  # Process each disk device found
  # (device paths contain no whitespace, so word-splitting the list is intended)
  for disk in $devices; do
    # Verify device is actually a block device before processing
    if [ ! -b "$disk" ]; then
      log_error "Device $disk is not a block device, skipping..."
      continue
    fi
    process_disk "$disk"
  done

  log_message "Drive expansion completed"
  exit "$EXIT_SUCCESS"
}
# Execute the main function to start the script
# (main takes no arguments and terminates the script itself via exit)
main
+914
View File
@@ -0,0 +1,914 @@
#!/bin/bash
################################################################################
# Script Name: fail2ban-exporter.sh
# Version: 2.0
# Description: Prometheus exporter for fail2ban providing comprehensive metrics
# for monitoring jail status, ban/unban activity, and threat analysis
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Prerequisites:
# - fail2ban-client command available
# - fail2ban service running
# - journalctl (systemd) for historical data
# - netcat (nc) for HTTP mode
# - /var/log/fail2ban.log for timestamp parsing
#
# Usage:
# # Output to stdout
# ./fail2ban-exporter.sh
#
# # HTTP server mode
# ./fail2ban-exporter.sh --http -p 9191
#
# # Textfile collector mode
# ./fail2ban-exporter.sh --textfile
#
# Metrics Exported:
# Core Metrics (v1.0):
# - fail2ban_up{} - Exporter status (1=up, 0=down)
# - fail2ban_server_info{version,exporter_version} - Server version info
# - fail2ban_jail_count{} - Total number of jails (gauge)
# - fail2ban_jail_enabled{jail} - Jail enabled status (gauge)
# - fail2ban_jail_failed_current{jail} - Currently failed attempts (gauge)
# - fail2ban_jail_banned_current{jail} - Currently banned IPs (gauge)
# - fail2ban_jail_failed_total{jail} - Total failed attempts (counter)
# - fail2ban_jail_banned_total{jail} - Total banned IPs (counter)
# - fail2ban_jail_ban_rate{jail} - Ban ratio: banned/failed (gauge)
#
# Enhanced Metrics (v2.0):
# - fail2ban_jail_last_ban_timestamp{jail} - Unix timestamp of last ban (gauge)
# - fail2ban_jail_last_unban_timestamp{jail} - Unix timestamp of last unban (gauge)
# - fail2ban_jail_bans_per_period{jail,period} - Bans in 1h/24h (gauge)
# - fail2ban_jail_unbans_per_period{jail,period} - Unbans in 1h/24h (gauge)
# - fail2ban_jail_unique_banned_ips{jail,period} - Unique IPs banned (gauge)
# - fail2ban_jail_info{jail,port,protocol,filter} - Jail configuration (gauge)
# - fail2ban_jail_top_attacker_count{jail,ip} - Top 5 attacking IPs (gauge)
# - fail2ban_jail_ban_rate_per_hour{jail} - Average bans/hour over 24h (gauge)
# - fail2ban_jail_repeat_offenders{jail,threshold} - Repeat offender count (7d)
# - fail2ban_jail_seconds_since_last_ban{jail} - Seconds since last ban
# - fail2ban_jail_seconds_since_last_unban{jail} - Seconds since last unban
# - fail2ban_log_size_bytes - Size of fail2ban.log file
# - fail2ban_log_age_seconds - Time since last log modification
# - fail2ban_log_rotation_timestamp - Last log rotation time
# - fail2ban_exporter_duration_seconds - Script execution time
# - fail2ban_exporter_last_run_timestamp - Last successful run time
#
# Configuration:
# Default HTTP port: 9191
# Textfile directory: /var/lib/node_exporter
# Log source: /var/log/fail2ban.log
#
################################################################################
# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================
# Directory used by --textfile; metrics are written to fail2ban.prom inside it
TEXTFILE_DIR="/var/lib/node_exporter"
# Destination file (set by --textfile or -o/--output); empty means stdout
OUTPUT_FILE=""
# Switched to true by --http
HTTP_MODE=false
# Listening port for --http mode (override with -p/--port)
HTTP_PORT=9191
# fail2ban log file read by the timestamp, ban-history and log-health helpers
FAIL2BAN_LOG="/var/log/fail2ban.log"
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print the command-line help text to stdout and terminate the script.
# Globals:
#   HTTP_PORT (read) - shown as the port used by --http
# Exits:
#   0 (never returns)
show_usage() {
  local prog="$0"

  # One argument per output line
  printf '%s\n' \
    "Usage: $prog [OPTIONS]" \
    "Export fail2ban statistics as Prometheus metrics (Enhanced v2.0)." \
    "MODES:" \
    "--textfile Write to node_exporter textfile collector" \
    "--http Run HTTP server on port $HTTP_PORT" \
    "OPTIONS:" \
    "-p, --port HTTP port (default: 9191)" \
    "-o, --output Output file path" \
    "EXAMPLES:" \
    "$prog --textfile # Write to textfile collector" \
    "$prog --http --port 9191 # Run HTTP server" \
    "$prog -o /tmp/fail2ban.prom # Write to custom file" \
    "NEW METRICS v2.0:" \
    "- Jail health: last ban/unban timestamps, ban rates" \
    "- Top attackers: most banned IPs per jail" \
    "- Ban duration: average, min, max per jail" \
    "- Protocol/port breakdown" \
    "- Jail uptime and status"
  exit 0
}
# Parse command-line options into the global configuration variables.
# Arguments:
#   "$@" - the script's command-line arguments
# Globals:
#   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written)
# Exits:
#   0 after printing help (-h/--help)
#   1 on an unknown option, a missing option value, or an invalid port
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        exit 0 # show_usage exits itself; this only guards against a looping parse
        ;;
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/fail2ban.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p|--port)
        # Without this check a trailing "-p" makes "shift 2" fail without
        # shifting, and the while loop never terminates.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires an argument" >&2
          exit 1
        fi
        # 10# forces base 10 so that a leading zero is not read as octal
        if ! [[ "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
          echo "Invalid port: $2" >&2
          exit 1
        fi
        HTTP_PORT="$2"
        shift 2
        ;;
      -o|--output)
        if [[ $# -lt 2 || -z "$2" ]]; then
          echo "Option $1 requires an argument" >&2
          exit 1
        fi
        OUTPUT_FILE="$2"
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}
# Verify that fail2ban can be queried.
# Outputs: an "ERROR: ..." line on stderr when a check fails
# Returns: 0 when fail2ban-client exists and the server answers a ping,
#          1 otherwise
check_fail2ban() {
  # The client binary must be resolvable
  command -v fail2ban-client >/dev/null 2>&1 || {
    echo "ERROR: fail2ban-client not found" >&2
    return 1
  }

  # The server must answer a ping
  fail2ban-client ping >/dev/null 2>&1 || {
    echo "ERROR: fail2ban server not responding" >&2
    return 1
  }

  return 0
}
# List the jails reported by "fail2ban-client status".
# Outputs: jail names on one line, separated by single spaces
get_jails() {
  # sed keeps only the text after "Jail list:"; tabs are dropped, commas become
  # newlines, and xargs re-joins the names with single spaces.
  fail2ban-client status 2>/dev/null \
    | sed -n 's/.*Jail list://p' \
    | tr -d '\t' \
    | tr ',' '\n' \
    | xargs
}
# Read the four counters of one jail from "fail2ban-client status <jail>".
# Arguments:
#   $1 - jail name
# Outputs:
#   currently_failed|currently_banned|total_failed|total_banned
#   (a counter that cannot be found is reported as 0)
get_jail_stats() {
  local jail="$1"
  local report label value fields=""

  report=$(fail2ban-client status "$jail" 2>/dev/null)

  # Labels are listed in output order; the number is the last field of the
  # line carrying the label.
  for label in "Currently failed:" "Currently banned:" "Total failed:" "Total banned:"; do
    value=$(echo "$report" | grep "$label" | awk '{print $NF}')
    fields+="${fields:+|}${value:-0}"
  done

  echo "$fields"
}
# Get list of currently banned IPs for a jail
# Arguments:
#   $1 - jail name
# Outputs:
#   one IP per line; nothing when the list is empty or the jail is unknown
# Returns:
#   status of the final grep (non-zero when no IP was printed)
get_banned_ips() {
  local jail="$1"

  # The text after "Banned IP list:" is split on spaces AND tabs: the previous
  # space-only split left a tab glued to the first address (and produced a
  # tab-only "IP" for an empty list) whenever the label is followed by a tab,
  # as get_jails already assumes for the "Jail list:" label.
  fail2ban-client status "$jail" 2>/dev/null \
    | sed -n 's/.*Banned IP list://p' \
    | tr -s ' \t' '\n' \
    | grep -v '^$'
}
# Find when a jail last banned an address, according to the fail2ban log.
# Arguments:
#   $1 - jail name
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   Unix timestamp of the newest matching log line, or 0 when none is found
get_last_ban_timestamp() {
  local jail="$1"
  local last_line epoch=""

  # Newest log line that mentions [jail] and "Ban "
  last_line=$(grep "\[$jail\]" "$FAIL2BAN_LOG" 2>/dev/null | grep "Ban " | tail -1)

  if [ -n "$last_line" ]; then
    # First two fields are the date and time; let date(1) convert them
    epoch=$(printf '%s\n' "$last_line" \
      | awk '{print $1, $2}' \
      | xargs -I{} date -d "{}" +%s 2>/dev/null)
  fi

  echo "${epoch:-0}"
}
# Find when a jail last unbanned an address, according to the fail2ban log.
# Arguments:
#   $1 - jail name
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   Unix timestamp of the newest matching log line, or 0 when none is found
get_last_unban_timestamp() {
  local jail="$1"
  local last_line epoch=""

  # Newest log line that mentions [jail] and "Unban "
  last_line=$(grep "\[$jail\]" "$FAIL2BAN_LOG" 2>/dev/null | grep "Unban " | tail -1)

  if [ -n "$last_line" ]; then
    # First two fields are the date and time; let date(1) convert them
    epoch=$(printf '%s\n' "$last_line" \
      | awk '{print $1, $2}' \
      | xargs -I{} date -d "{}" +%s 2>/dev/null)
  fi

  echo "${epoch:-0}"
}
# Count ban events within a time period
# Arguments:
#   $1 - jail name
#   $2 - start of the period as a date(1) expression (e.g., "1 hour ago")
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   number of "[jail] Ban " events since the start of the period
#
# The jail name is always matched as a literal string. It used to be spliced
# into a grep regex and into the awk program text, so names containing regex
# characters matched other jails and a "/" broke the awk program.
get_ban_rate() {
  local jail="$1"
  local period="$2"
  local needle="[$jail] Ban "
  local count cutoff

  # Try journalctl first (already limited to the period by --since)
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null | grep -cF -- "$needle")

  # If the journal shows nothing, fall back to the log file
  if [ "${count:-0}" -eq 0 ] 2>/dev/null && [ -f "$FAIL2BAN_LOG" ]; then
    # Assumes log lines start with "YYYY-MM-DD HH:MM:SS[,ms]" in local time
    # (the first two fields were already fed to date(1) before). That format
    # sorts lexicographically, so the cutoff is compared as a string instead
    # of forking one date(1) per log line. An empty cutoff (date -d failed)
    # counts every matching line.
    cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""
    count=$(F2B_NEEDLE="$needle" awk -v cutoff="$cutoff" '
      BEGIN { needle = ENVIRON["F2B_NEEDLE"] }
      index($0, needle) && $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ && ($1 " " $2) >= (cutoff "") { hits++ }
      END { print hits + 0 }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
# Count unban events within a time period
# Arguments:
#   $1 - jail name
#   $2 - start of the period as a date(1) expression (e.g., "1 hour ago")
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   number of "[jail] Unban " events since the start of the period
#
# The jail name is always matched as a literal string. It used to be spliced
# into a grep regex and into the awk program text, so names containing regex
# characters matched other jails and a "/" broke the awk program.
get_unban_rate() {
  local jail="$1"
  local period="$2"
  local needle="[$jail] Unban "
  local count cutoff

  # Try journalctl first (already limited to the period by --since)
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null | grep -cF -- "$needle")

  # If the journal shows nothing, fall back to the log file
  if [ "${count:-0}" -eq 0 ] 2>/dev/null && [ -f "$FAIL2BAN_LOG" ]; then
    # Assumes log lines start with "YYYY-MM-DD HH:MM:SS[,ms]" in local time
    # (the first two fields were already fed to date(1) before). That format
    # sorts lexicographically, so the cutoff is compared as a string instead
    # of forking one date(1) per log line. An empty cutoff (date -d failed)
    # counts every matching line.
    cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""
    count=$(F2B_NEEDLE="$needle" awk -v cutoff="$cutoff" '
      BEGIN { needle = ENVIRON["F2B_NEEDLE"] }
      index($0, needle) && $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ && ($1 " " $2) >= (cutoff "") { hits++ }
      END { print hits + 0 }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
# Get top attacking IPs by ban count
# Arguments:
#   $1 - jail name
#   $2 - maximum number of IPs to report (default: 5)
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   "count IP" lines (uniq -c format), highest count first; covers every
#   matching line in the current log file - there is no time filter
get_top_banned_ips() {
  local jail="$1"
  local limit="${2:-5}"

  # -F: match "[jail] Ban " literally. Used as a regex, a jail name such as
  # "ssh.d" would also count the bans of "sshXd".
  grep -F -- "[$jail] Ban " "$FAIL2BAN_LOG" 2>/dev/null \
    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n "$limit"
}
# Count unique IPs banned in a time period
# Arguments:
#   $1 - jail name
#   $2 - start of the period as a date(1) expression (e.g., "24 hours ago")
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   number of distinct IPv4 addresses in "[jail] Ban " events since the start
#   of the period
#
# The jail name is always matched as a literal string. It used to be spliced
# into a grep regex and into the awk program text, so names containing regex
# characters matched other jails and a "/" broke the awk program.
get_unique_banned_ips() {
  local jail="$1"
  local period="$2"
  local needle="[$jail] Ban "
  local count cutoff

  # Try journalctl first (already limited to the period by --since)
  count=$(journalctl -u fail2ban --since "$period" 2>/dev/null \
    | grep -F -- "$needle" \
    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | sort -u \
    | wc -l 2>/dev/null)

  # Fall back to the log file if the journal shows nothing
  if [ "${count:-0}" -eq 0 ] 2>/dev/null && [ -f "$FAIL2BAN_LOG" ]; then
    # Assumes log lines start with "YYYY-MM-DD HH:MM:SS[,ms]" in local time
    # (the first two fields were already fed to date(1) before). That format
    # sorts lexicographically, so the cutoff is compared as a string instead
    # of forking one date(1) per log line. An empty cutoff (date -d failed)
    # accepts every matching line.
    cutoff=$(date -d "$period" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""
    count=$(F2B_NEEDLE="$needle" awk -v cutoff="$cutoff" '
      BEGIN { needle = ENVIRON["F2B_NEEDLE"] }
      index($0, needle) && $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ && ($1 " " $2) >= (cutoff "") {
        # First dotted quad on the line is the banned address
        if (match($0, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) seen[substr($0, RSTART, RLENGTH)] = 1
      }
      END {
        total = 0
        for (ip in seen) total++
        print total
      }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
# Placeholder for per-jail ban duration statistics.
# Arguments:
#   $1 - jail name (accepted but not used yet)
# Outputs:
#   avg|min|max in seconds - currently fixed values, not measured data
get_ban_duration_stats() {
  local jail="$1"

  # Placeholder until real parsing exists: 1h avg, 30min min, 2h max
  local avg_s=3600 min_s=1800 max_s=7200
  echo "${avg_s}|${min_s}|${max_s}"
}
# Look up the "port" setting of a jail in the fail2ban configuration.
# Arguments:
#   $1 - jail name
# Globals:
#   FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#                                          default /etc/fail2ban
# Outputs:
#   the last whitespace-separated field of the first "port" line found in
#   jail.d/<jail>.conf, else in the [<jail>] section of jail.local,
#   else "unknown"
get_jail_port() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local port=""

  # Per-jail drop-in file: any line starting with "port"; first one wins so
  # the result is always a single line
  if [ -f "$conf_dir/jail.d/$jail.conf" ]; then
    port=$(grep "^port" "$conf_dir/jail.d/$jail.conf" 2>/dev/null | awk '{print $NF}' | head -n 1)
  fi

  # jail.local: track the current [section] explicitly. The previous awk range
  # /\[jail\]/,/^\[/ closed on the very line that opened it (the header also
  # starts with "["), so the section body was never inspected.
  if [ -z "$port" ] && [ -f "$conf_dir/jail.local" ]; then
    port=$(F2B_JAIL="$jail" awk '
      BEGIN { want = "[" ENVIRON["F2B_JAIL"] "]" }
      /^[ \t]*\[/ {
        header = $0
        gsub(/^[ \t]+|[ \t]+$/, "", header)
        in_jail = (header == want)
        next
      }
      in_jail && /^port/ { print $NF; exit }
    ' "$conf_dir/jail.local" 2>/dev/null)
  fi

  echo "${port:-unknown}"
}
# Guess the transport protocol of a jail from its name.
# Arguments:
#   $1 - jail name
# Outputs:
#   "udp" for dns-named jails that match none of the TCP service patterns,
#   "tcp" for everything else
get_jail_protocol() {
  local jail="$1"
  local proto="tcp" # default for unknown services

  # The TCP service patterns (ssh, http, nginx, apache, smtp, mail) take
  # precedence over the dns pattern, so a name matching both stays tcp.
  if [[ "$jail" == *dns* ]] \
    && [[ "$jail" != *ssh* && "$jail" != *http* && "$jail" != *nginx* ]] \
    && [[ "$jail" != *apache* && "$jail" != *smtp* && "$jail" != *mail* ]]; then
    proto="udp"
  fi

  echo "$proto"
}
# Look up the "logpath" setting of a jail in the fail2ban configuration.
# Arguments:
#   $1 - jail name
# Globals:
#   FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#                                          default /etc/fail2ban
# Outputs:
#   the last whitespace-separated field of the first "logpath" line found in
#   jail.d/<jail>.conf, else in the [<jail>] section of jail.local,
#   else /var/log/auth.log
get_jail_logpath() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local logpath=""

  # Per-jail drop-in file: any line starting with "logpath"; first one wins so
  # the result is always a single line
  if [ -f "$conf_dir/jail.d/$jail.conf" ]; then
    logpath=$(grep "^logpath" "$conf_dir/jail.d/$jail.conf" 2>/dev/null | awk '{print $NF}' | head -n 1)
  fi

  # jail.local: track the current [section] explicitly. The previous awk range
  # /\[jail\]/,/^\[/ closed on the very line that opened it (the header also
  # starts with "["), so the section body was never inspected.
  if [ -z "$logpath" ] && [ -f "$conf_dir/jail.local" ]; then
    logpath=$(F2B_JAIL="$jail" awk '
      BEGIN { want = "[" ENVIRON["F2B_JAIL"] "]" }
      /^[ \t]*\[/ {
        header = $0
        gsub(/^[ \t]+|[ \t]+$/, "", header)
        in_jail = (header == want)
        next
      }
      in_jail && /^logpath/ { print $NF; exit }
    ' "$conf_dir/jail.local" 2>/dev/null)
  fi

  echo "${logpath:-/var/log/auth.log}"
}
# Look up the "filter" setting of a jail in its jail.d drop-in file.
# Arguments:
#   $1 - jail name
# Globals:
#   FAIL2BAN_CONFIG_DIR (read, optional) - configuration root,
#                                          default /etc/fail2ban
# Outputs:
#   the last whitespace-separated field of the first "filter" line in
#   jail.d/<jail>.local or, failing that, jail.d/<jail>.conf (the file the
#   port/logpath helpers read); the jail name itself when neither file
#   defines a filter
get_jail_filter() {
  local jail="$1"
  local conf_dir="${FAIL2BAN_CONFIG_DIR:-/etc/fail2ban}"
  local candidate filter=""

  # Filter command doesn't work in fail2ban-client, extract from config
  for candidate in "$conf_dir/jail.d/$jail.local" "$conf_dir/jail.d/$jail.conf"; do
    [ -f "$candidate" ] || continue
    filter=$(grep "^filter" "$candidate" 2>/dev/null | awk '{print $NF}' | head -n 1)
    if [ -n "$filter" ]; then
      break
    fi
  done

  # Default to the jail name. The former `grep | awk || echo "$jail"` never
  # took the fallback because awk succeeds even on empty input, which left an
  # empty filter label whenever the file had no filter line.
  echo "${filter:-$jail}"
}
# Report whether fail2ban currently knows a jail.
# Arguments:
#   $1 - jail name
# Outputs:
#   "1" when "fail2ban-client status <jail>" succeeds, "0" otherwise
get_jail_enabled() {
  local jail="$1"
  local flag="0"

  # A successful status query is taken as "enabled"
  fail2ban-client status "$jail" >/dev/null 2>&1 && flag="1"

  echo "$flag"
}
# Count IPs that were banned repeatedly by a jail during the last 7 days.
# Arguments:
#   $1 - jail name
#   $2 - minimum number of bans to count as a repeat offender (default: 2)
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   number of IPv4 addresses with at least $2 "[jail] Ban " events
#
# The jail name is always matched as a literal string. It used to be spliced
# into a grep regex and into the awk program text, so names containing regex
# characters matched other jails and a "/" broke the awk program.
get_repeat_offender_count() {
  local jail="$1"
  local threshold="${2:-2}" # Default: 2+ bans = repeat offender
  local needle="[$jail] Ban "
  local count cutoff

  # Try journalctl first (already limited to the window by --since)
  count=$(journalctl -u fail2ban --since "7 days ago" 2>/dev/null \
    | grep -F -- "$needle" \
    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | sort \
    | uniq -c \
    | awk -v t="$threshold" '$1 >= t + 0 { hits++ } END { print hits + 0 }')

  # Fall back to the log file if the journal shows nothing
  if [ "${count:-0}" -eq 0 ] 2>/dev/null && [ -f "$FAIL2BAN_LOG" ]; then
    # Assumes log lines start with "YYYY-MM-DD HH:MM:SS[,ms]" in local time
    # (the first two fields were already fed to date(1) before). That format
    # sorts lexicographically, so the cutoff is compared as a string instead
    # of forking one date(1) per log line. An empty cutoff (date -d failed)
    # accepts every matching line.
    cutoff=$(date -d "7 days ago" '+%Y-%m-%d %H:%M:%S' 2>/dev/null) || cutoff=""
    count=$(F2B_NEEDLE="$needle" awk -v cutoff="$cutoff" -v threshold="$threshold" '
      BEGIN { needle = ENVIRON["F2B_NEEDLE"] }
      index($0, needle) && $1 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/ && ($1 " " $2) >= (cutoff "") {
        # First dotted quad on the line is the banned address
        if (match($0, /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) bans[substr($0, RSTART, RLENGTH)]++
      }
      END {
        repeaters = 0
        for (ip in bans) {
          if (bans[ip] >= threshold + 0) repeaters++
        }
        print repeaters
      }
    ' "$FAIL2BAN_LOG" 2>/dev/null)
  fi

  echo "${count:-0}"
}
# Report the size of the fail2ban log file.
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   size in bytes, or 0 when the file is missing or stat fails
get_log_size() {
  if [ -f "$FAIL2BAN_LOG" ]; then
    stat -c %s "$FAIL2BAN_LOG" 2>/dev/null || echo "0"
  else
    echo "0"
  fi
}
# Report how long ago the fail2ban log file was last modified.
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   seconds since the file's modification time, or 0 when the file is missing
get_log_age() {
  # No log file: report 0
  [ -f "$FAIL2BAN_LOG" ] || {
    echo "0"
    return
  }

  local now mtime
  now=$(date +%s)
  # A failing stat is treated as mtime 0
  mtime=$(stat -c %Y "$FAIL2BAN_LOG" 2>/dev/null || echo 0)
  echo $((now - mtime))
}
# Report when the fail2ban log was last rotated.
# Globals:
#   FAIL2BAN_LOG (read)
# Outputs:
#   modification time (Unix timestamp) of the newest rotated log among
#   <log>.1 and <log>-*.gz, or 0 when none exists
get_log_rotation_timestamp() {
  local newest

  # Print the mtime of every candidate that exists and keep the largest;
  # missing files and an unmatched glob only produce suppressed errors.
  newest=$(stat -c %Y "${FAIL2BAN_LOG}".1 "${FAIL2BAN_LOG}"-*.gz 2>/dev/null \
    | sort -rn \
    | head -1)

  echo "${newest:-0}"
}
# ============================================================================
# METRIC GENERATION
# ============================================================================
# Print one metric family: HELP line, TYPE line, pre-rendered samples, and the
# blank separator line.
# Arguments: $1 - metric name, $2 - metric type, $3 - help text,
#            $4 - sample lines, each already terminated by a newline
_f2b_emit_section() {
  printf '# HELP %s %s\n# TYPE %s %s\n' "$1" "$3" "$1" "$2"
  printf '%s' "$4"
  echo ""
}

# Generate all Prometheus metrics
# Outputs:
#   Prometheus text format on stdout. When check_fail2ban fails only the
#   fail2ban_up family (value 0) is printed.
#
# Every helper is queried once per jail and the sample lines are buffered per
# metric family, then printed in the established family order. Previously
# get_jail_stats ran five times per jail, and the last-ban/last-unban and
# 24h-ban lookups ran twice.
generate_metrics() {
  local script_start
  script_start=$(date +%s)

  # Check fail2ban status first
  if ! check_fail2ban; then
    printf '%s\n' \
      '# HELP fail2ban_up Fail2ban exporter status' \
      '# TYPE fail2ban_up gauge' \
      'fail2ban_up 0'
    return
  fi

  local jails jail_count server_version current_time
  jails=$(get_jails)
  jail_count=$(echo "$jails" | wc -w)
  server_version=$(fail2ban-client version 2>/dev/null | head -1 | awk '{print $NF}')
  # Reference time for the "seconds since" gauges
  current_time=$(date +%s)

  # One buffer of sample lines per metric family
  local s_enabled="" s_failed_cur="" s_banned_cur="" s_failed_tot="" s_banned_tot=""
  local s_ratio="" s_last_ban="" s_last_unban="" s_bans="" s_unbans="" s_unique=""
  local s_info="" s_top="" s_hourly="" s_repeat="" s_since_ban="" s_since_unban=""

  local jail enabled stats cur_failed cur_banned tot_failed tot_banned rest ban_rate
  local last_ban last_unban bans_1h bans_24h unbans_1h unbans_24h unique_1h unique_24h
  local port protocol filter hits ip bans_24h_clean hourly_rate
  local repeat_2 repeat_5 repeat_10 seconds_since

  # Jail names come from get_jails as one space-separated line, so
  # word-splitting is intended here.
  for jail in $jails; do
    enabled=$(get_jail_enabled "$jail")
    s_enabled+="fail2ban_jail_enabled{jail=\"$jail\"} $enabled"$'\n'

    # currently_failed|currently_banned|total_failed|total_banned
    stats=$(get_jail_stats "$jail")
    IFS='|' read -r cur_failed cur_banned tot_failed tot_banned rest <<<"$stats"
    s_failed_cur+="fail2ban_jail_failed_current{jail=\"$jail\"} ${cur_failed:-0}"$'\n'
    s_banned_cur+="fail2ban_jail_banned_current{jail=\"$jail\"} ${cur_banned:-0}"$'\n'
    s_failed_tot+="fail2ban_jail_failed_total{jail=\"$jail\"} ${tot_failed:-0}"$'\n'
    s_banned_tot+="fail2ban_jail_banned_total{jail=\"$jail\"} ${tot_banned:-0}"$'\n'

    # Ban ratio; awk does the floating point division, values passed via -v
    if [ "${tot_failed:-0}" -gt 0 ] 2>/dev/null; then
      ban_rate=$(awk -v banned="${tot_banned:-0}" -v failed="$tot_failed" \
        'BEGIN {printf "%.4f", banned / failed}' 2>/dev/null || echo "0")
    else
      ban_rate="0" # avoid division by zero
    fi
    s_ratio+="fail2ban_jail_ban_rate{jail=\"$jail\"} $ban_rate"$'\n'

    # Last ban/unban: absolute timestamp plus "seconds since" (0 when unknown)
    last_ban=$(get_last_ban_timestamp "$jail")
    last_unban=$(get_last_unban_timestamp "$jail")
    s_last_ban+="fail2ban_jail_last_ban_timestamp{jail=\"$jail\"} ${last_ban}"$'\n'
    s_last_unban+="fail2ban_jail_last_unban_timestamp{jail=\"$jail\"} ${last_unban}"$'\n'

    if [ "$last_ban" -gt 0 ] 2>/dev/null; then
      seconds_since=$((current_time - last_ban))
    else
      seconds_since=0
    fi
    s_since_ban+="fail2ban_jail_seconds_since_last_ban{jail=\"$jail\"} $seconds_since"$'\n'

    if [ "$last_unban" -gt 0 ] 2>/dev/null; then
      seconds_since=$((current_time - last_unban))
    else
      seconds_since=0
    fi
    s_since_unban+="fail2ban_jail_seconds_since_last_unban{jail=\"$jail\"} $seconds_since"$'\n'

    # Ban/unban/unique-IP counts for the 1h and 24h windows
    bans_1h=$(get_ban_rate "$jail" "1 hour ago")
    bans_24h=$(get_ban_rate "$jail" "24 hours ago")
    s_bans+="fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"1h\"} ${bans_1h}"$'\n'
    s_bans+="fail2ban_jail_bans_per_period{jail=\"$jail\",period=\"24h\"} ${bans_24h}"$'\n'

    unbans_1h=$(get_unban_rate "$jail" "1 hour ago")
    unbans_24h=$(get_unban_rate "$jail" "24 hours ago")
    s_unbans+="fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"1h\"} ${unbans_1h}"$'\n'
    s_unbans+="fail2ban_jail_unbans_per_period{jail=\"$jail\",period=\"24h\"} ${unbans_24h}"$'\n'

    unique_1h=$(get_unique_banned_ips "$jail" "1 hour ago")
    unique_24h=$(get_unique_banned_ips "$jail" "24 hours ago")
    s_unique+="fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"1h\"} ${unique_1h}"$'\n'
    s_unique+="fail2ban_jail_unique_banned_ips{jail=\"$jail\",period=\"24h\"} ${unique_24h}"$'\n'

    # Jail configuration as labels
    port=$(get_jail_port "$jail")
    protocol=$(get_jail_protocol "$jail")
    filter=$(get_jail_filter "$jail")
    s_info+="fail2ban_jail_info{jail=\"$jail\",port=\"$port\",protocol=\"$protocol\",filter=\"$filter\"} 1"$'\n'

    # Top attackers: "count ip" lines from get_top_banned_ips
    while read -r hits ip; do
      [ -z "$ip" ] && continue
      s_top+="fail2ban_jail_top_attacker_count{jail=\"$jail\",ip=\"$ip\"} $hits"$'\n'
    done < <(get_top_banned_ips "$jail" 5)

    # Average bans per hour: 24h total divided by 24 (whitespace stripped first)
    bans_24h_clean=$(echo "$bans_24h" | tr -d '\n' | tr -d ' ')
    bans_24h_clean=${bans_24h_clean:-0}
    if [ "$bans_24h_clean" -gt 0 ] 2>/dev/null; then
      hourly_rate=$(awk -v total="$bans_24h_clean" \
        'BEGIN {printf "%.2f", total / 24}' 2>/dev/null || echo "0")
    else
      hourly_rate="0.00"
    fi
    s_hourly+="fail2ban_jail_ban_rate_per_hour{jail=\"$jail\"} $hourly_rate"$'\n'

    # Repeat offenders at three thresholds
    repeat_2=$(get_repeat_offender_count "$jail" 2)
    repeat_5=$(get_repeat_offender_count "$jail" 5)
    repeat_10=$(get_repeat_offender_count "$jail" 10)
    s_repeat+="fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"2+\"} $repeat_2"$'\n'
    s_repeat+="fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"5+\"} $repeat_5"$'\n'
    s_repeat+="fail2ban_jail_repeat_offenders{jail=\"$jail\",threshold=\"10+\"} $repeat_10"$'\n'
  done

  # Log file health metrics
  local log_size log_age log_rotation
  log_size=$(get_log_size)
  log_age=$(get_log_age)
  log_rotation=$(get_log_rotation_timestamp)

  # ------------------------------------------------------------------------
  # Output, in the established family order
  # ------------------------------------------------------------------------
  printf '%s\n' \
    '# HELP fail2ban_up Fail2ban exporter status' \
    '# TYPE fail2ban_up gauge' \
    'fail2ban_up 1' \
    '# HELP fail2ban_server_info Fail2ban server information' \
    '# TYPE fail2ban_server_info gauge' \
    "fail2ban_server_info{version=\"${server_version}\",exporter_version=\"2.0\"} 1" \
    '# HELP fail2ban_jail_count Total number of jails' \
    '# TYPE fail2ban_jail_count gauge' \
    "fail2ban_jail_count $jail_count"

  # Core metrics
  _f2b_emit_section fail2ban_jail_enabled gauge 'Jail enabled status' "$s_enabled"
  _f2b_emit_section fail2ban_jail_failed_current gauge 'Currently failed login attempts per jail' "$s_failed_cur"
  _f2b_emit_section fail2ban_jail_banned_current gauge 'Currently banned IPs per jail' "$s_banned_cur"
  _f2b_emit_section fail2ban_jail_failed_total counter 'Total failed login attempts per jail' "$s_failed_tot"
  _f2b_emit_section fail2ban_jail_banned_total counter 'Total banned IPs per jail (all time)' "$s_banned_tot"
  _f2b_emit_section fail2ban_jail_ban_rate gauge 'Ban rate (total_banned / total_failed) per jail' "$s_ratio"

  # Enhanced metrics (v2.0)
  _f2b_emit_section fail2ban_jail_last_ban_timestamp gauge 'Timestamp of last ban per jail' "$s_last_ban"
  _f2b_emit_section fail2ban_jail_last_unban_timestamp gauge 'Timestamp of last unban per jail' "$s_last_unban"
  _f2b_emit_section fail2ban_jail_bans_per_period gauge 'Bans in time period per jail' "$s_bans"
  _f2b_emit_section fail2ban_jail_unbans_per_period gauge 'Unbans in time period per jail' "$s_unbans"
  _f2b_emit_section fail2ban_jail_unique_banned_ips gauge 'Unique IPs banned in period per jail' "$s_unique"
  _f2b_emit_section fail2ban_jail_info gauge 'Jail configuration information' "$s_info"
  # NOTE(review): the help text says "(24h)" but get_top_banned_ips is called
  # without a time window - confirm which of the two is intended.
  _f2b_emit_section fail2ban_jail_top_attacker_count gauge 'Top attacking IPs per jail (24h)' "$s_top"
  _f2b_emit_section fail2ban_jail_ban_rate_per_hour gauge 'Bans per hour over last 24h per jail' "$s_hourly"
  _f2b_emit_section fail2ban_jail_repeat_offenders gauge 'IPs banned multiple times (7 day window)' "$s_repeat"

  printf '%s\n' \
    '# HELP fail2ban_log_size_bytes Size of fail2ban log file' \
    '# TYPE fail2ban_log_size_bytes gauge' \
    "fail2ban_log_size_bytes $log_size" \
    '# HELP fail2ban_log_age_seconds Time since last log file modification' \
    '# TYPE fail2ban_log_age_seconds gauge' \
    "fail2ban_log_age_seconds $log_age" \
    '# HELP fail2ban_log_rotation_timestamp Unix timestamp of last log rotation' \
    '# TYPE fail2ban_log_rotation_timestamp gauge' \
    "fail2ban_log_rotation_timestamp $log_rotation"
  echo ""

  # Time since last ban/unban (easier to alert on than timestamps)
  _f2b_emit_section fail2ban_jail_seconds_since_last_ban gauge 'Seconds since last ban per jail' "$s_since_ban"
  _f2b_emit_section fail2ban_jail_seconds_since_last_unban gauge 'Seconds since last unban per jail' "$s_since_unban"

  # Exporter runtime
  local script_end script_duration
  script_end=$(date +%s)
  script_duration=$((script_end - script_start))
  printf '%s\n' \
    '# HELP fail2ban_exporter_duration_seconds Time to generate all metrics' \
    '# TYPE fail2ban_exporter_duration_seconds gauge' \
    "fail2ban_exporter_duration_seconds $script_duration" \
    '# HELP fail2ban_exporter_last_run_timestamp Unix timestamp of last successful run' \
    '# TYPE fail2ban_exporter_last_run_timestamp gauge' \
    "fail2ban_exporter_last_run_timestamp $script_end"
  echo ""
}
# ============================================================================
# HTTP SERVER MODE
# ============================================================================
# Run simple HTTP server using netcat
# Serves metrics on the /metrics endpoint and an HTML landing page for every
# other request. Handles one connection at a time and blocks until killed.
# Globals:
#   HTTP_PORT (read)
# Exits:
#   1 when nc is missing or the response FIFO cannot be created
#
# nc's stdout (the client's request) is piped into the handler, and the
# handler's stdout is fed back to nc's stdin through a FIFO. Previously only
# the handler's stdout was connected to nc, so "read" consumed the script's own
# stdin and never saw the request line.
run_http_server() {
  echo "Starting fail2ban exporter on port $HTTP_PORT..." >&2

  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) required for HTTP mode" >&2
    exit 1
  fi

  local fifo_dir fifo request
  fifo_dir=$(mktemp -d) || {
    echo "ERROR: cannot create temporary directory for HTTP mode" >&2
    exit 1
  }
  fifo="$fifo_dir/response"
  if ! mkfifo "$fifo"; then
    rm -rf -- "$fifo_dir"
    echo "ERROR: cannot create response FIFO" >&2
    exit 1
  fi
  # Path is expanded now because the local variable is gone once the trap runs
  # shellcheck disable=SC2064
  trap "rm -rf -- '$fifo_dir'" EXIT
  # Turn the usual stop signals into a normal exit so the EXIT trap cleans up
  trap 'exit 130' INT
  trap 'exit 143' TERM

  # Infinite loop accepting HTTP requests
  while true; do
    # -q 1: wait 1 second after EOF before closing
    nc -l -p "$HTTP_PORT" -q 1 < "$fifo" 2>/dev/null | {
      IFS= read -r request
      # Check if request is for /metrics endpoint
      if [[ "$request" =~ ^GET\ /metrics ]]; then
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\nConnection: close\r\n\r\n'
        generate_metrics
      else # Serve HTML landing page for other requests
        printf 'HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nConnection: close\r\n\r\n'
        cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Fail2ban Exporter v2.0</title></head>
<body>
<h1>Fail2ban Prometheus Exporter (Enhanced v2.0)</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>New Metrics</h2>
<ul>
<li>Last ban/unban timestamps per jail</li>
<li>Ban/unban rates (1h, 24h)</li>
<li>Unique banned IPs per period</li>
<li>Top attackers per jail</li>
<li>Jail configuration info (port, protocol, filter)</li>
<li>Ban rate per hour</li>
</ul>
</body>
</html>
EOF
      fi
    } > "$fifo"
  done
}
# ============================================================================
# MAIN EXECUTION
# ============================================================================
# Main entry point - routes to appropriate output mode
# Arguments:
#   "$@" - the script's command-line arguments (handed to parse_args)
# Globals:
#   HTTP_MODE, OUTPUT_FILE (read; set by parse_args)
# Exits:
#   1 when the metrics file cannot be produced or installed
main() {
  parse_args "$@"

  if [ "$HTTP_MODE" = true ]; then
    # Run HTTP server (blocks until killed)
    run_http_server
  elif [ -n "$OUTPUT_FILE" ]; then
    # Textfile collector mode: write atomically using temp file
    local output_dir temp_file file_lines
    output_dir="$(dirname "$OUTPUT_FILE")"
    if ! mkdir -p "$output_dir"; then
      echo "ERROR: Cannot create output directory $output_dir" >&2
      exit 1
    fi

    # Create temp file in SAME directory for atomic rename (same filesystem)
    temp_file=$(mktemp "${output_dir}/.fail2ban_metrics.XXXXXX") || {
      echo "ERROR: Cannot create temporary file in $output_dir" >&2
      exit 1
    }

    # Generate metrics to temp file
    if ! generate_metrics > "$temp_file" 2>/dev/null; then
      rm -f "$temp_file"
      echo "ERROR: Failed to generate metrics" >&2
      exit 1
    fi

    # Validate by content, not by size: the file must carry a fail2ban_up
    # sample. The short "fail2ban_up 0" output written while fail2ban is down
    # is valid and must replace the previous file; the former ">= 10 lines"
    # rule rejected it and left a stale "fail2ban_up 1" in place.
    if ! grep -qE '^fail2ban_up [01]$' "$temp_file"; then
      rm -f "$temp_file"
      echo "ERROR: Metrics output has no fail2ban_up sample, keeping previous" >&2
      exit 1
    fi
    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)

    # Set permissions before move
    chmod 644 "$temp_file"

    # Atomic rename - no gap where file is missing
    if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
      rm -f "$temp_file"
      echo "ERROR: Cannot write $OUTPUT_FILE" >&2
      exit 1
    fi
    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
  else
    # Default: output to stdout
    generate_metrics
  fi
}
# Execute main function with all script arguments
# ("$@" keeps every command-line argument as a separate word for parse_args)
main "$@"
+787
View File
@@ -0,0 +1,787 @@
#!/bin/bash
################################################
#### GitLab Metrics Collector ####
#### for Prometheus node_exporter textfile ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### Version: 1.00-030426 ####
################################################
# A pipeline's status becomes that of the last stage that failed, not just the final stage
set -o pipefail
# Name of this script as invoked; used in the help text and in error messages
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME
# Default configuration
readonly DEFAULT_NODE_DIR="/var/lib/node_exporter"
readonly DEFAULT_COLLECTION_INTERVAL=60
readonly DEFAULT_MAX_PROJECTS=100
# Passed to curl as --max-time for every HTTP request made by this script
readonly DEFAULT_CURL_TIMEOUT=30
readonly DEFAULT_METRICS_URL="http://localhost/-/metrics"
readonly DEFAULT_SIDEKIQ_URL="http://localhost:8082/metrics"
# Configuration variables (can be overridden by environment)
# GITLAB_URL and GITLAB_TOKEN have no default; validate_requirements rejects
# empty values unless --local is given
GITLAB_URL=${GITLAB_URL:-}
GITLAB_TOKEN=${GITLAB_TOKEN:-}
GITLAB_METRICS_URL=${GITLAB_METRICS_URL:-$DEFAULT_METRICS_URL}
GITLAB_SIDEKIQ_URL=${GITLAB_SIDEKIQ_URL:-$DEFAULT_SIDEKIQ_URL}
NODE_DIR=${NODE_DIR:-$DEFAULT_NODE_DIR}
COLLECTION_INTERVAL=${COLLECTION_INTERVAL:-$DEFAULT_COLLECTION_INTERVAL}
MAX_PROJECTS=${MAX_PROJECTS:-$DEFAULT_MAX_PROJECTS}
# Any non-empty value turns on debug_echo output
DEBUG=${DEBUG:-}
# Runtime flags (changed by parse_arguments)
RUN_MODE="once"
LOCAL_MODE=false
# Error tracking: reset at the start of each run_collection and incremented by collectors
ERRORS_TOTAL=0
# Report a failed command on stderr and terminate the script.
# Globals read: SCRIPT_NAME
# Arguments:    $1 - exit status of the failed command
#               $2 - line number where it failed
# Exits with the status given in $1.
handle_error() {
    local -r status=$1 failed_line=$2
    printf 'Error: %s failed at line %s with exit code %s\n' \
        "$SCRIPT_NAME" "$failed_line" "$status" >&2
    exit "$status"
}
# Install handle_error as the ERR trap.
# NOTE(review): errtrace (set -E) is not enabled in this script, so bash does
# not inherit this trap inside functions, command substitutions or subshells;
# it only fires for commands run at the top level.
trap 'handle_error $? $LINENO' ERR
# Print a diagnostic message to stderr, but only when DEBUG is non-empty.
# Globals read: DEBUG
# Arguments:    message words, joined into one line prefixed with "[DEBUG] "
# Returns:      0 when debugging is off
debug_echo() {
    [[ -z "$DEBUG" ]] && return 0
    printf '[DEBUG] %s\n' "$*" >&2
}
# Print the usage text to stdout.
# The heredoc delimiter is unquoted, so $SCRIPT_NAME and the $DEFAULT_*
# values are expanded into the text at call time.
# Globals read: SCRIPT_NAME, DEFAULT_METRICS_URL, DEFAULT_SIDEKIQ_URL,
#               DEFAULT_NODE_DIR, DEFAULT_COLLECTION_INTERVAL, DEFAULT_MAX_PROJECTS
show_help() {
cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]
GitLab metrics collector for Prometheus node_exporter textfile directory.
OPTIONS:
--once Run collection once and exit (default)
--daemon Run continuously at COLLECTION_INTERVAL
--local Scrape /-/metrics endpoint for server-side metrics (Puma, Sidekiq, Redis, DB)
--help, -h Show this help message
ENVIRONMENT VARIABLES:
GITLAB_URL GitLab base URL (required, e.g. https://gitlab.example.com)
GITLAB_TOKEN GitLab private access token (required for API metrics)
GITLAB_METRICS_URL Local metrics endpoint URL (default: $DEFAULT_METRICS_URL)
GITLAB_SIDEKIQ_URL Sidekiq exporter endpoint URL (default: $DEFAULT_SIDEKIQ_URL)
NODE_DIR Node exporter textfile directory (default: $DEFAULT_NODE_DIR)
COLLECTION_INTERVAL Seconds between collections in daemon mode (default: $DEFAULT_COLLECTION_INTERVAL)
MAX_PROJECTS Maximum number of projects to collect per-project metrics for (default: $DEFAULT_MAX_PROJECTS)
DEBUG Enable debug output
EXAMPLES:
GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --once
GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --daemon
$SCRIPT_NAME --local --once
GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME --local --daemon
DEBUG=1 GITLAB_URL=https://gitlab.example.com GITLAB_TOKEN=glpat-xxx $SCRIPT_NAME
EOF
}
# Make a string safe to embed in a metric label by replacing every character
# outside [a-zA-Z0-9_] with an underscore.
# Arguments: $1 - raw value
# Outputs:   the sanitized value on stdout, followed by a newline
sanitize_label() {
    printf '%s\n' "${1//[^a-zA-Z0-9_]/_}"
}
# Perform an authenticated GET against the GitLab instance.
# Globals read: GITLAB_URL, GITLAB_TOKEN, DEFAULT_CURL_TIMEOUT
# Arguments:    $1 - path appended to GITLAB_URL (including any query string)
#               $2 - "true" to also dump the response headers to stdout
#                    ahead of the body (default: false)
# Outputs:      the response (optionally preceded by headers) on stdout;
#               curl's own stderr is discarded
# Returns:      curl's exit status (-f makes HTTP errors count as failures)
gitlab_api() {
    local endpoint="$1"
    local include_headers="${2:-false}"
    local target="${GITLAB_URL}${endpoint}"
    local -a curl_opts=(
        -sf
        --max-time "$DEFAULT_CURL_TIMEOUT"
        --header "PRIVATE-TOKEN: $GITLAB_TOKEN"
    )

    debug_echo "API call: $target"

    # -D - writes the received headers to stdout before the body
    if [[ "$include_headers" == "true" ]]; then
        curl_opts+=(-D -)
    fi

    curl "${curl_opts[@]}" "$target" 2>/dev/null
}
# Fetch every page of a list endpoint and merge the pages into one JSON array.
# Arguments: $1 - endpoint path (may already contain a query string)
#            $2 - page size (default 100)
#            $3 - maximum number of pages to request (default 50)
# Outputs:   the merged JSON array on stdout ("[]" when nothing was fetched)
# Returns:   0 normally; 1 when the FIRST page could not be fetched or was not
#            valid JSON, so callers can tell "API unreachable" from "no
#            items". A failure on a later page keeps the pages collected so
#            far and still returns 0.
gitlab_api_paginated() {
    local endpoint="$1"
    local per_page="${2:-100}"
    local max_pages="${3:-50}"
    local page=1
    local all_results="[]"
    local rc=0
    local separator="?"
    local response headers body count next_page

    # The endpoint does not change between pages, so pick the separator once
    if [[ "$endpoint" == *"?"* ]]; then
        separator="&"
    fi

    while [[ $page -le $max_pages ]]; do
        if ! response=$(gitlab_api "${endpoint}${separator}per_page=${per_page}&page=${page}" "true" 2>/dev/null); then
            if [[ $page -eq 1 ]]; then
                rc=1
            fi
            break
        fi

        # The response is headers, a blank (possibly CR-only) line, then the body
        headers=$(sed '/^\r\{0,1\}$/q' <<< "$response")
        body=$(sed '1,/^\r\{0,1\}$/d' <<< "$response")

        if [[ -z "$body" ]] || ! jq -e '.' >/dev/null 2>&1 <<< "$body"; then
            if [[ $page -eq 1 ]]; then
                rc=1
            fi
            break
        fi

        count=$(jq 'length' 2>/dev/null <<< "$body") || break
        if [[ "$count" -eq 0 ]]; then
            break
        fi

        # Concatenate the accumulated array with this page's array
        all_results=$(printf '%s %s\n' "$all_results" "$body" | jq -s '.[0] + .[1]' 2>/dev/null) || break

        # An empty or missing X-Next-Page header means this was the last page
        next_page=$(grep -i '^x-next-page:' <<< "$headers" | tr -d '[:space:]' | cut -d: -f2)
        if [[ -z "$next_page" ]]; then
            break
        fi
        page=$((page + 1))
    done

    echo "$all_results"
    return "$rc"
}
# Emit reachability and version metrics for the GitLab instance.
# Globals written: ERRORS_TOTAL (incremented when the version lookup fails)
# Outputs: Prometheus text lines on stdout:
#          gitlab_instance_up, and gitlab_instance_version_info when available
collect_instance_health() {
    local reachable=0
    local payload version_json version revision

    debug_echo "Collecting instance health metrics"

    # Probe the health endpoint; any curl failure counts as "down"
    gitlab_api "/-/health" >/dev/null 2>&1 && reachable=1

    payload="# HELP gitlab_instance_up Whether the GitLab instance is reachable\n"
    payload+="# TYPE gitlab_instance_up gauge\n"
    payload+="gitlab_instance_up $reachable\n"

    # Version info
    if ! version_json=$(gitlab_api "/api/v4/version" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect version info"
    else
        version=$(echo "$version_json" | jq -r '.version // "unknown"' 2>/dev/null)
        revision=$(echo "$version_json" | jq -r '.revision // "unknown"' 2>/dev/null)
        payload+="# HELP gitlab_instance_version_info GitLab version information\n"
        payload+="# TYPE gitlab_instance_version_info gauge\n"
        payload+="gitlab_instance_version_info{version=\"$version\",revision=\"$revision\"} 1\n"
        debug_echo "GitLab version: $version ($revision)"
    fi

    # %b expands the \n sequences the same way `echo -e` does
    printf '%b\n' "$payload"
}
# Emit per-project counters (stars, forks, open issues) and storage statistics.
# Globals written: ERRORS_TOTAL
# Outputs: Prometheus text lines on stdout; nothing when the project list
#          cannot be fetched.
#
# The project loop reads from a process substitution rather than a pipe:
# a `... | while read` body runs in a subshell, which would discard every
# sample appended to $metrics inside the loop.
collect_project_statistics() {
    local metrics=""
    local projects project_count project name namespace labels i
    local -a values=()
    # Metric names, in the order samples are emitted for each project
    local -a metric_names=(
        gitlab_project_stars_count
        gitlab_project_forks_count
        gitlab_project_open_issues_count
        gitlab_project_commit_count
        gitlab_project_storage_size_bytes
        gitlab_project_repository_size_bytes
        gitlab_project_lfs_objects_size_bytes
        gitlab_project_job_artifacts_size_bytes
        gitlab_project_packages_size_bytes
        gitlab_project_wiki_size_bytes
        gitlab_project_snippets_size_bytes
        gitlab_project_uploads_size_bytes
    )
    # HELP text, index-aligned with metric_names
    local -a metric_help=(
        "Number of stars for a project"
        "Number of forks for a project"
        "Number of open issues for a project"
        "Number of commits in default branch"
        "Total storage size in bytes"
        "Repository size in bytes"
        "LFS objects size in bytes"
        "Job artifacts size in bytes"
        "Packages size in bytes"
        "Wiki size in bytes"
        "Snippets size in bytes"
        "Uploads size in bytes"
    )
    # One jq call per project yields all numeric values (missing -> 0),
    # space-separated and index-aligned with metric_names
    local -r values_filter='[
        .star_count, .forks_count, .open_issues_count,
        .statistics.commit_count, .statistics.storage_size,
        .statistics.repository_size, .statistics.lfs_objects_size,
        .statistics.job_artifacts_size, .statistics.packages_size,
        .statistics.wiki_size, .statistics.snippets_size,
        .statistics.uploads_size
    ] | map(. // 0 | tostring) | join(" ")'

    debug_echo "Collecting project statistics"

    if ! projects=$(gitlab_api_paginated "/api/v4/projects?statistics=true" 100 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect project statistics"
        return
    fi

    project_count=$(jq 'length' 2>/dev/null <<< "$projects") || project_count=0
    debug_echo "Found $project_count projects"

    for i in "${!metric_names[@]}"; do
        metrics+="# HELP ${metric_names[i]} ${metric_help[i]}\n"
        metrics+="# TYPE ${metric_names[i]} gauge\n"
    done

    while IFS= read -r project; do
        name=$(sanitize_label "$(jq -r '.name // "unknown"' 2>/dev/null <<< "$project")")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' 2>/dev/null <<< "$project")")
        labels="project=\"$name\",namespace=\"$namespace\""

        values=()
        read -r -a values < <(jq -r "$values_filter" 2>/dev/null <<< "$project")
        # A sample without a value would make the whole textfile unparsable,
        # so skip any project whose values could not be extracted
        if [[ ${#values[@]} -ne ${#metric_names[@]} ]]; then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to parse statistics for project $name"
            continue
        fi

        for i in "${!metric_names[@]}"; do
            metrics+="${metric_names[i]}{$labels} ${values[i]}\n"
        done
    done < <(jq -c '.[]' 2>/dev/null <<< "$projects")

    echo -e "$metrics"
}
# Emit, per project, the number of recent pipelines in each status and the
# duration of the most recent pipeline.
# Globals read:    MAX_PROJECTS
# Globals written: ERRORS_TOTAL
# Outputs: Prometheus text lines on stdout; nothing when the project list
#          cannot be fetched.
#
# The loops read from process substitutions rather than a pipe: a
# `... | while read` body runs in a subshell, which would discard every
# sample appended to $metrics (and every ERRORS_TOTAL increment) inside it.
collect_pipeline_metrics() {
    local metrics=""
    local projects project project_id name namespace pipelines status count duration

    debug_echo "Collecting pipeline metrics"

    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to fetch projects for pipeline metrics"
        return
    fi

    metrics+="# HELP gitlab_pipeline_status Count of pipelines by status\n"
    metrics+="# TYPE gitlab_pipeline_status gauge\n"
    metrics+="# HELP gitlab_pipeline_duration_seconds Duration of the latest pipeline\n"
    metrics+="# TYPE gitlab_pipeline_duration_seconds gauge\n"

    while IFS= read -r project; do
        project_id=$(jq -r '.id' 2>/dev/null <<< "$project")
        name=$(sanitize_label "$(jq -r '.name // "unknown"' 2>/dev/null <<< "$project")")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' 2>/dev/null <<< "$project")")

        if ! pipelines=$(gitlab_api "/api/v4/projects/${project_id}/pipelines?per_page=20" 2>/dev/null); then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to fetch pipelines for project $project_id"
            continue
        fi

        # Count pipelines per status; `uniq -c` prints "<count> <status>"
        while read -r count status; do
            if [[ -n "$status" ]]; then
                metrics+="gitlab_pipeline_status{project=\"$name\",namespace=\"$namespace\",status=\"$status\"} $count\n"
            fi
        done < <(jq -r '.[].status // empty' 2>/dev/null <<< "$pipelines" | sort | uniq -c)

        # Latest pipeline duration
        # NOTE(review): the pipeline list response may not include `duration`
        # (it may only be on the single-pipeline endpoint) — confirm; when it
        # is absent this metric is simply not emitted.
        duration=$(jq -r '.[0].duration // empty' 2>/dev/null <<< "$pipelines")
        if [[ -n "$duration" && "$duration" != "null" ]]; then
            metrics+="gitlab_pipeline_duration_seconds{project=\"$name\",namespace=\"$namespace\"} $duration\n"
        fi
    done < <(jq -c '.[]' 2>/dev/null <<< "$projects")

    echo -e "$metrics"
}
# Emit per-runner active/online flags plus total and online runner counts.
# Globals written: ERRORS_TOTAL
# Outputs: Prometheus text lines on stdout; nothing when the runner list
#          cannot be fetched.
#
# The runner loop reads from a process substitution rather than a pipe:
# a `... | while read` body runs in a subshell, which would discard every
# per-runner sample appended to $metrics inside the loop.
collect_runner_metrics() {
    local metrics=""
    local runners total online_total runner
    local runner_name runner_type active status active_val online_val

    debug_echo "Collecting runner metrics"

    if ! runners=$(gitlab_api_paginated "/api/v4/runners" 100 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect runner metrics"
        return
    fi

    total=$(jq 'length' 2>/dev/null <<< "$runners") || total=0
    online_total=$(jq '[.[] | select(.status == "online")] | length' 2>/dev/null <<< "$runners") || online_total=0

    metrics+="# HELP gitlab_runner_active Whether a runner is active\n"
    metrics+="# TYPE gitlab_runner_active gauge\n"
    metrics+="# HELP gitlab_runner_online Whether a runner is online\n"
    metrics+="# TYPE gitlab_runner_online gauge\n"
    metrics+="# HELP gitlab_runners_total Total number of runners\n"
    metrics+="# TYPE gitlab_runners_total gauge\n"
    metrics+="# HELP gitlab_runners_online_total Total number of online runners\n"
    metrics+="# TYPE gitlab_runners_online_total gauge\n"

    while IFS= read -r runner; do
        runner_name=$(sanitize_label "$(jq -r '.description // "unknown"' 2>/dev/null <<< "$runner")")
        runner_type=$(jq -r '.runner_type // "unknown"' 2>/dev/null <<< "$runner")
        active=$(jq -r '.active // false' 2>/dev/null <<< "$runner")
        status=$(jq -r '.status // "unknown"' 2>/dev/null <<< "$runner")

        # Map the boolean / status string onto 0/1 gauge values
        active_val=0
        if [[ "$active" == "true" ]]; then
            active_val=1
        fi
        online_val=0
        if [[ "$status" == "online" ]]; then
            online_val=1
        fi

        metrics+="gitlab_runner_active{runner_name=\"$runner_name\",runner_type=\"$runner_type\"} $active_val\n"
        metrics+="gitlab_runner_online{runner_name=\"$runner_name\",runner_type=\"$runner_type\"} $online_val\n"
    done < <(jq -c '.[]' 2>/dev/null <<< "$runners")

    metrics+="gitlab_runners_total $total\n"
    metrics+="gitlab_runners_online_total $online_total\n"

    echo -e "$metrics"
}
# Emit the total number of users, taken from the X-Total response header of a
# one-item user listing.
# Globals written: ERRORS_TOTAL (request failed, or header missing)
# Outputs: Prometheus text lines on stdout; nothing when the request fails.
collect_user_metrics() {
    local payload=""
    local reply user_total

    debug_echo "Collecting user metrics"

    reply=$(gitlab_api "/api/v4/users?per_page=1" "true" 2>/dev/null) || {
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect user metrics"
        return
    }

    # Header line "X-Total: N" -> strip all whitespace, keep the part after ':'
    user_total=$(grep -i '^x-total:' <<< "$reply" | tr -d '[:space:]' | cut -d: -f2)

    if [[ -z "$user_total" ]]; then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to parse user count from X-Total header"
    else
        payload+="# HELP gitlab_users_total Total number of GitLab users\n"
        payload+="# TYPE gitlab_users_total gauge\n"
        payload+="gitlab_users_total $user_total\n"
        debug_echo "Total users: $user_total"
    fi

    # %b expands the \n sequences the same way `echo -e` does
    printf '%b\n' "$payload"
}
# Emit the total number of groups returned by the paginated group listing.
# Globals written: ERRORS_TOTAL (when the listing fails)
# Outputs: Prometheus text lines on stdout; nothing when the listing fails.
collect_group_metrics() {
    local payload group_list group_total

    debug_echo "Collecting group metrics"

    group_list=$(gitlab_api_paginated "/api/v4/groups" 100 2>/dev/null) || {
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to collect group metrics"
        return
    }

    # Fall back to 0 when the listing cannot be parsed
    group_total=$(jq 'length' 2>/dev/null <<< "$group_list") || group_total=0

    payload="# HELP gitlab_groups_total Total number of GitLab groups\n"
    payload+="# TYPE gitlab_groups_total gauge\n"
    payload+="gitlab_groups_total $group_total\n"
    debug_echo "Total groups: $group_total"

    # %b expands the \n sequences the same way `echo -e` does
    printf '%b\n' "$payload"
}
# Emit, per project, the number of recent CI jobs in each status.
# Globals read:    MAX_PROJECTS
# Globals written: ERRORS_TOTAL
# Outputs: Prometheus text lines on stdout; nothing when the project list
#          cannot be fetched.
#
# The loops read from process substitutions rather than a pipe: a
# `... | while read` body runs in a subshell, which would discard every
# sample appended to $metrics (and every ERRORS_TOTAL increment) inside it.
collect_job_metrics() {
    local metrics=""
    local projects project project_id name namespace jobs status count

    debug_echo "Collecting job metrics"

    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to fetch projects for job metrics"
        return
    fi

    metrics+="# HELP gitlab_jobs_by_status Count of jobs by status\n"
    metrics+="# TYPE gitlab_jobs_by_status gauge\n"

    while IFS= read -r project; do
        project_id=$(jq -r '.id' 2>/dev/null <<< "$project")
        name=$(sanitize_label "$(jq -r '.name // "unknown"' 2>/dev/null <<< "$project")")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' 2>/dev/null <<< "$project")")

        if ! jobs=$(gitlab_api "/api/v4/projects/${project_id}/jobs?per_page=20" 2>/dev/null); then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to fetch jobs for project $project_id"
            continue
        fi

        # Count jobs per status; `uniq -c` prints "<count> <status>"
        while read -r count status; do
            if [[ -n "$status" ]]; then
                metrics+="gitlab_jobs_by_status{project=\"$name\",namespace=\"$namespace\",status=\"$status\"} $count\n"
            fi
        done < <(jq -r '.[].status // empty' 2>/dev/null <<< "$jobs" | sort | uniq -c)
    done < <(jq -c '.[]' 2>/dev/null <<< "$projects")

    echo -e "$metrics"
}
# Emit, per project, the number of open merge requests found in the first
# page (up to 100) of the project's opened merge requests.
# Globals read:    MAX_PROJECTS
# Globals written: ERRORS_TOTAL
# Outputs: Prometheus text lines on stdout; nothing when the project list
#          cannot be fetched.
#
# The project loop reads from a process substitution rather than a pipe:
# a `... | while read` body runs in a subshell, which would discard every
# sample appended to $metrics (and every ERRORS_TOTAL increment) inside it.
collect_merge_request_metrics() {
    local metrics=""
    local projects project project_id name namespace mrs count

    debug_echo "Collecting merge request metrics"

    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to fetch projects for merge request metrics"
        return
    fi

    metrics+="# HELP gitlab_open_merge_requests Number of open merge requests\n"
    metrics+="# TYPE gitlab_open_merge_requests gauge\n"

    while IFS= read -r project; do
        project_id=$(jq -r '.id' 2>/dev/null <<< "$project")
        name=$(sanitize_label "$(jq -r '.name // "unknown"' 2>/dev/null <<< "$project")")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' 2>/dev/null <<< "$project")")

        if ! mrs=$(gitlab_api "/api/v4/projects/${project_id}/merge_requests?state=opened&per_page=100" 2>/dev/null); then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to fetch merge requests for project $project_id"
            continue
        fi

        count=$(jq 'length' 2>/dev/null <<< "$mrs") || count=0
        metrics+="gitlab_open_merge_requests{project=\"$name\",namespace=\"$namespace\"} $count\n"
    done < <(jq -c '.[]' 2>/dev/null <<< "$projects")

    echo -e "$metrics"
}
# Emit, per project, the number of environments found in the first page
# (up to 100) of the project's environment listing.
# Globals read:    MAX_PROJECTS
# Globals written: ERRORS_TOTAL
# Outputs: Prometheus text lines on stdout; nothing when the project list
#          cannot be fetched.
#
# The project loop reads from a process substitution rather than a pipe:
# a `... | while read` body runs in a subshell, which would discard every
# sample appended to $metrics (and every ERRORS_TOTAL increment) inside it.
collect_environment_metrics() {
    local metrics=""
    local projects project project_id name namespace envs count

    debug_echo "Collecting environment metrics"

    if ! projects=$(gitlab_api "/api/v4/projects?per_page=${MAX_PROJECTS}&simple=true" 2>/dev/null); then
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        debug_echo "Failed to fetch projects for environment metrics"
        return
    fi

    metrics+="# HELP gitlab_environments_total Number of environments per project\n"
    metrics+="# TYPE gitlab_environments_total gauge\n"

    while IFS= read -r project; do
        project_id=$(jq -r '.id' 2>/dev/null <<< "$project")
        name=$(sanitize_label "$(jq -r '.name // "unknown"' 2>/dev/null <<< "$project")")
        namespace=$(sanitize_label "$(jq -r '.namespace.name // "unknown"' 2>/dev/null <<< "$project")")

        if ! envs=$(gitlab_api "/api/v4/projects/${project_id}/environments?per_page=100" 2>/dev/null); then
            ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
            debug_echo "Failed to fetch environments for project $project_id"
            continue
        fi

        count=$(jq 'length' 2>/dev/null <<< "$envs") || count=0
        metrics+="gitlab_environments_total{project=\"$name\",namespace=\"$namespace\"} $count\n"
    done < <(jq -c '.[]' 2>/dev/null <<< "$projects")

    echo -e "$metrics"
}
# Scrape GitLab's own Prometheus endpoints and pass through a selected subset
# of the exposed metric families unchanged.
# Globals read:    GITLAB_METRICS_URL, GITLAB_SIDEKIQ_URL, DEFAULT_CURL_TIMEOUT
# Globals written: ERRORS_TOTAL (when the main metrics endpoint fails)
# Outputs: the selected metric lines (with their HELP/TYPE lines) on stdout;
#          nothing when the main endpoint cannot be scraped. A failing
#          Sidekiq endpoint is only reported through debug_echo.
#
# Each selector below is an extended regex of the form
# "<HELP/TYPE line pattern>|<sample line patterns>".
collect_local_metrics() {
    local payload=""
    local raw_metrics sidekiq_raw selector sample_count

    # Selectors applied to the main endpoint, emitted before the Sidekiq block
    local -a leading_selectors=(
        # GitLab version info
        "^# (HELP|TYPE) gitlab_version_info|^gitlab_version_info[{ ]"
        # Puma metrics
        "^# (HELP|TYPE) puma_|^puma_workers[{ ]|^puma_running_workers[{ ]|^puma_running[{ ]|^puma_queued_connections[{ ]|^puma_active_connections[{ ]|^puma_pool_capacity[{ ]|^puma_max_threads[{ ]|^puma_idle_threads[{ ]"
    )

    # Selectors applied to the Sidekiq exporter output
    local -a sidekiq_selectors=(
        # Core Sidekiq job metrics
        "^# (HELP|TYPE) sidekiq_(running_jobs|concurrency|mem_total_bytes|jobs_failed_total|jobs_dead_total|enqueued_jobs_total|jobs_completion_seconds|jobs_queue_duration_seconds|jobs_cpu_seconds|jobs_db_seconds|jobs_gitaly_seconds|redis_requests_total|redis_requests_duration_seconds)|^sidekiq_running_jobs[{ ]|^sidekiq_concurrency[{ ]|^sidekiq_mem_total_bytes[{ ]|^sidekiq_jobs_failed_total[{ ]|^sidekiq_jobs_dead_total[{ ]|^sidekiq_enqueued_jobs_total[{ ]|^sidekiq_jobs_completion_seconds[_{ ]|^sidekiq_jobs_queue_duration_seconds[_{ ]|^sidekiq_jobs_cpu_seconds[_{ ]|^sidekiq_jobs_db_seconds[_{ ]|^sidekiq_jobs_gitaly_seconds[_{ ]|^sidekiq_redis_requests_total[{ ]|^sidekiq_redis_requests_duration_seconds[_{ ]"
        # CI/CD pipeline internals
        "^# (HELP|TYPE) (pipelines_created_total|deployments|gitlab_ci_pipeline_creation_duration_seconds|gitlab_ci_pipeline_failure_reasons|gitlab_ci_active_jobs)|^pipelines_created_total[{ ]|^deployments[{ ]|^gitlab_ci_pipeline_creation_duration_seconds[_{ ]|^gitlab_ci_pipeline_failure_reasons[{ ]|^gitlab_ci_active_jobs[_{ ]"
        # Email delivery metrics
        "^# (HELP|TYPE) gitlab_emails_(delivered_total|delivery_attempts_total)|^gitlab_emails_delivered_total[{ ]|^gitlab_emails_delivery_attempts_total[{ ]"
        # External HTTP (webhooks, integrations)
        "^# (HELP|TYPE) gitlab_external_http_(total|duration_seconds)|^gitlab_external_http_total[{ ]|^gitlab_external_http_duration_seconds[_{ ]"
        # Sidekiq SLI apdex/errors
        "^# (HELP|TYPE) gitlab_sli_sidekiq_execution|^gitlab_sli_sidekiq_execution_apdex_success_total[{ ]|^gitlab_sli_sidekiq_execution_apdex_total[{ ]|^gitlab_sli_sidekiq_execution_error_total[{ ]|^gitlab_sli_sidekiq_execution_total[{ ]"
        # DB transaction duration, primary SQL, threads, cache, workers
        "^# (HELP|TYPE) (gitlab_database_transaction_seconds|gitlab_sql_primary_duration_seconds|gitlab_ruby_threads_running_threads|gitlab_ruby_threads_max_expected_threads|limited_capacity_worker_running_jobs|limited_capacity_worker_max_running_jobs|limited_capacity_worker_remaining_work_count|redis_hit_miss_operations_total)|^gitlab_database_transaction_seconds[_{ ]|^gitlab_sql_primary_duration_seconds[_{ ]|^gitlab_ruby_threads_running_threads[{ ]|^gitlab_ruby_threads_max_expected_threads[{ ]|^limited_capacity_worker_running_jobs[{ ]|^limited_capacity_worker_max_running_jobs[{ ]|^limited_capacity_worker_remaining_work_count[{ ]|^redis_hit_miss_operations_total[{ ]"
    )

    # Selectors applied to the main endpoint, emitted after the Sidekiq block
    local -a trailing_selectors=(
        # Redis metrics
        "^# (HELP|TYPE) gitlab_redis_client_(requests_total|exceptions_total|requests_duration_seconds)|^gitlab_redis_client_requests_total[{ ]|^gitlab_redis_client_exceptions_total[{ ]|^gitlab_redis_client_requests_duration_seconds[_{ ]|^gitlab_redis_client_requests_duration_seconds_sum[{ ]|^gitlab_redis_client_requests_duration_seconds_count[{ ]"
        # Database connection pool metrics
        "^# (HELP|TYPE) gitlab_database_connection_pool_|^gitlab_database_connection_pool_"
        # Process metrics (CPU, memory, file descriptors)
        "^# (HELP|TYPE) (ruby_process_resident_memory_bytes|ruby_process_cpu_seconds_total|process_open_fds|process_max_fds|ruby_gc_stat_heap_live_slots|ruby_gc_stat_heap_free_slots)|^ruby_process_resident_memory_bytes[{ ]|^ruby_process_cpu_seconds_total[{ ]|^process_open_fds[{ ]|^process_max_fds[{ ]|^ruby_gc_stat_heap_live_slots[{ ]|^ruby_gc_stat_heap_free_slots[{ ]"
        # GitLab transaction/request metrics
        "^# (HELP|TYPE) (gitlab_transaction_duration_seconds|gitlab_sql_duration_seconds|gitlab_cache_operation_duration_seconds)|^gitlab_transaction_duration_seconds[{ _]|^gitlab_sql_duration_seconds[{ _]|^gitlab_cache_operation_duration_seconds[{ _]"
        # User session and ActionCable metrics
        "^# (HELP|TYPE) (user_session_logins_total|action_cable_active_connections|action_cable_pool_current_size)|^user_session_logins_total[{ ]|^action_cable_active_connections[{ ]|^action_cable_pool_current_size[{ ]"
    )

    debug_echo "Scraping local metrics from $GITLAB_METRICS_URL"

    raw_metrics=$(curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" "$GITLAB_METRICS_URL" 2>/dev/null) || {
        ERRORS_TOTAL=$((ERRORS_TOTAL + 1))
        echo "Error: Failed to scrape $GITLAB_METRICS_URL" >&2
        echo "Ensure this host's IP is in gitlab_rails['monitoring_whitelist']" >&2
        return
    }

    for selector in "${leading_selectors[@]}"; do
        payload+=$(grep -E "$selector" 2>/dev/null <<< "$raw_metrics")
        payload+=$'\n'
    done

    # Sidekiq metrics (served by separate Sidekiq exporter, default localhost:8082)
    if sidekiq_raw=$(curl -sf --max-time "$DEFAULT_CURL_TIMEOUT" "$GITLAB_SIDEKIQ_URL" 2>/dev/null); then
        debug_echo "Scraped Sidekiq metrics from $GITLAB_SIDEKIQ_URL"
        for selector in "${sidekiq_selectors[@]}"; do
            payload+=$(grep -E "$selector" 2>/dev/null <<< "$sidekiq_raw")
            payload+=$'\n'
        done
    else
        debug_echo "Warning: Could not scrape Sidekiq exporter at $GITLAB_SIDEKIQ_URL (is sidekiq_exporter enabled?)"
    fi

    for selector in "${trailing_selectors[@]}"; do
        payload+=$(grep -E "$selector" 2>/dev/null <<< "$raw_metrics")
        payload+=$'\n'
    done

    # Count sample lines only (skip comment and empty lines) for the debug log
    sample_count=$(grep -cv '^#\|^$' 2>/dev/null <<< "$payload") || sample_count=0
    debug_echo "Extracted $sample_count local metrics"

    printf '%s\n' "$payload"
}
# Run every enabled collector once and publish the result as
# ${NODE_DIR}/gitlab_metrics.prom.
# Globals read:    GITLAB_URL, GITLAB_TOKEN, LOCAL_MODE, NODE_DIR
# Globals written: ERRORS_TOTAL (reset, then incremented by the collectors)
# Returns: 0 on success, 1 when the output file could not be written.
#
# Collectors are run in the current shell with stdout redirected to the temp
# file, not inside $(...): a command substitution is a subshell, so their
# ERRORS_TOTAL increments would be lost and the reported error count would
# always be 0. Writing their output directly also avoids a second round of
# backslash-escape expansion, which would corrupt scraped label values that
# contain sequences such as \\ or \n.
run_collection() {
    local start_time end_time duration tmp_file
    start_time=$(date +%s)
    ERRORS_TOTAL=0
    debug_echo "Starting metrics collection"

    if ! mkdir -p "$NODE_DIR"; then
        echo "Error: Cannot create directory $NODE_DIR" >&2
        return 1
    fi
    # The temp name does not end in .prom; it is renamed into place at the end
    if ! tmp_file=$(mktemp "${NODE_DIR}/gitlab_metrics.prom.XXXXXX"); then
        echo "Error: Cannot create temporary file in $NODE_DIR" >&2
        return 1
    fi

    {
        # API-based metrics (require GITLAB_URL and GITLAB_TOKEN)
        if [[ -n "$GITLAB_URL" && -n "$GITLAB_TOKEN" ]]; then
            collect_instance_health
            collect_project_statistics
            collect_pipeline_metrics
            collect_runner_metrics
            collect_user_metrics
            collect_group_metrics
            collect_job_metrics
            collect_merge_request_metrics
            collect_environment_metrics
        fi
        # Local server-side metrics (scraped from /-/metrics)
        if [[ "$LOCAL_MODE" == true ]]; then
            collect_local_metrics
        fi
    } > "$tmp_file"

    end_time=$(date +%s)
    duration=$((end_time - start_time))

    # Collector self-metrics
    printf '%s\n' \
        "# HELP gitlab_collector_duration_seconds Time taken to collect all metrics" \
        "# TYPE gitlab_collector_duration_seconds gauge" \
        "gitlab_collector_duration_seconds $duration" \
        "# HELP gitlab_collector_last_run_timestamp Unix timestamp of last collection run" \
        "# TYPE gitlab_collector_last_run_timestamp gauge" \
        "gitlab_collector_last_run_timestamp $end_time" \
        "# HELP gitlab_collector_errors_total Number of errors during collection" \
        "# TYPE gitlab_collector_errors_total gauge" \
        "gitlab_collector_errors_total $ERRORS_TOTAL" >> "$tmp_file"

    # mktemp creates the file owner-only (0600); make it readable by the
    # account node_exporter runs under before publishing it
    chmod 644 "$tmp_file"

    # Rename into place so readers never see a partially written file
    if ! mv -f "$tmp_file" "${NODE_DIR}/gitlab_metrics.prom"; then
        rm -f "$tmp_file"
        echo "Error: Cannot write ${NODE_DIR}/gitlab_metrics.prom" >&2
        return 1
    fi

    debug_echo "Collection complete in ${duration}s with $ERRORS_TOTAL errors"
}
# Translate command-line options into the RUN_MODE / LOCAL_MODE flags.
# Globals written: RUN_MODE, LOCAL_MODE
# Arguments:       the script's command-line arguments
# Exits 0 after printing help for --help/-h; exits 1 (help on stderr) for any
# unrecognized option.
parse_arguments() {
    local opt
    for opt in "$@"; do
        case "$opt" in
            --once)
                RUN_MODE="once"
                ;;
            --daemon)
                RUN_MODE="daemon"
                ;;
            --local)
                LOCAL_MODE=true
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                echo "Error: Unknown option: $opt" >&2
                show_help >&2
                exit 1
                ;;
        esac
    done
}
# Check configuration and tool availability before collecting anything.
# Globals read:    LOCAL_MODE, GITLAB_TOKEN
# Globals written: GITLAB_URL, GITLAB_METRICS_URL, GITLAB_SIDEKIQ_URL
#                  (one trailing slash removed from each)
# Exits 1 with a message on stderr when a requirement is not met.
validate_requirements() {
    # API credentials only required when not running local-only
    if [[ "$LOCAL_MODE" != true ]]; then
        if [[ -z "$GITLAB_URL" ]]; then
            echo "Error: GITLAB_URL is required (or use --local for server-side only)" >&2
            exit 1
        fi
        if [[ -z "$GITLAB_TOKEN" ]]; then
            echo "Error: GITLAB_TOKEN is required (or use --local for server-side only)" >&2
            exit 1
        fi
    fi

    # Strip trailing slash from URLs
    GITLAB_URL="${GITLAB_URL%/}"
    GITLAB_METRICS_URL="${GITLAB_METRICS_URL%/}"
    GITLAB_SIDEKIQ_URL="${GITLAB_SIDEKIQ_URL%/}"

    command -v curl >/dev/null 2>&1 || {
        echo "Error: curl is required but not installed" >&2
        exit 1
    }

    # jq is only needed for the API collectors, which require a token
    if [[ -n "$GITLAB_TOKEN" ]] && ! command -v jq >/dev/null 2>&1; then
        echo "Error: jq is required but not installed" >&2
        exit 1
    fi
}
# Collector entry point: parse options, validate the environment, then
# collect once or loop forever depending on RUN_MODE.
# Globals read: GITLAB_URL, GITLAB_METRICS_URL, GITLAB_SIDEKIQ_URL, NODE_DIR,
#               RUN_MODE, LOCAL_MODE, MAX_PROJECTS, COLLECTION_INTERVAL
# Arguments:    the script's command-line arguments
main() {
    parse_arguments "$@"
    validate_requirements

    # Log the effective configuration (only visible when DEBUG is set)
    debug_echo "GitLab URL: $GITLAB_URL"
    debug_echo "Metrics URL: $GITLAB_METRICS_URL"
    debug_echo "Sidekiq URL: $GITLAB_SIDEKIQ_URL"
    debug_echo "Node exporter dir: $NODE_DIR"
    debug_echo "Run mode: $RUN_MODE"
    debug_echo "Local mode: $LOCAL_MODE"
    debug_echo "Max projects: $MAX_PROJECTS"

    case "$RUN_MODE" in
        daemon)
            debug_echo "Running in daemon mode with ${COLLECTION_INTERVAL}s interval"
            # Never leaves this loop; the process runs until it is killed
            while true; do
                run_collection
                sleep "$COLLECTION_INTERVAL"
            done
            ;;
        *)
            run_collection
            ;;
    esac

    debug_echo "Script completed successfully"
}
# Execute main only when this file is run directly: the test compares the
# path of this file (BASH_SOURCE[0]) with the invoked script ($0) and calls
# main, with all arguments, only when they are equal.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi
File diff suppressed because it is too large Load Diff
+531
View File
@@ -0,0 +1,531 @@
#!/bin/bash
####################################################################
#### Code-Server Install Script ####
#### For RHEL/Rocky/Alma, Oracle Linux, Debian & Ubuntu ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 1.3 ####
#### ####
#### Usage: sudo ./install-code-server.sh ####
####################################################################
#############################
#### User Configurations ####
#############################
CODEDIR=/code # Home directory for your code (created by install_codeserver)
EMAIL=admin@mydomain.com # Your domain email address
HTTPTYPE=APACHE # Choose APACHE, CADDY or NGINX - must be all UPPER case, the script compares it literally
PASSWD=pAsSwOrD # Your password for code-server, used for Apache, Nginx and Caddy
UNAME=MyUser # Username used for Caddy
SERVDIR=/usr/local/code-server # Where you want code-server installed
SERVERNAME=code.mydomain.cloud # Server FQDN
USRDIR=/var/lib/code-server # Passed to code-server as --user-data-dir
########################
#### System Configs ####
########################
# Hash of PASSWD produced by `caddy hash-password` (the password is fed twice on stdin; the last output line is kept).
# NOTE(review): this runs when the script starts, and caddy's stderr is discarded, so CADPASS
# is presumably empty when caddy is not already installed — confirm before relying on it.
CADPASS="$(echo -e "${PASSWD}\n$PASSWD" | caddy hash-password 2>/dev/null | tail --lines=1)"
# First word of PRETTY_NAME from /etc/os-release, lower-cased (e.g. "ubuntu", "rocky", "red")
OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
# Major version: the part of VERSION_ID before the first dot
OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}')
# Read all of stdin (in this script, a heredoc) into the variable named by $1.
# -d '' makes read consume input up to a NUL byte or end of input rather than
# stopping at the first newline; -r keeps backslashes literal.
# NOTE(review): read presumably returns non-zero when it reaches end of input
# without finding the delimiter, so define's exit status should not be treated
# as a failure indicator — confirm.
define() {
IFS=$'\n' read -r -d '' "$1"
}
############################################################
#### Detect Package Manager from OS and OSVER Variables ####
############################################################
# Print the package-manager command (with its "assume yes" flag) for an OS.
# Arguments: $1 - OS id as derived from PRETTY_NAME (lower-case first word)
#            $2 - major OS version
# Outputs:   "apt-get -y", "yum -y" or "dnf -y" on stdout; nothing when the
#            OS/version combination is not supported
detect_package_manager() {
    local os_id=$1
    local os_major=$2

    case "${os_id}" in
        ubuntu|debian)
            echo "apt-get -y"
            ;;
        centos|red|oracle|rocky|alma|almalinux)
            if [[ "${os_major}" == 7 ]]; then
                echo "yum -y"
            elif [[ "${os_major}" =~ ^[0-9]+$ ]] && (( os_major >= 8 )); then
                # 8, 9 and later releases ship dnf
                echo "dnf -y"
            fi
            ;;
    esac
}

# NOTE(review): AlmaLinux's PRETTY_NAME presumably starts with "AlmaLinux",
# giving OS=almalinux; normalize it so the "alma" checks in this script match.
if [[ "${OS}" == almalinux ]]; then
    OS=alma
fi
PAKMGR=$(detect_package_manager "${OS}" "${OSVER}")

################################
#### Check if OS is Updated ####
################################
# PAKMGR is expanded unquoted on purpose: it holds a command plus its flag.
if [[ -z "${PAKMGR}" ]]; then
    # Without this guard an empty PAKMGR would run the bare words "update" /
    # "install" as commands.
    echo "WARNING: unsupported OS '${OS}' (version '${OSVER}'); skipping package updates" >&2
elif [[ "${OS}" == ubuntu || "${OS}" == debian ]]; then
    # Refresh the package index first so upgrade/install see current versions
    ${PAKMGR} update
    ${PAKMGR} upgrade
    ${PAKMGR} install libc6 libstdc++6
else
    ${PAKMGR} update
fi
###############################################
#### Get the latest version of Code Server ####
###############################################
# Determine the newest code-server release by following the redirect of
# GitHub's "releases/latest" URL and taking the tag from the final URL.
# Globals written: version (the tag without its leading "v")
# Outputs:         the same version string on stdout
get_latest_version() {
    local release_url
    # -I sends a HEAD request, -L follows redirects; only the final URL is printed
    release_url="$(curl -fsSLI -o /dev/null -w "%{url_effective}" https://github.com/coder/code-server/releases/latest)"
    version="${release_url#https://github.com/coder/code-server/releases/tag/}"
    version="${version#v}"
    echo "$version"
}
#########################################
#### Download and Install Codeserver ####
#########################################
# Download the code-server release tarball, install it under SERVDIR, create
# the systemd unit and start the service.
# Globals read:    PAKMGR, SERVDIR, CODEDIR, USRDIR, PASSWD, HTTPTYPE, version
# Globals written: csserv, OUTFILE1 (kept global as before), and version when
#                  it was empty on entry
# Returns: 1 when the release version is unknown or the download/extract
#          fails; otherwise the status of the final systemctl call.
install_codeserver() {
    local tarball

    # The download URL is built from $version; look it up if nobody has yet
    if [[ -z "${version:-}" ]]; then
        get_latest_version >/dev/null
    fi
    if [[ -z "${version:-}" ]]; then
        echo "ERROR: could not determine the code-server version to install" >&2
        return 1
    fi

    # check if command wget exists
    if ! command -v wget >/dev/null 2>&1; then
        # PAKMGR is expanded unquoted on purpose: it holds a command plus its flag
        ${PAKMGR} install wget
    fi

    cd ~/ || exit
    tarball="code-server-${version}-linux-amd64.tar.gz"
    # -O overwrites a tarball left by an earlier run instead of saving the new
    # download as "<name>.1" and then extracting the stale file
    if ! wget -O "${tarball}" "https://github.com/coder/code-server/releases/download/v${version}/${tarball}"; then
        echo "ERROR: failed to download ${tarball}" >&2
        return 1
    fi
    if ! tar xvf "${tarball}"; then
        echo "ERROR: failed to extract ${tarball}" >&2
        return 1
    fi

    mkdir -p "${SERVDIR}"
    cp -r ~/code-server-"${version}"-linux-amd64/* "${SERVDIR}"
    # -f/-n replace an existing link, so re-running the installer does not fail
    ln -sfn "${SERVDIR}/bin/code-server" /usr/bin/code-server

    # Code Directory
    mkdir -p "${CODEDIR}"
    # User Directory
    mkdir -p "${USRDIR}"

    csserv=/lib/systemd/system
    OUTFILE1="$csserv/code-server.service"
    # Write the unit directly. "After=nginx.service" is the initial value that
    # the web-server specific steps rewrite with sed.
    # NOTE(review): the password is stored in plain text in the unit file.
    cat > "$OUTFILE1" << EOF
[Unit]
Description=code-server
After=nginx.service
[Service]
Type=simple
Environment=PASSWORD=$PASSWD
ExecStart=/usr/bin/code-server --bind-addr 127.0.0.1:8080 --user-data-dir ${USRDIR} --auth password
Restart=always
[Install]
WantedBy=multi-user.target
EOF

    if [[ "${HTTPTYPE}" == CADDY ]]; then
        sed -i 's/After=nginx.service/After=caddy.service/g' "$OUTFILE1"
        # config.yaml may not exist yet, so only edit it when it is present
        if [[ -f /root/.config/code-server/config.yaml ]]; then
            sed -i 's/auth: password/auth: none/' /root/.config/code-server/config.yaml
        else
            echo "WARNING: /root/.config/code-server/config.yaml not found; auth setting left unchanged" >&2
        fi
        sed -i "s|ExecStart=/usr/bin/code-server --bind-addr 127.0.0.1:8080 --user-data-dir ${USRDIR} --auth password|ExecStart=/usr/bin/code-server --bind-addr 127.0.0.1:8080 --user-data-dir ${USRDIR}|" "$OUTFILE1"
    fi

    systemctl daemon-reload
    systemctl start code-server
    systemctl enable code-server
}
########################################
#### Install Apache, Nginx or Caddy ####
########################################
# Install and configure the reverse proxy selected by $HTTPTYPE
# (APACHE, NGINX or CADDY) in front of code-server on 127.0.0.1:8080.
# Globals read:    HTTPTYPE, OS, OSVER, PAKMGR, SERVERNAME, UNAME, CADPASS
# Globals written: csserv, AOUTFILE, OUTFILE, OUTFILE2, OUTFILE3, nxdir, caddir
# Config files are written straight from here-documents so that their content
# does not depend on how many leading columns each line happens to carry.
# ${PAKMGR} is expanded unquoted on purpose: it may hold a command plus flags.
install_http() {
    local rhel_family=false
    local apache_unit=""
    local stock_conf

    if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
        rhel_family=true
    fi

    if [ "$HTTPTYPE" = APACHE ]; then
        csserv=/lib/systemd/system
        if [ "$rhel_family" = true ]; then
            if ! command -v httpd &> /dev/null; then
                ${PAKMGR} install httpd
                systemctl enable --now httpd
            fi
            AOUTFILE="/etc/httpd/conf.d/code-server.conf"
            apache_unit=httpd
        elif [ "${OS}" = ubuntu ]; then
            if ! command -v apache2 &> /dev/null; then
                ${PAKMGR} install apache2
                systemctl enable --now apache2
            fi
            AOUTFILE="/etc/apache2/sites-available/code-server.conf"
            apache_unit=apache2
        else
            echo "install_http: Apache setup is not supported on OS '${OS}'" >&2
            return 1
        fi
        # Order code-server after the unit that actually exists on this distro.
        sed -i "s/After=nginx.service/After=${apache_unit}.service/g" "$csserv/code-server.service"
        # Unquoted delimiter so $SERVERNAME is expanded; \$1 keeps the
        # mod_rewrite back-reference literal.
        cat > "$AOUTFILE" << EOF
<VirtualHost *:80>
ServerName $SERVERNAME
#ProxyPreserveHost On
RewriteEngine On
RewriteCond %{HTTP:Upgrade} =websocket [NC]
RewriteRule /(.*) ws://127.0.0.1:8080/\$1 [P,L]
RewriteCond %{HTTP:Upgrade} !=websocket [NC]
RewriteRule /(.*) http://127.0.0.1:8080/\$1 [P,L]
ProxyRequests off
#RequestHeader set X-Forwarded-Proto https
#RequestHeader set X-Forwarded-Port 443
ProxyPass / http://127.0.0.1:8080/ nocanon
ProxyPassReverse / http://127.0.0.1:8080/
</VirtualHost>
EOF
        if [ "${OS}" = ubuntu ]; then
            # sites-available is inert until the site and the modules the
            # vhost relies on are enabled.
            a2enmod proxy proxy_http proxy_wstunnel rewrite
            a2ensite code-server
        fi
        systemctl daemon-reload
        systemctl restart code-server
        systemctl restart "$apache_unit"
    fi

    if [ "$HTTPTYPE" = NGINX ]; then
        if [ "$rhel_family" = true ]; then
            OUTFILE="/etc/yum.repos.d/nginx.repo"
            # Quoted delimiter: $releasever and $basearch must reach the repo
            # file literally; the package manager substitutes them, so no
            # per-release rewrite of baseurl is needed.
            cat > "$OUTFILE" << 'EOF'
[nginx-stable]
name=nginx stable repo
baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
gpgcheck=1
enabled=1
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
EOF
        fi
        if [ "${OS}" = ubuntu ]; then
            ${PAKMGR} install curl gnupg2 ca-certificates lsb-release
            echo "deb http://nginx.org/packages/ubuntu $(lsb_release -cs) nginx" | sudo tee /etc/apt/sources.list.d/nginx.list
            echo -e "Package: *\nPin: origin nginx.org\nPin: release o=nginx\nPin-Priority: 900\n" | sudo tee /etc/apt/preferences.d/99nginx
            curl -o /tmp/nginx_signing.key https://nginx.org/keys/nginx_signing.key
            # Print the key details so the operator can compare fingerprints.
            if [ "$OSVER" = 16 ]; then
                gpg --with-fingerprint /tmp/nginx_signing.key
            else
                gpg --dry-run --quiet --import --import-options show-only /tmp/nginx_signing.key
            fi
            sudo mv /tmp/nginx_signing.key /etc/apt/trusted.gpg.d/nginx_signing.asc
            sudo apt update
        fi
        ${PAKMGR} install nginx
        if [ "$rhel_family" = true ]; then
            nxdir=/etc/nginx/conf.d
        elif [ "${OS}" = ubuntu ]; then
            if [ "$OSVER" = 16 ]; then
                nxdir=/etc/nginx/sites-available
            else
                nxdir=/etc/nginx/conf.d
            fi
        fi
        OUTFILE2="$nxdir/code-server.conf"
        # \$host / \$http_upgrade are nginx variables and must stay literal.
        cat > "$OUTFILE2" << EOF
server {
listen 80;
listen [::]:80;
server_name $SERVERNAME;
location / {
proxy_pass http://localhost:8080/;
proxy_set_header Host \$host;
proxy_set_header Upgrade \$http_upgrade;
proxy_set_header Connection upgrade;
proxy_set_header Accept-Encoding gzip;
}
}
EOF
        # Park whichever stock site file is present; the name differs between
        # the sites-available and conf.d layouts.
        for stock_conf in default default.conf; do
            if [ -e "$nxdir/$stock_conf" ]; then
                mv "$nxdir/$stock_conf" "$nxdir/$stock_conf.orig"
            fi
        done
        # Only the sites-available layout needs an explicit enable link;
        # linking when the config lives in conf.d would leave a dangling link.
        if [ "$nxdir" = /etc/nginx/sites-available ]; then
            rm -f /etc/nginx/sites-enabled/default
            ln -sf /etc/nginx/sites-available/code-server.conf /etc/nginx/sites-enabled/code-server.conf
        fi
        systemctl start nginx
        systemctl enable nginx
    fi

    if [ "$HTTPTYPE" = CADDY ]; then
        if [ "${OS}" = ubuntu ]; then
            ${PAKMGR} install debian-keyring debian-archive-keyring apt-transport-https
            curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/cfg/gpg/gpg.155B6D79CA56EA34.key' | apt-key add -
            curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/cfg/setup/config.deb.txt?distro=debian&version=any-version' | tee -a /etc/apt/sources.list.d/caddy-stable.list
            ${PAKMGR} update
            ${PAKMGR} install caddy
        elif [ "$rhel_family" = true ]; then
            if [ "${OSVER}" = 7 ]; then
                ${PAKMGR} install yum-plugin-copr
            elif [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
                ${PAKMGR} install 'dnf-command(copr)'
            fi
            ${PAKMGR} copr enable @caddy/caddy
            ${PAKMGR} install caddy
        fi
        caddir=/etc/caddy
        mv "$caddir/Caddyfile" "$caddir/Caddyfile.orig"
        OUTFILE3="$caddir/Caddyfile"
        # The leading global-options block points Caddy at the Let's Encrypt
        # staging CA; the inline markers tell the operator how to go live.
        cat > "$OUTFILE3" << EOF
{ #### Remove these 3 lines
acme_ca https://acme-staging-v02.api.letsencrypt.org/directory #### to make server live
} #### and grab cert from letsencrypt
$SERVERNAME {
basicauth /* {
$UNAME $CADPASS
}
reverse_proxy 127.0.0.1:8080
}
EOF
        systemctl enable caddy
        systemctl start caddy
    fi
}
##########################################
#### Install Certbot and request Cert ####
##########################################
# Install certbot (via snap) plus the plugin matching $HTTPTYPE, request a
# certificate for $SERVERNAME and schedule a twice-daily renewal.
# Globals read: HTTPTYPE, OS, OSVER, PAKMGR, SERVERNAME, EMAIL
# ${PAKMGR} is expanded unquoted on purpose: it may hold a command plus flags.
install_certbot() {
    local plugin=""
    local cron_file=""
    local rhel_family=false

    if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
        rhel_family=true
    fi
    # The NGINX and APACHE paths differ only in the plugin name.
    case "$HTTPTYPE" in
        NGINX) plugin=nginx ;;
        APACHE) plugin=apache ;;
    esac

    if [ -n "$plugin" ]; then
        if [ "${OS}" = ubuntu ]; then
            ${PAKMGR} remove letsencrypt
            ${PAKMGR} remove certbot
            snap install core; snap refresh core
            snap install --classic certbot
            ${PAKMGR} install "python3-certbot-${plugin}"
        elif [ "$rhel_family" = true ]; then
            ${PAKMGR} remove certbot
            ${PAKMGR} install epel-release
            ${PAKMGR} install snapd
            if [ "$OSVER" = 7 ]; then
                ${PAKMGR} install "python2-certbot-${plugin}"
            elif [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
                ${PAKMGR} install "python3-certbot-${plugin}"
            fi
        fi
    fi

    systemctl enable --now snapd.socket
    # Create the convenience links only when the target name is free: a plain
    # "ln -s" errors on re-runs and, when /snap is already a directory, drops
    # a stray link inside it instead.
    if [ ! -e /snap ]; then
        ln -s /var/lib/snapd/snap /snap
    fi
    snap install core; snap refresh core
    snap install --classic certbot
    if [ ! -e /usr/bin/certbot ]; then
        ln -s /snap/bin/certbot /usr/bin/certbot
    fi

    #certbot certonly --redirect --agree-tos --nginx -d $SERVERNAME -m "$EMAIL" --dry-run
    if [ "$HTTPTYPE" = NGINX ]; then
        certbot --non-interactive --redirect --agree-tos --nginx -d "$SERVERNAME" -m "$EMAIL"
        systemctl restart nginx
    elif [ "$HTTPTYPE" = APACHE ]; then
        certbot --non-interactive --redirect --agree-tos --apache -d "$SERVERNAME" -m "$EMAIL"
        if [ "$rhel_family" = true ]; then
            systemctl restart httpd
        else
            systemctl restart apache2
        fi
    fi

    # Renewal job in root's personal crontab. Per-user crontab lines have no
    # user column, so the command follows the five time fields directly.
    if [ -n "$plugin" ]; then
        if [ "$rhel_family" = true ]; then
            cron_file=/var/spool/cron/root
        elif [ "${OS}" = ubuntu ]; then
            cron_file=/var/spool/cron/crontabs/root
        fi
        if [ -n "$cron_file" ] && ! grep -q "certbot" "$cron_file" 2>/dev/null; then
            echo "0 */12 * * * certbot -q renew --${plugin}" >> "$cron_file"
            chmod 600 "$cron_file"
        fi
    fi

    # Best effort: build and load an SELinux module from logged nginx denials.
    if [[ ${OS} != "ubuntu" && ${OS} != "debian" ]]; then
        grep nginx /var/log/audit/audit.log | audit2allow -M nginx 2>/dev/null || true
        semodule -i nginx.pp 2>/dev/null || true
    fi
}
# Download and install the CSF/LFD firewall, then tune /etc/csf/csf.conf.
# Globals read: OS, PAKMGR
# Side effects: leaves the working directory at /usr/src/csf.
# ${PAKMGR} is expanded unquoted on purpose: it may hold a command plus flags.
function install_firewall() {
    local csf_conf=/etc/csf/csf.conf
    local -a csf_edits

    if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
        ${PAKMGR} install ipset perl-libwww-perl.noarch perl-LWP-Protocol-https.noarch perl-GDGraph perl-Sys-Syslog perl-Math-BigInt
    elif [ "${OS}" = ubuntu ]; then
        ${PAKMGR} install ipset libwww-perl liblwp-protocol-https-perl libgd-graph-perl
    fi
    cd /usr/src || exit
    # -O overwrites an earlier download; otherwise wget saves csf.tgz.1 and
    # tar would unpack the stale archive.
    wget -O csf.tgz https://download.configserver.com/csf.tgz
    tar -xzf csf.tgz
    cd csf || exit
    ./install.sh
    echo ''
    echo '###########################################'
    echo '#### Testing if CSF firewall will work ####'
    echo '###########################################'
    echo ''
    perl /usr/local/csf/bin/csftest.pl

    # All edits are applied in one sed pass, in the order listed. Order
    # matters: e.g. RESTRICT_SYSLOG is rewritten before the broader
    # 'SYSLOG = "0"' substitution could touch the same line.
    csf_edits=(
        ##### Initial Settings #####
        -e 's/TESTING = "1"/TESTING = "0"/g'
        -e 's/RESTRICT_SYSLOG = "0"/RESTRICT_SYSLOG = "3"/g'
        -e '/^RESTRICT_UI/c\RESTRICT_UI = "1"'
        -e '/^AUTO_UPDATES/c\AUTO_UPDATES = "1"'
        ##### IPv4 Port Settings #####
        -e 's/TCP_IN = "20,21,22,25,53,80,110,143,443,465,587,993,995"/TCP_IN = "22,80,443,5666,10000"/g'
        -e 's/TCP_OUT = "20,21,22,25,53,80,110,113,443,587,993,995"/TCP_OUT = "22,25,53,80,443,5666,10000"/g'
        -e 's/UDP_IN = "20,21,53,80,443"/UDP_IN = "80,443"/g'
        -e 's/UDP_OUT = "20,21,53,113,123"/UDP_OUT = "53,113,123"/g'
        -e '/^ICMP_IN_RATE/c\ICMP_IN_RATE = "1/s"'
        ##### IPv6 Port Settings #####
        -e 's/IPV6 = "0"/IPV6 = "1"/g'
        -e 's/TCP6_IN = "20,21,22,25,53,80,110,143,443,465,587,993,995"/TCP6_IN = "22,80,443,5666"/g'
        -e 's/TCP6_OUT = "20,21,22,25,53,80,110,113,443,587,993,995"/TCP6_OUT = "22,80,443,5666"/g'
        -e 's/UDP6_IN = "20,21,53,80,443"/UDP6_IN = "80,443"/g'
        -e 's/UDP6_OUT = "20,21,53,113,123"/UDP6_OUT = "53,113,123"/g'
        ##### General Settings #####
        -e 's/SYSLOG_CHECK = "0"/SYSLOG_CHECK = "300"/g'
        -e '/^IGNORE_ALLOW/c\IGNORE_ALLOW = "0"'
        -e '/^LF_CSF/c\LF_CSF = "1"'
        -e 's/LF_IPSET = "0"/LF_IPSET = "1"/g'
        -e '/^PACKET_FILTER/c\PACKET_FILTER = "1"'
        ##### SMTP Settings #####
        -e 's/SMTP_BLOCK = "0"/SMTP_BLOCK = "1"/g'
        ##### Port Flood Settings #####
        -e 's/SYNFLOOD = "0"/SYNFLOOD = "1"/g'
        -e 's/CONNLIMIT = ""/CONNLIMIT = "22;5,25;3,80;10"/g'
        -e 's/PORTFLOOD = ""/PORTFLOOD = "22;tcp;5;300,25;tcp;5;300,80;tcp;20;5"/g'
        -e 's/UDPFLOOD = "0"/UDPFLOOD = "1"/g'
        ##### Logging Settings #####
        -e 's/SYSLOG = "0"/SYSLOG = "1"/g'
        -e '/^DROP_LOGGING/c\DROP_LOGGING = "1"'
        -e '/^DROP_ONLYRES/c\DROP_ONLYRES = "0"'
        -e '/^UDPFLOOD_LOGGING/c\UDPFLOOD_LOGGING = "1"'
        ##### Temp to Perm/Netblock Settings #####
        # Anchored on "NAME = " so only the exact key is replaced, not other
        # keys that merely share the prefix.
        -e '/^LF_PERMBLOCK = /c\LF_PERMBLOCK = "1"'
        -e 's/LF_NETBLOCK = "0"/LF_NETBLOCK = "1"/g'
        ##### Login Failure Blocking and Alerts #####
        -e 's/LF_SSHD = "5"/LF_SSHD = "3"/g'
        -e 's/LF_FTPD = "10"/LF_FTPD = "5"/g'
        -e 's/LF_SMTPAUTH = "0"/LF_SMTPAUTH = "5"/g'
        -e 's/LF_EXIMSYNTAX = "0"/LF_EXIMSYNTAX = "10"/g'
        -e 's/LF_POP3D = "0"/LF_POP3D = "5"/g'
        -e 's/LF_IMAPD = "0"/LF_IMAPD = "5"/g'
        -e 's/LF_HTACCESS = "0"/LF_HTACCESS = "5"/g'
        -e 's/LF_MODSEC = "5"/LF_MODSEC = "3"/g'
        -e 's/LF_CXS = "0"/LF_CXS = "1"/g'
        -e 's/LF_SYMLINK = "0"/LF_SYMLINK = "5"/g'
        -e 's/LF_WEBMIN = "0"/LF_WEBMIN = "3"/g'
        -e '/^LF_SSH_EMAIL_ALERT/c\LF_SSH_EMAIL_ALERT = "1"'
        -e '/^LF_SU_EMAIL_ALERT/c\LF_SU_EMAIL_ALERT = "1"'
        -e '/^LF_SUDO_EMAIL_ALERT/c\LF_SUDO_EMAIL_ALERT = "1"'
        -e '/^LF_WEBMIN_EMAIL_ALERT/c\LF_WEBMIN_EMAIL_ALERT = "1"'
        -e '/^LF_CONSOLE_EMAIL_ALERT/c\LF_CONSOLE_EMAIL_ALERT = "1"'
        -e '/^LF_BLOCKINONLY/c\LF_BLOCKINONLY = "0"'
        ##### Directory Watching & Integrity #####
        -e '/^LF_DIRWATCH = /c\LF_DIRWATCH = "300"'
        -e '/^LF_INTEGRITY/c\LF_INTEGRITY = "3600"'
        ##### Distributed Attacks #####
        -e 's/LF_DISTATTACK = "0"/LF_DISTATTACK = "1"/g'
        -e 's/LF_DISTFTP = "0"/LF_DISTFTP = "5"/g'
        -e 's/LF_DISTSMTP = "0"/LF_DISTSMTP = "5"/g'
        ##### Connection Tracking #####
        -e 's/CT_LIMIT = "0"/CT_LIMIT = "300"/g'
        ##### Process Tracking #####
        -e '/^PT_LIMIT/c\PT_LIMIT = "60"'
        -e '/^PT_SKIP_HTTP/c\PT_SKIP_HTTP = "0"'
        -e 's/PT_DELETED = "0"/PT_DELETED = "1"/g'
        -e 's/PT_USERTIME = "1800"/PT_USERTIME = "0"/g'
        -e 's/PT_FORKBOMB = "0"/PT_FORKBOMB = "250"/g'
        ##### Port Scan Tracking #####
        -e 's/PS_INTERVAL = "0"/PS_INTERVAL = "300"/g'
        -e '/^PS_EMAIL_ALERT/c\PS_EMAIL_ALERT = "1"'
        ##### User ID Tracking #####
        -e 's/UID_INTERVAL = "0"/UID_INTERVAL = "600"/g'
        ##### Account Tracking #####
        -e 's/AT_ALERT = "2"/AT_ALERT = "1"/g'
    )
    sed -i "${csf_edits[@]}" "$csf_conf"

    systemctl enable --now csf
    systemctl enable --now lfd
}
# Add the Webmin package repository for the detected distribution and install
# Webmin from it.
# Globals read:    OS, OSVER, PAKMGR
# Globals written: OUTFILE
# ${PAKMGR} is expanded unquoted on purpose: it may hold a command plus flags.
function install_webmin() {
    if [[ ${OS} = centos || ${OS} = red || ${OS} = oracle || ${OS} = rocky || ${OS} = alma ]]; then
        OUTFILE="/etc/yum.repos.d/webmin.repo"
        # Written directly so the repo file never loses leading characters.
        cat > "$OUTFILE" << 'EOF'
[Webmin]
name=Webmin Distribution Neutral
#baseurl=https://download.webmin.com/download/yum
mirrorlist=https://download.webmin.com/download/yum/mirrorlist
enabled=1
EOF
        # -O overwrites an older copy; otherwise wget saves a ".1" file and
        # the stale key would be the one imported.
        wget -O jcameron-key.asc https://download.webmin.com/jcameron-key.asc
        rpm --import jcameron-key.asc
        if [ "${OSVER}" = 7 ]; then
            ${PAKMGR} install perl-Encode-Detect perl-Net-SSLeay perl-Data-Dumper tcp_wrappers-devel perl-IO-Tty webmin unzip
        elif [ "${OSVER}" = 8 ] || [ "${OSVER}" = 9 ]; then
            ${PAKMGR} install perl-Encode-Detect perl-Net-SSLeay perl-Data-Dumper tcp_wrappers tcp_wrappers-libs unzip
            # The repository id is "powertools" on release 8 and "crb" on 9.
            dnf config-manager --set-enabled powertools \
                || dnf config-manager --set-enabled crb
            ${PAKMGR} install perl-IO-Tty webmin
        fi
    elif [ "${OS}" = ubuntu ]; then
        # Append the repo stanza once; re-runs must not duplicate it.
        if ! grep -q 'download.webmin.com/download/repository' /etc/apt/sources.list 2>/dev/null; then
            {
                echo ''
                echo '############################'
                echo '#### Adding Webmin Repo ####'
                echo '############################'
                echo ''
                echo 'deb https://download.webmin.com/download/repository sarge contrib'
            } >> /etc/apt/sources.list
        fi
        wget -O jcameron-key.asc https://download.webmin.com/jcameron-key.asc
        apt-key add jcameron-key.asc
        ${PAKMGR} install apt-transport-https
        ${PAKMGR} update
        ${PAKMGR} install webmin
    fi
}
# Run every installation stage, in dependency order.
for install_step in \
    get_latest_version \
    install_codeserver \
    install_http \
    install_certbot \
    install_firewall \
    install_webmin
do
    "$install_step"
done
+189
View File
@@ -0,0 +1,189 @@
#!/bin/bash
#############################################################
#### ntfy Push Notification Server Setup ####
#### Install and configure ntfy as a systemd service ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 1.0 ####
#### ####
#### Usage: sudo ./install-ntfy-server.sh ####
#############################################################
set -euo pipefail

# --- Configuration (edit these before running) ---
NTFY_VERSION="2.8.0"
DOMAIN="ntfy.example.com"
NTFY_USER="ntfy"
NTFY_DIR="/var/lib/ntfy"
CONFIG_DIR="/etc/ntfy"

# Ensure script is run as root
if [[ $EUID -ne 0 ]]; then
    echo "ERROR: This script must be run as root (use sudo)."
    exit 1
fi

echo "=== Installing ntfy v${NTFY_VERSION} ==="

# Create ntfy user
if ! id "$NTFY_USER" &>/dev/null; then
    echo "Creating ntfy user..."
    useradd --system --no-create-home --shell /usr/sbin/nologin "$NTFY_USER"
fi

# Create directories
echo "Creating directories..."
mkdir -p "$NTFY_DIR" "$CONFIG_DIR"
chown "$NTFY_USER:$NTFY_USER" "$NTFY_DIR"

# Download and install ntfy.
# A private mktemp directory replaces fixed /tmp names (this runs as root);
# the EXIT trap removes it on every exit path.
echo "Downloading ntfy..."
work_dir=$(mktemp -d)
trap 'rm -rf -- "$work_dir"' EXIT
mkdir "$work_dir/extract"
wget -q -O "$work_dir/ntfy.tar.gz" "https://github.com/binwiederhier/ntfy/releases/download/v${NTFY_VERSION}/ntfy_${NTFY_VERSION}_linux_amd64.tar.gz"
tar -xzf "$work_dir/ntfy.tar.gz" -C "$work_dir/extract"
ntfy_binary=$(find "$work_dir/extract" -name "ntfy" -type f -print -quit)
# Fail here when the archive holds no binary, so a leftover
# /usr/local/bin/ntfy from an earlier run cannot pass the check below.
if [[ -z "$ntfy_binary" ]]; then
    echo "✗ ntfy binary not found"
    exit 1
fi
install -m 0755 "$ntfy_binary" /usr/local/bin/ntfy

# Verify installation
echo "Verifying installation..."
if [ -x /usr/local/bin/ntfy ]; then
    echo "✓ ntfy binary installed at /usr/local/bin/ntfy"
else
    echo "✗ ntfy binary not found"
    exit 1
fi

# Create configuration (paths follow the variables set at the top)
echo "Installing configuration..."
cat > "$CONFIG_DIR/server.yml" << EOF
# ntfy server configuration
# Location: ${CONFIG_DIR}/server.yml
# Base URL for the server (used in notification links)
base-url: "http://${DOMAIN}"
# Listen address - use internal port, proxy externally
listen-http: "127.0.0.1:8090"
# Authentication - deny by default, require tokens
auth-default-access: "deny-all"
auth-file: "${NTFY_DIR}/user.db"
# Cache for offline message delivery
cache-file: "${NTFY_DIR}/cache.db"
cache-duration: "24h"
# Behind nginx/caddy reverse proxy
behind-proxy: true
# Attachment settings
attachment-cache-dir: "${NTFY_DIR}/attachments"
attachment-total-size-limit: "1G"
attachment-file-size-limit: "10M"
attachment-expiry-duration: "24h"
# Logging
log-level: "info"
log-format: "json"
# Rate limiting per visitor
visitor-subscription-limit: 30
visitor-request-limit-burst: 60
visitor-request-limit-replenish: "5s"
EOF

# Unquoted delimiter so the service runs as the configured account and uses
# the configured paths; the unit text contains no other "$" sequences.
cat > /etc/systemd/system/ntfy.service << EOF
# ntfy systemd service
# Location: /etc/systemd/system/ntfy.service
[Unit]
Description=ntfy push notification server
Documentation=https://ntfy.sh/docs/
After=network.target
[Service]
Type=simple
User=${NTFY_USER}
Group=${NTFY_USER}
ExecStart=/usr/local/bin/ntfy serve --config ${CONFIG_DIR}/server.yml
Restart=always
RestartSec=5
# Security hardening
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=strict
ProtectHome=yes
ReadWritePaths=${NTFY_DIR}
# Resource limits
LimitNOFILE=65535
MemoryMax=512M
[Install]
WantedBy=multi-user.target
EOF

# Enable and start service
echo "Enabling ntfy service..."
systemctl daemon-reload
systemctl enable ntfy
systemctl start ntfy

# Wait for service to start
sleep 2

# Check status
if systemctl is-active --quiet ntfy; then
    echo "✓ ntfy service is running"
else
    echo "✗ ntfy service failed to start"
    # Diagnostic only; its exit status must not pre-empt the exit below.
    systemctl status ntfy || true
    exit 1
fi

echo ""
echo "=== Setting up authentication ==="
echo ""

# Create admin user (skip if exists).
# The listing is captured first (both streams) so that grep closing the pipe
# early cannot fail the pipeline under pipefail.
# NOTE(review): the pattern assumes entries are printed as "user <name> (...)"
# as shown in the ntfy documentation — confirm against the installed version.
echo "Creating admin user..."
existing_users=$(ntfy user list 2>&1 || true)
if grep -q "^user admin " <<< "$existing_users"; then
    echo "✓ admin user already exists"
else
    ntfy user add --role=admin admin
fi
echo ""

# Set access permissions for alert topics.
# Non-fatal: a rejected ACL change must not abort the run before the
# next-step instructions are printed.
echo "Setting access permissions for alert topics..."
if ntfy access admin 'alerts-*' rw; then
    echo "✓ admin has rw access to alerts-*"
else
    echo "WARNING: could not set explicit access for admin on alerts-* (continuing)" >&2
fi

# The CLI calls above ran as root; hand any files they created in the data
# directory back to the service account.
chown -R "$NTFY_USER:$NTFY_USER" "$NTFY_DIR"

echo ""
echo "=== Next Steps ==="
echo ""
echo "1. Create user accounts for desktop clients:"
echo " ntfy user add --role=user <username>"
echo " ntfy token add <username>"
echo ""
echo "2. Grant topic access:"
echo " ntfy access <username> alerts-myapp ro # Read-only to app alerts"
echo " ntfy access <username> alerts-critical ro # Read-only to critical alerts"
echo ""
echo "3. Set up a reverse proxy (nginx/caddy) for ${DOMAIN}"
echo " pointing to 127.0.0.1:8090"
echo ""
echo "4. Test with:"
echo " curl -u admin:<password> -d 'Test notification' http://127.0.0.1:8090/alerts-test"
echo ""
echo "=== Installation complete ==="
+1652
View File
File diff suppressed because it is too large Load Diff
+628
View File
@@ -0,0 +1,628 @@
#!/bin/bash
################################################################################
# Script Name: iptables-blocklist-metrics.sh
# Version: 2.0
# Description: Prometheus exporter for iptables threat feed blocking metrics
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
################################################################################
# cron starts jobs with a minimal PATH; put the sbin directories first so
# ipset and iptables resolve there as well.
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH}"
export PATH
#
# EXPORTED METRICS:
# - iptables_blocklist_info - Exporter metadata
# - iptables_blocklist_enabled_feeds - Count of enabled feeds
# - iptables_blocklist_ipset_size - IPs per feed ipset (IPv4/v6)
# - iptables_blocklist_blocked_total - Block counts per feed (1h, 24h)
# - iptables_blocklist_effectiveness - Blocks per 1000 IPs (24h)
# - iptables_blocklist_last_update_timestamp - Feed cache file mtime
# - iptables_blocklist_cache_age_seconds - Age of feed cache files
# - iptables_blocklist_file_size_bytes - Feed parsed file sizes
# - iptables_blocklist_ip_version_ratio - IPv4 vs IPv6 distribution per feed
# - iptables_blocklist_total_unique_ips - Total unique IPs across all feeds
# - iptables_blocklist_total_rules - Total iptables rules
# - iptables_blocklist_rule_packets - Packet counts from iptables rules
# - iptables_blocklist_rule_bytes - Byte counts from iptables rules
# - iptables_blocklist_conntrack_entries - Current conntrack entries
# - iptables_blocklist_conntrack_max - Maximum conntrack entries
# - iptables_blocklist_conntrack_usage_percent - Conntrack usage percentage
# - iptables_blocklist_whitelist_size - Whitelist ipset sizes
# - iptables_blocklist_exporter_runtime_seconds - Script execution time
# --- Feed configuration and cache locations ---
CONFIG_DIR="/etc/iptables-threats"
CACHE_DIR="${CONFIG_DIR}/cache"
FEEDS_CONFIG="${CONFIG_DIR}/feeds.conf"

# --- ipset naming ---
IPSET_PREFIX="iptables-feed"
WHITELIST_IPSET="iptables-whitelist"
WHITELIST_IPSET_V6="iptables-whitelist-v6"

# --- Logging and output targets ---
LOG_FILE="/var/log/iptables-threats.log"
TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""

# --- HTTP mode defaults ---
HTTP_MODE=false
HTTP_PORT=9419

# --- Runtime bookkeeping ---
SCRIPT_START_TIME="$(date +%s)"
LOCK_FILE="/var/run/iptables-blocklist-metrics.lock"
# Print the command-line help to stdout and terminate the script with status 0.
# Globals read: HTTP_PORT (shown as the port used by --http)
show_usage() {
    local self="$0"
    cat <<USAGE_TEXT
Usage: $self [OPTIONS]
Export per-feed iptables threat statistics as Prometheus metrics.
MODES:
--textfile Write to node_exporter textfile collector
--http Run HTTP server on port $HTTP_PORT
OPTIONS:
-p, --port HTTP port (default: 9419)
-o, --output Output file
-h, --help Show this help
EXAMPLES:
# Write to textfile collector
$self --textfile
# Run as HTTP server
$self --http --port 9419
# Generate metrics to stdout
$self
USAGE_TEXT
    exit 0
}
# Parse command-line options into the script's global settings.
# Arguments: the script's positional parameters ("$@")
# Globals written: OUTPUT_FILE, HTTP_MODE, HTTP_PORT
# Globals read:    TEXTFILE_DIR
# Exits with status 1 on an unknown option or when an option that needs a
# value is given without one.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help) show_usage ;;
            --textfile) OUTPUT_FILE="$TEXTFILE_DIR/iptables_blocklist.prom"; shift ;;
            --http) HTTP_MODE=true; shift ;;
            -p|--port|-o|--output)
                # "shift 2" with a single argument left fails without
                # shifting, which would spin this loop forever; reject the
                # missing value up front instead.
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires a value" >&2
                    exit 1
                fi
                case "$1" in
                    -p|--port) HTTP_PORT="$2" ;;
                    *) OUTPUT_FILE="$2" ;;
                esac
                shift 2
                ;;
            *) echo "Unknown: $1"; exit 1 ;;
        esac
    done
}
# Print the number of member lines in the named ipset (0 when it cannot be
# listed). A member line is one that starts with a hex digit, "." or ":".
# Arguments: $1 - ipset name
get_ipset_size() {
    local set_name="$1"
    local entries
    entries=$(ipset list "$set_name" 2>/dev/null | grep -c '^[0-9a-fA-F.:]')
    echo "${entries:-0}"
}
# Print how many kernel journal lines since the given time carry the IPv4
# tag "[THREAT:<feed>]".
# Arguments: $1 - feed name
#            $2 - start of the window, passed to journalctl --since
get_feed_blocks() {
    local feed_name="$1"
    local since="$2"
    local hits
    hits=$(journalctl -k --since "$since" 2>/dev/null | grep -c "\[THREAT:${feed_name}\]")
    echo "${hits:-0}"
}
# Print how many kernel journal lines since the given time carry the IPv6
# tag "[THREAT-v6:<feed>]".
# Arguments: $1 - feed name
#            $2 - start of the window, passed to journalctl --since
get_feed_blocks_v6() {
    local feed_name="$1"
    local since="$2"
    local hits
    hits=$(journalctl -k --since "$since" 2>/dev/null | grep -c "\[THREAT-v6:${feed_name}\]")
    echo "${hits:-0}"
}
# Print the modification time of a regular file as epoch seconds, or 0 when
# the path is not a regular file or stat fails.
# Arguments: $1 - file path
get_file_timestamp() {
    local target="$1"
    if [ -f "$target" ] && stat -c %Y "$target" 2>/dev/null; then
        return 0
    fi
    echo "0"
}
# Print the size of a regular file in bytes, or 0 when the path is not a
# regular file or stat fails.
# Arguments: $1 - file path
get_file_size() {
    local target="$1"
    if [ -f "$target" ] && stat -c %s "$target" 2>/dev/null; then
        return 0
    fi
    echo "0"
}
# Print the age of a regular file in seconds (now minus its mtime), or 0 when
# the path is not a regular file.
# Arguments: $1 - file path
get_cache_age() {
    local target="$1"
    local now mtime
    if [ ! -f "$target" ]; then
        echo "0"
        return
    fi
    now=$(date +%s)
    # If stat fails the mtime counts as 0, so the current epoch is printed.
    mtime=$(stat -c %Y "$target" 2>/dev/null || echo 0)
    echo $((now - mtime))
}
# Print "packets|bytes" for the first rule in a chain that mentions the
# feed's ipset; prints nothing when no rule matches.
# Arguments: $1 - iptables chain name
#            $2 - feed name
# Globals read: IPSET_PREFIX
get_iptables_rule_stats() {
    local chain_name="$1"
    local feed_name="$2"
    local set_name="${IPSET_PREFIX}-${feed_name}"
    # -x makes iptables print exact counters instead of K/M/G abbreviations.
    iptables -L "$chain_name" -v -n -x 2>/dev/null \
        | grep -m 1 "$set_name" \
        | awk '{ printf "%s|%s\n", $1, $2 }'
}
# Print the number of distinct lines across all parsed feed files of one IP
# version in the cache directory; prints 0 for any other version argument.
# Arguments: $1 - "4" or "6"
# Globals read: CACHE_DIR
get_total_unique_ips() {
    local family="$1"
    local total=0
    case "$family" in
        4|6)
            total=$(cat "$CACHE_DIR/"*-v"$family".parsed 2>/dev/null | sort -u | wc -l 2>/dev/null)
            ;;
    esac
    echo "${total:-0}"
}
# Print the content of /proc/sys/net/netfilter/nf_conntrack_count, or 0 when
# that file does not exist.
get_conntrack_count() {
    local proc_file=/proc/sys/net/netfilter/nf_conntrack_count
    if [ ! -f "$proc_file" ]; then
        echo "0"
        return
    fi
    cat "$proc_file"
}
# Print the content of /proc/sys/net/netfilter/nf_conntrack_max, or 0 when
# that file does not exist.
get_conntrack_max() {
    local proc_file=/proc/sys/net/netfilter/nf_conntrack_max
    if [ ! -f "$proc_file" ]; then
        echo "0"
        return
    fi
    cat "$proc_file"
}
# Print the fourth field of the "Size in memory:" line from the terse listing
# of the named ipset, or 0 when nothing is found.
# Arguments: $1 - ipset name
get_ipset_memory() {
    local set_name="$1"
    local bytes
    bytes=$(ipset list "$set_name" -t 2>/dev/null | awk '/Size in memory:/ { print $4 }')
    echo "${bytes:-0}"
}
# Print columns 3, 4 and 5 of the last "df -B1" line for the cache directory,
# joined with "|"; prints "0|0|0%" when the directory does not exist.
# Globals read: CACHE_DIR
get_cache_disk_usage() {
    if [ ! -d "$CACHE_DIR" ]; then
        echo "0|0|0%"
        return
    fi
    df -B1 "$CACHE_DIR" 2>/dev/null | tail -n 1 | awk '{ print $3 "|" $4 "|" $5 }'
}
# Print the byte total reported by "du -sb" for the cache directory, or 0
# when the directory does not exist.
# Globals read: CACHE_DIR
get_total_cache_size() {
    if [ ! -d "$CACHE_DIR" ]; then
        echo "0"
        return
    fi
    du -sb "$CACHE_DIR" 2>/dev/null | awk '{ print $1 }'
}
# Take the single-instance lock by writing this shell's PID to $LOCK_FILE.
# Exits with status 1 when the lock is held by a process that is still alive;
# a lock left behind by a dead process is replaced.
# Globals read: LOCK_FILE
# Side effects: installs EXIT/INT/TERM traps so the lock is released.
acquire_lock() {
    local holder_pid

    # With noclobber the redirection fails if the file already exists, which
    # makes "check and create" a single step instead of two racing ones.
    if ! ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null; then
        if [ -e "$LOCK_FILE" ]; then
            holder_pid=$(cat "$LOCK_FILE" 2>/dev/null)
            if [ -n "$holder_pid" ] && kill -0 "$holder_pid" 2>/dev/null; then
                echo "ERROR: Another instance is already running (PID: $holder_pid)" >&2
                exit 1
            fi
            echo "Removing stale lock file" >&2
            rm -f "$LOCK_FILE"
        fi
        # Retry once. If the file still cannot be created (for example the
        # directory is not writable) carry on unlocked, as before, but say so.
        if ! ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null; then
            echo "WARNING: could not create lock file $LOCK_FILE" >&2
        fi
    fi

    trap cleanup EXIT
    # A handler that only cleans up would let the script keep running after
    # the signal; exiting here ends it and fires the EXIT trap above.
    trap 'exit 130' INT
    trap 'exit 143' TERM
}
# Release the single-instance lock.
# Globals read: LOCK_FILE
cleanup() {
    # -f keeps this silent and successful when the file is already gone.
    rm -f "${LOCK_FILE}"
}
generate_metrics() {
local start_time=$(date +%s)
local current_time=$(date +%s)
cat <<EOF
# HELP iptables_blocklist_info Per-feed iptables threat blocking info
# TYPE iptables_blocklist_info gauge
iptables_blocklist_info{mode="per-feed",version="2.0"} 1
# HELP iptables_blocklist_enabled_feeds Total enabled feeds
# TYPE iptables_blocklist_enabled_feeds gauge
iptables_blocklist_enabled_feeds $(grep -c '^1|' "$FEEDS_CONFIG" 2>/dev/null || echo 0)
# HELP iptables_blocklist_ipset_size Number of IPs per feed ipset
# TYPE iptables_blocklist_ipset_size gauge
EOF
# Only export metrics for ipsets that actually exist
for ipset_name in $(ipset list -n 2>/dev/null | grep "^${IPSET_PREFIX}-"); do
# Extract feed name and IP version
local feed_name="${ipset_name#${IPSET_PREFIX}-}"
local ip_version="4"
if [[ "$feed_name" =~ -v6$ ]]; then
feed_name="${feed_name%-v6}"
ip_version="6"
fi
# Get status from config
local status="disabled"
if grep -q "^1|${feed_name}|" "$FEEDS_CONFIG" 2>/dev/null; then
status="enabled"
fi
local size=$(get_ipset_size "$ipset_name")
echo "iptables_blocklist_ipset_size{feed=\"$feed_name\",ip_version=\"$ip_version\",status=\"$status\"} $size"
done
cat <<EOF
# HELP iptables_blocklist_blocked_total Blocked attempts per feed
# TYPE iptables_blocklist_blocked_total counter
EOF
# Per-feed block counts (IPv4 and IPv6)
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
[ "$enabled" != "1" ] && continue
local blocks_1h_v4 blocks_24h_v4 blocks_1h_v6 blocks_24h_v6
blocks_1h_v4=$(get_feed_blocks "$name" "1 hour ago")
blocks_24h_v4=$(get_feed_blocks "$name" "24 hours ago")
blocks_1h_v6=$(get_feed_blocks_v6 "$name" "1 hour ago")
blocks_24h_v6=$(get_feed_blocks_v6 "$name" "24 hours ago")
echo "iptables_blocklist_blocked_total{feed=\"$name\",ip_version=\"4\",period=\"1h\"} $blocks_1h_v4"
echo "iptables_blocklist_blocked_total{feed=\"$name\",ip_version=\"4\",period=\"24h\"} $blocks_24h_v4"
echo "iptables_blocklist_blocked_total{feed=\"$name\",ip_version=\"6\",period=\"1h\"} $blocks_1h_v6"
echo "iptables_blocklist_blocked_total{feed=\"$name\",ip_version=\"6\",period=\"24h\"} $blocks_24h_v6"
done < "$FEEDS_CONFIG"
fi
# Feed effectiveness (blocks per 1000 IPs)
cat <<EOF
# HELP iptables_blocklist_effectiveness Blocks per 1000 IPs in feed (24h)
# TYPE iptables_blocklist_effectiveness gauge
EOF
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
[ "$enabled" != "1" ] && continue
local ipset_size blocks_v4 blocks_v6 effectiveness_v4 effectiveness_v6
ipset_size=$(get_ipset_size "${IPSET_PREFIX}-${name}")
blocks_v4=$(get_feed_blocks "$name" "24 hours ago")
blocks_v6=$(get_feed_blocks_v6 "$name" "24 hours ago")
# Strip whitespace and ensure integers
ipset_size=$(echo "$ipset_size" | tr -d '\n' | tr -d ' ')
blocks_v4=$(echo "$blocks_v4" | tr -d '\n' | tr -d ' ')
blocks_v6=$(echo "$blocks_v6" | tr -d '\n' | tr -d ' ')
ipset_size=${ipset_size:-0}
blocks_v4=${blocks_v4:-0}
blocks_v6=${blocks_v6:-0}
if [ "$ipset_size" -gt 0 ] 2>/dev/null; then
effectiveness_v4=$(awk "BEGIN {printf \"%.2f\", ($blocks_v4 / $ipset_size) * 1000}" 2>/dev/null || echo "0")
effectiveness_v6=$(awk "BEGIN {printf \"%.2f\", ($blocks_v6 / $ipset_size) * 1000}" 2>/dev/null || echo "0")
else
effectiveness_v4="0"
effectiveness_v6="0"
fi
echo "iptables_blocklist_effectiveness{feed=\"$name\",ip_version=\"4\"} $effectiveness_v4"
echo "iptables_blocklist_effectiveness{feed=\"$name\",ip_version=\"6\"} $effectiveness_v6"
done < "$FEEDS_CONFIG"
fi
# Feed update/cache metrics
cat <<EOF
# HELP iptables_blocklist_last_update_timestamp Feed cache file last modified timestamp
# TYPE iptables_blocklist_last_update_timestamp gauge
EOF
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
local v4_file="${CACHE_DIR}/${name}-v4.parsed"
local v6_file="${CACHE_DIR}/${name}-v6.parsed"
local v4_ts v6_ts
v4_ts=$(get_file_timestamp "$v4_file")
v6_ts=$(get_file_timestamp "$v6_file")
echo "iptables_blocklist_last_update_timestamp{feed=\"$name\",ip_version=\"4\"} $v4_ts"
echo "iptables_blocklist_last_update_timestamp{feed=\"$name\",ip_version=\"6\"} $v6_ts"
done < "$FEEDS_CONFIG"
fi
cat <<EOF
# HELP iptables_blocklist_cache_age_seconds Age of feed cache files
# TYPE iptables_blocklist_cache_age_seconds gauge
EOF
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
local v4_file="${CACHE_DIR}/${name}-v4.parsed"
local v6_file="${CACHE_DIR}/${name}-v6.parsed"
local v4_age v6_age
v4_age=$(get_cache_age "$v4_file")
v6_age=$(get_cache_age "$v6_file")
echo "iptables_blocklist_cache_age_seconds{feed=\"$name\",ip_version=\"4\"} $v4_age"
echo "iptables_blocklist_cache_age_seconds{feed=\"$name\",ip_version=\"6\"} $v6_age"
done < "$FEEDS_CONFIG"
fi
cat <<EOF
# HELP iptables_blocklist_file_size_bytes Feed parsed file sizes
# TYPE iptables_blocklist_file_size_bytes gauge
EOF
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
local v4_file="${CACHE_DIR}/${name}-v4.parsed"
local v6_file="${CACHE_DIR}/${name}-v6.parsed"
local v4_size v6_size
v4_size=$(get_file_size "$v4_file")
v6_size=$(get_file_size "$v6_file")
echo "iptables_blocklist_file_size_bytes{feed=\"$name\",ip_version=\"4\",type=\"parsed\"} $v4_size"
echo "iptables_blocklist_file_size_bytes{feed=\"$name\",ip_version=\"6\",type=\"parsed\"} $v6_size"
done < "$FEEDS_CONFIG"
fi
# IP version distribution ratio
cat <<EOF
# HELP iptables_blocklist_ip_version_ratio Ratio of IPv4 to IPv6 addresses per feed
# TYPE iptables_blocklist_ip_version_ratio gauge
EOF
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
local v4_size v6_size total ratio_v4 ratio_v6
v4_size=$(get_ipset_size "${IPSET_PREFIX}-${name}")
v6_size=$(get_ipset_size "${IPSET_PREFIX}-${name}-v6")
v4_size=${v4_size:-0}
v6_size=${v6_size:-0}
total=$((v4_size + v6_size))
if [ "$total" -gt 0 ] 2>/dev/null; then
ratio_v4=$(awk "BEGIN {printf \"%.4f\", $v4_size / $total}" 2>/dev/null || echo "0")
ratio_v6=$(awk "BEGIN {printf \"%.4f\", $v6_size / $total}" 2>/dev/null || echo "0")
else
ratio_v4="0"
ratio_v6="0"
fi
echo "iptables_blocklist_ip_version_ratio{feed=\"$name\",version=\"4\"} $ratio_v4"
echo "iptables_blocklist_ip_version_ratio{feed=\"$name\",version=\"6\"} $ratio_v6"
done < "$FEEDS_CONFIG"
fi
# Total metrics
cat <<EOF
# HELP iptables_blocklist_total_unique_ips Total unique IPs across all feeds
# TYPE iptables_blocklist_total_unique_ips gauge
iptables_blocklist_total_unique_ips{ip_version="4"} $(get_total_unique_ips "4")
iptables_blocklist_total_unique_ips{ip_version="6"} $(get_total_unique_ips "6")
# HELP iptables_blocklist_total_rules Total iptables rules
# TYPE iptables_blocklist_total_rules gauge
iptables_blocklist_total_rules $(iptables -S 2>/dev/null | wc -l)
# HELP iptables_blocklist_rule_packets Packet counts from iptables rules
# TYPE iptables_blocklist_rule_packets counter
EOF
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
[ "$enabled" != "1" ] && continue
local stats_log stats_drop packets_log bytes_log packets_drop bytes_drop
stats_log=$(iptables -L INPUT -v -n -x 2>/dev/null | grep "${IPSET_PREFIX}-${name}" | grep LOG | head -1 | awk '{print $1"|"$2}')
stats_drop=$(iptables -L INPUT -v -n -x 2>/dev/null | grep "${IPSET_PREFIX}-${name}" | grep DROP | head -1 | awk '{print $1"|"$2}')
if [ -n "$stats_log" ]; then
packets_log=$(echo "$stats_log" | cut -d'|' -f1)
bytes_log=$(echo "$stats_log" | cut -d'|' -f2)
echo "iptables_blocklist_rule_packets{feed=\"$name\",ip_version=\"4\",action=\"log\"} ${packets_log:-0}"
fi
if [ -n "$stats_drop" ]; then
packets_drop=$(echo "$stats_drop" | cut -d'|' -f1)
bytes_drop=$(echo "$stats_drop" | cut -d'|' -f2)
echo "iptables_blocklist_rule_packets{feed=\"$name\",ip_version=\"4\",action=\"drop\"} ${packets_drop:-0}"
fi
done < "$FEEDS_CONFIG"
fi
cat <<EOF
# HELP iptables_blocklist_rule_bytes Byte counts from iptables rules
# TYPE iptables_blocklist_rule_bytes counter
EOF
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
[ "$enabled" != "1" ] && continue
local stats_log stats_drop packets_log bytes_log packets_drop bytes_drop
stats_log=$(iptables -L INPUT -v -n -x 2>/dev/null | grep "${IPSET_PREFIX}-${name}" | grep LOG | head -1 | awk '{print $1"|"$2}')
stats_drop=$(iptables -L INPUT -v -n -x 2>/dev/null | grep "${IPSET_PREFIX}-${name}" | grep DROP | head -1 | awk '{print $1"|"$2}')
if [ -n "$stats_log" ]; then
packets_log=$(echo "$stats_log" | cut -d'|' -f1)
bytes_log=$(echo "$stats_log" | cut -d'|' -f2)
echo "iptables_blocklist_rule_bytes{feed=\"$name\",ip_version=\"4\",action=\"log\"} ${bytes_log:-0}"
fi
if [ -n "$stats_drop" ]; then
packets_drop=$(echo "$stats_drop" | cut -d'|' -f1)
bytes_drop=$(echo "$stats_drop" | cut -d'|' -f2)
echo "iptables_blocklist_rule_bytes{feed=\"$name\",ip_version=\"4\",action=\"drop\"} ${bytes_drop:-0}"
fi
done < "$FEEDS_CONFIG"
fi
cat <<EOF
# HELP iptables_blocklist_ipset_memory_bytes Memory used by each ipset
# TYPE iptables_blocklist_ipset_memory_bytes gauge
EOF
if [ -f "$FEEDS_CONFIG" ]; then
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
mem_v4=$(get_ipset_memory "${IPSET_PREFIX}-${name}")
mem_v6=$(get_ipset_memory "${IPSET_PREFIX}-${name}-v6")
echo "iptables_blocklist_ipset_memory_bytes{feed=\"$name\",ip_version=\"4\"} $mem_v4"
echo "iptables_blocklist_ipset_memory_bytes{feed=\"$name\",ip_version=\"6\"} $mem_v6"
done < "$FEEDS_CONFIG"
fi
# Conntrack metrics
local conntrack_count conntrack_max conntrack_usage
conntrack_count=$(get_conntrack_count)
conntrack_max=$(get_conntrack_max)
if [ "$conntrack_max" -gt 0 ] 2>/dev/null; then
conntrack_usage=$(awk "BEGIN {printf \"%.2f\", ($conntrack_count / $conntrack_max) * 100}" 2>/dev/null || echo "0")
else
conntrack_usage="0"
fi
# Cache disk metrics
local disk_info cache_size disk_used disk_avail disk_pct
disk_info=$(get_cache_disk_usage)
cache_size=$(get_total_cache_size)
disk_used=$(echo "$disk_info" | cut -d'|' -f1)
disk_avail=$(echo "$disk_info" | cut -d'|' -f2)
disk_pct=$(echo "$disk_info" | cut -d'|' -f3 | tr -d '%')
cat <<EOF
# HELP iptables_blocklist_conntrack_entries Current conntrack entries
# TYPE iptables_blocklist_conntrack_entries gauge
iptables_blocklist_conntrack_entries $conntrack_count
# HELP iptables_blocklist_conntrack_max Maximum conntrack entries
# TYPE iptables_blocklist_conntrack_max gauge
iptables_blocklist_conntrack_max $conntrack_max
# HELP iptables_blocklist_conntrack_usage_percent Conntrack usage percentage
# TYPE iptables_blocklist_conntrack_usage_percent gauge
iptables_blocklist_conntrack_usage_percent $conntrack_usage
# HELP iptables_blocklist_cache_disk_used_bytes Disk space used by cache partition
# TYPE iptables_blocklist_cache_disk_used_bytes gauge
iptables_blocklist_cache_disk_used_bytes $disk_used
# HELP iptables_blocklist_cache_disk_available_bytes Disk space available on cache partition
# TYPE iptables_blocklist_cache_disk_available_bytes gauge
iptables_blocklist_cache_disk_available_bytes $disk_avail
# HELP iptables_blocklist_cache_disk_usage_percent Cache partition disk usage percentage
# TYPE iptables_blocklist_cache_disk_usage_percent gauge
iptables_blocklist_cache_disk_usage_percent ${disk_pct:-0}
# HELP iptables_blocklist_cache_total_size_bytes Total size of cache directory
# TYPE iptables_blocklist_cache_total_size_bytes gauge
iptables_blocklist_cache_total_size_bytes $cache_size
# HELP iptables_blocklist_whitelist_size Whitelist ipset size
# TYPE iptables_blocklist_whitelist_size gauge
iptables_blocklist_whitelist_size{ip_version="4"} $(get_ipset_size "$WHITELIST_IPSET")
iptables_blocklist_whitelist_size{ip_version="6"} $(get_ipset_size "$WHITELIST_IPSET_V6")
# HELP iptables_blocklist_exporter_runtime_seconds Exporter runtime in seconds
# TYPE iptables_blocklist_exporter_runtime_seconds gauge
iptables_blocklist_exporter_runtime_seconds $((current_time - start_time))
EOF
echo ""
}
#######################################
# Serve the exporter over HTTP using netcat, one connection at a time.
#
# "GET /metrics" is answered with generate_metrics output; any other request
# gets a small HTML landing page.
#
# nc's stdout (the client's request) is routed back to the handler through a
# FIFO. Without that loop-back the handler's `read` would consume the
# script's own stdin instead of the HTTP request, and /metrics would never
# be recognised.
#
# Globals:   HTTP_PORT (read)
# Outputs:   startup banner on stdout, errors on stderr
# Returns:   does not return (serves forever); exits 1 when nc is missing or
#            the FIFO cannot be created
#######################################
run_http_server() {
  echo "Starting iptables blocklist exporter on port $HTTP_PORT..."
  if ! command -v nc >/dev/null 2>&1; then
    echo "ERROR: netcat (nc) is required for HTTP mode" >&2
    echo "Install with: yum install nmap-ncat (RHEL/CentOS)" >&2
    echo " or: apt install netcat (Debian/Ubuntu)" >&2
    exit 1
  fi

  local fifo_dir fifo request
  fifo_dir=$(mktemp -d) || {
    echo "ERROR: cannot create temporary directory for request FIFO" >&2
    exit 1
  }
  fifo="$fifo_dir/request"
  if ! mkfifo -m 600 "$fifo"; then
    rm -rf -- "$fifo_dir"
    echo "ERROR: cannot create request FIFO" >&2
    exit 1
  fi
  # Remove the FIFO when the server stops. The path is expanded now because
  # the local variable may be out of scope when the trap fires.
  # NOTE(review): this replaces any EXIT trap installed earlier; main() does
  # not take the instance lock in HTTP mode - confirm no other EXIT trap is
  # needed here.
  # shellcheck disable=SC2064
  trap "rm -rf -- $(printf '%q' "$fifo_dir")" EXIT

  while true; do
    {
      request=""
      # First request line, e.g. "GET /metrics HTTP/1.1"
      read -r request
      if [[ "$request" =~ ^GET\ /metrics ]]; then
        echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
        generate_metrics
      else
        echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
        echo "<h1>iptables Blocklist Metrics Exporter</h1>"
        echo "<p>Per-feed threat blocking statistics</p>"
        echo "<p><a href='/metrics'>Metrics</a></p>"
      fi
    } < "$fifo" | nc -l -p "$HTTP_PORT" -q 1 > "$fifo" 2>/dev/null
  done
}
#######################################
# Exporter entry point: parse arguments and emit metrics in one of three
# modes - HTTP server, textfile output, or stdout.
#
# Globals:   CONFIG_DIR, HTTP_MODE, OUTPUT_FILE (read)
# Arguments: command-line arguments, forwarded to parse_args
# Returns:   exits 1 when CONFIG_DIR is missing or the output file cannot be
#            produced
#######################################
main() {
  parse_args "$@"
  [ ! -d "$CONFIG_DIR" ] && { echo "ERROR: $CONFIG_DIR not found. Run iptables-blocklists.sh first" >&2; exit 1; }

  # Prevent multiple instances (skip for HTTP mode as it should run continuously)
  [ "$HTTP_MODE" != true ] && acquire_lock

  if [ "$HTTP_MODE" = true ]; then
    run_http_server
  elif [ -n "$OUTPUT_FILE" ]; then
    local output_dir temp_file
    output_dir=$(dirname "$OUTPUT_FILE")

    # Ensure output directory exists
    mkdir -p "$output_dir"

    # Stage the metrics in a hidden file inside the destination directory.
    # Being on the same filesystem makes the final mv a rename(2), so readers
    # see either the old or the new complete file - never a partial copy and
    # never a missing file. The name does not end in ".prom", which is the
    # suffix node_exporter's textfile collector scans for.
    temp_file=$(mktemp "$output_dir/.iptables_metrics.XXXXXX") || {
      echo "ERROR: cannot create temporary file in $output_dir" >&2
      exit 1
    }

    if ! generate_metrics > "$temp_file"; then
      rm -f -- "$temp_file"
      echo "ERROR: metric generation failed, keeping previous $OUTPUT_FILE" >&2
      exit 1
    fi

    # Make it world-readable before publishing so the file is never visible
    # with mktemp's restrictive 0600 mode.
    chmod 644 "$temp_file"

    # A rename also gives the published file a new inode on every run.
    if ! mv -f -- "$temp_file" "$OUTPUT_FILE"; then
      rm -f -- "$temp_file"
      echo "ERROR: cannot write $OUTPUT_FILE" >&2
      exit 1
    fi
  else
    generate_metrics
  fi
}
# Script entry point: hand every command-line argument to main unchanged.
main "$@"
+757
View File
@@ -0,0 +1,757 @@
#!/bin/bash
################################################################################
# Script Name: iptables-blocklists.sh
# Version: 1.0
# Description: Per-feed iptables threat intelligence blocking with ipset
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
################################################################################
# Don't use 'set -e' - causes issues with ipset error handling
# Base directory for this script's state; the feed list, cache and backups
# below all live underneath it.
CONFIG_DIR="/etc/iptables-threats"
# Pipe-delimited feed list (ENABLED|NAME|URL|TYPE|DESCRIPTION).
FEEDS_CONFIG="$CONFIG_DIR/feeds.conf"
# Downloaded feeds, parsed IP lists and generated ipset restore files.
CACHE_DIR="$CONFIG_DIR/cache"
# iptables-save snapshots taken before rules are re-applied.
BACKUP_DIR="$CONFIG_DIR/backups"
# Per-feed ipsets are named "<prefix>-<feed>" (IPv6: "<prefix>-<feed>-v6").
IPSET_PREFIX="iptables-feed"
# ipsets whose members are accepted ahead of every feed rule.
WHITELIST_IPSET="iptables-whitelist"
WHITELIST_IPSET_V6="iptables-whitelist-v6"
# log_message appends timestamped lines here.
LOG_FILE="/var/log/iptables-threats.log"
# Port protected by the SSH rate-limit rules; override with -s/--ssh-port.
SSH_PORT="22"
# Install the systemd update timer; disabled by --no-auto-update.
ENABLE_AUTO_UPDATE=true
# Written to the timer's OnCalendar=; override with --update-interval.
UPDATE_INTERVAL="daily"
# Manage IPv6 ipsets and ip6tables rules too; disabled by --no-ipv6.
ENABLE_IPV6=true
# Number of iptables-save backups kept by cleanup_old_backups.
MAX_BACKUPS=5
# Print the command-line help text to stdout and terminate the script with
# status 0. "$0" is expanded inside the here-document so the examples show
# the path the script was invoked with.
show_usage() {
cat <<EOF
Usage: $0 [OPTIONS] [COMMAND]
PER-FEED VERSION for iptables: Each threat feed gets its own ipset.
Provides detailed per-feed blocking statistics and metrics.
COMMANDS:
install Install and configure threat feed blocking
update Update all enabled feeds now (ipsets only, no rules reload)
apply-rules Regenerate and apply iptables rules (use with caution!)
test-rules Test rule generation without applying (dry-run)
add-feed NAME URL Add a custom feed
remove-feed NAME Remove a feed
enable-feed NAME Enable a disabled feed
disable-feed NAME Disable a feed
list-feeds List all configured feeds
show-stats Show blocking statistics per feed
whitelist-add IP Add IP/CIDR to whitelist
whitelist-init Initialize whitelist with RFC1918/Docker networks
whitelist-list Show all whitelisted IPs
OPTIONS:
-h, --help Show this help message
-s, --ssh-port PORT SSH port (default: 22)
--no-auto-update Disable automatic updates
--no-ipv6 Disable IPv6
--update-interval TIME hourly, daily, weekly (default: daily)
EXAMPLES:
# Install with default feeds
sudo $0 install
# Update feeds manually (safe - only updates ipsets)
sudo $0 update
# Test rule generation (safe - no changes)
sudo $0 test-rules
# Apply rules after testing (regenerates iptables)
sudo $0 apply-rules
# Add custom feed
sudo $0 add-feed "my-blocklist" "https://example.com/blocklist.txt"
# View statistics
sudo $0 show-stats
EOF
# Help is a terminal action: nothing else runs after it.
exit 0
}
# Emit one timestamped log line.
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout, and the same line
#            appended to $LOG_FILE (via tee -a)
log_message() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}
#######################################
# Parse options and the command from the command line.
#
# Globals written: COMMAND, SSH_PORT, ENABLE_AUTO_UPDATE, ENABLE_IPV6,
#                  UPDATE_INTERVAL, FEED_NAME, FEED_URL, WHITELIST_IP
# Arguments: the script's command-line arguments
# Returns:   0; exits 1 on an unknown argument or an option that is missing
#            its value. -h/--help prints usage and exits 0.
#
# Commands that take operands never shift past the end of the argument
# list: a failed `shift N` leaves "$@" untouched, which previously made this
# loop spin forever (e.g. "remove-feed" without a name). Missing operands are
# stored as empty strings so the cmd_* handlers can print their usage line.
#######################################
parse_args() {
  COMMAND=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        ;;
      -s|--ssh-port)
        [[ $# -ge 2 ]] || { echo "Option $1 requires an argument" >&2; exit 1; }
        SSH_PORT="$2"
        shift 2
        ;;
      --no-auto-update)
        ENABLE_AUTO_UPDATE=false
        shift
        ;;
      --no-ipv6)
        ENABLE_IPV6=false
        shift
        ;;
      --update-interval)
        [[ $# -ge 2 ]] || { echo "Option $1 requires an argument" >&2; exit 1; }
        UPDATE_INTERVAL="$2"
        shift 2
        ;;
      install|update|apply-rules|test-rules|list-feeds|show-stats|whitelist-init|whitelist-list)
        COMMAND="$1"
        shift
        ;;
      add-feed)
        COMMAND="add-feed"
        FEED_NAME="${2:-}"
        FEED_URL="${3:-}"
        shift "$(( $# < 3 ? $# : 3 ))"
        ;;
      remove-feed|enable-feed|disable-feed)
        COMMAND="$1"
        FEED_NAME="${2:-}"
        shift "$(( $# < 2 ? $# : 2 ))"
        ;;
      whitelist-add)
        COMMAND="whitelist-add"
        WHITELIST_IP="${2:-}"
        shift "$(( $# < 2 ? $# : 2 ))"
        ;;
      *)
        echo "Unknown: $1" >&2
        exit 1
        ;;
    esac
  done

  # No command given: default to a full install.
  if [ -z "$COMMAND" ]; then
    COMMAND="install"
  fi
}
#######################################
# Verify the script runs as root and that iptables, ipset and curl are
# available, installing them with dnf, yum or apt-get when one is missing.
#
# curl is part of the check because download_feed depends on it; previously
# only iptables and ipset were tested, so a host lacking just curl was
# never repaired.
#
# Outputs:   diagnostics on stderr
# Returns:   0 when all tools are present; exits 1 when not root, when no
#            supported package manager exists, or when installation fails
#######################################
check_requirements() {
  if [ "$EUID" -ne 0 ]; then
    echo "Run as root" >&2
    exit 1
  fi

  local tool missing=false
  for tool in iptables ipset curl; do
    command -v "$tool" >/dev/null 2>&1 || missing=true
  done
  [ "$missing" = true ] || return 0

  # Install iptables, ipset, curl if needed
  if command -v dnf >/dev/null 2>&1; then
    dnf install -y iptables ipset curl iptables-services \
      || { echo "Failed to install requirements" >&2; exit 1; }
  elif command -v yum >/dev/null 2>&1; then
    yum install -y iptables ipset curl iptables-services \
      || { echo "Failed to install requirements" >&2; exit 1; }
  elif command -v apt-get >/dev/null 2>&1; then
    { apt-get update && apt-get install -y iptables ipset curl iptables-persistent; } \
      || { echo "Failed to install requirements" >&2; exit 1; }
  else
    echo "Cannot install requirements automatically" >&2
    exit 1
  fi
}
# Create the configuration, cache and backup directories plus the log file,
# then restrict CONFIG_DIR to mode 700 and LOG_FILE to mode 600.
# Globals: CONFIG_DIR, CACHE_DIR, BACKUP_DIR, LOG_FILE (read)
create_directory_structure() {
  local dir
  for dir in "$CONFIG_DIR" "$CACHE_DIR" "$BACKUP_DIR"; do
    mkdir -p "$dir"
  done
  touch "$LOG_FILE"

  chmod 700 "$CONFIG_DIR"
  chmod 600 "$LOG_FILE"
}
# Prune iptables-save backups in BACKUP_DIR so that at most MAX_BACKUPS
# remain. File names embed a sortable timestamp, so the first entries of the
# sorted listing are the oldest ones.
# Globals: BACKUP_DIR, MAX_BACKUPS (read)
cleanup_old_backups() {
  local found excess
  found=$(find "$BACKUP_DIR" -name 'iptables-save-*.txt' | wc -l)

  # Nothing to prune while we are at or below the limit.
  [ "$found" -gt "$MAX_BACKUPS" ] || return 0

  excess=$((found - MAX_BACKUPS))
  find "$BACKUP_DIR" -name 'iptables-save-*.txt' -type f \
    | sort \
    | head -n "$excess" \
    | xargs rm -f
  log_message "Cleaned up $excess old backups (keeping last $MAX_BACKUPS)"
}
#######################################
# Write the default feed list to FEEDS_CONFIG (mode 600) unless the file
# already exists, in which case it is left untouched.
#
# The here-document delimiter is quoted, so the content is written
# literally with no shell expansion.
#
# Fix: the 3-day AbuseIPDB feed was named "abuseipd-3d"; it is now
# "abuseipdb-3d", matching its sibling feeds.
#
# Globals: FEEDS_CONFIG (read)
#######################################
initialize_feeds_config() {
  [ -f "$FEEDS_CONFIG" ] && return
  cat > "$FEEDS_CONFIG" <<'EOF'
# Threat Intelligence Feeds Configuration
# Format: ENABLED|NAME|URL|TYPE|DESCRIPTION
#
# ENABLED: 1 (enabled) or 0 (disabled)
# NAME: Unique feed identifier
# URL: Feed URL
# TYPE: Format type (plain, cidr, commented, custom)
# DESCRIPTION: Feed description
1|cinsarmy|http://cinsscore.com/list/ci-badguys.txt|plain|CINS Army Malicious IPs
1|firehol-level1|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level1.netset|cidr|FireHOL Level 1 - Most aggressive attackers
1|firehol-level2|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level2.netset|cidr|FireHOL Level 2 - Attacks in last 48h
0|firehol-level3|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level3.netset|cidr|FireHOL Level 3 - Attacks in last 30d
1|ipsum-1|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/1.txt|plain|IPsum Level 1 - Most dangerous
0|ipsum-2|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/2.txt|plain|IPsum Level 2 - Dangerous
0|ipsum-3|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/3.txt|plain|IPsum Level 3 - Suspicious
0|spamhaus-drop|https://www.spamhaus.org/drop/drop.txt|commented|Spamhaus DROP List
0|spamhaus-edrop|https://www.spamhaus.org/drop/edrop.txt|commented|Spamhaus EDROP List
1|spamhaus-dropv6|https://www.spamhaus.org/drop/dropv6.txt|commented|Spamhaus DROP V6 List
0|feodo-tracker|https://feodotracker.abuse.ch/downloads/ipblocklist.txt|commented|Feodo Tracker C2 IPs
0|sslbl-aggressive|https://sslbl.abuse.ch/blacklist/sslipblacklist_aggressive.txt|commented|SSL Blacklist Aggressive
0|sslbl-all|https://sslbl.abuse.ch/blacklist/sslipblacklist.txt|commented|SSL Blacklist All
1|blocklist-de|https://lists.blocklist.de/lists/all.txt|plain|Blocklist.de All Attacks
0|greensnow|https://blocklist.greensnow.co/greensnow.txt|plain|GreenSnow Blacklist
0|emergingthreats|https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt|plain|Emerging Threats IPs
0|bruteforce-ssh|https://lists.blocklist.de/lists/ssh.txt|plain|SSH Bruteforce Attempts
1|binarydefense|https://www.binarydefense.com/banlist.txt|plain|Binary Defense Blacklist
1|bruteforce-bl|https://danger.rulez.sk/projects/bruteforceblocker/blist.php|commented|BruteForce Blocker
0|dshield-top|https://www.dshield.org/block.txt|commented|DShield Top Attackers
1|dshield-fhol|https://iplists.firehol.org/files/dshield.netset|commented|Dshield FireHol top 20
0|tor-exit|https://check.torproject.org/torbulkexitlist|plain|TOR Exit Nodes (optional)
0|abuseipdb-1d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-1d.ipv4|commented|AbuseIPDB confidence score 100 1 day
0|abuseipdb-3d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-3d.ipv4|commented|AbuseIPDB confidence score 100 3 day
0|abuseipdb-7d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-7d.ipv4|commented|AbuseIPDB confidence score 100 7 day
1|abuseipdb-14d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-14d.ipv4|commented|AbuseIPDB confidence score 100 14 day
0|abuseipdb-30d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-30d.ipv4|commented|AbuseIPDB confidence score 100 30 day
# Add custom feeds below this line
EOF
  chmod 600 "$FEEDS_CONFIG"
}
# Create any missing ipsets: the IPv4 whitelist (seeded with 127.0.0.1), the
# IPv6 whitelist (seeded with ::1) and one hash:net set per enabled feed.
# IPv6 sets are only handled when ENABLE_IPV6 is true. Sets that already
# exist are left as they are.
# Globals: WHITELIST_IPSET, WHITELIST_IPSET_V6, IPSET_PREFIX, ENABLE_IPV6,
#          FEEDS_CONFIG (read)
setup_ipsets() {
  log_message "Setting up per-feed ipsets..."

  # Whitelist sets
  ipset list "$WHITELIST_IPSET" >/dev/null 2>&1 || {
    ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000
    ipset add "$WHITELIST_IPSET" 127.0.0.1 2>/dev/null || true
  }
  if [ "$ENABLE_IPV6" = true ]; then
    ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1 || {
      ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000
      ipset add "$WHITELIST_IPSET_V6" ::1 2>/dev/null || true
    }
  fi

  # One set per enabled feed; comment, blank and disabled lines never have
  # "1" in the first field.
  while IFS='|' read -r enabled name url type description; do
    [ "$enabled" = "1" ] || continue

    ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1 \
      || ipset create "${IPSET_PREFIX}-${name}" hash:net family inet hashsize 4096 maxelem 200000

    if [ "$ENABLE_IPV6" = true ]; then
      ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1 \
        || ipset create "${IPSET_PREFIX}-${name}-v6" hash:net family inet6 hashsize 4096 maxelem 200000
    fi
  done < "$FEEDS_CONFIG"
}
# Download a feed with curl.
# Arguments: $1 - source URL, $2 - destination file
# Returns:   curl's exit status. HTTP errors count as failures (--fail),
#            redirects are followed and the transfer is capped at 30 seconds.
download_feed() {
  curl --fail --silent --max-time 30 --location "$1" --output "$2" 2>/dev/null
}
#######################################
# Extract IPv4 and IPv6 addresses/networks from a downloaded feed.
#
# Arguments: $1 - raw feed file
#            $2 - feed type: plain, cidr or commented
#            $3 - output file for IPv4 entries (truncated first)
#            $4 - output file for IPv6 entries (truncated first)
# Globals:   ENABLE_IPV6 (read) - IPv6 extraction is skipped unless true
# Returns:   0; an unrecognised type leaves both outputs empty
#
# Fix (cidr): the address family is now decided on the extracted first
# field instead of the whole line. Before, an IPv6 line starting with a
# digit (e.g. "2001:db8::/32") was written to the IPv4 list, and an IPv4
# line with a ":" in its trailing comment was written to the IPv6 list.
#######################################
parse_feed() {
  local file="$1" type="$2" out_v4="$3" out_v6="$4"
  true > "$out_v4"
  true > "$out_v6"

  case "$type" in
    plain)
      # One bare address or CIDR per line; everything else is ignored.
      grep -E '^[0-9.]+(/[0-9]+)?$' "$file" >> "$out_v4" 2>/dev/null || true
      if [ "$ENABLE_IPV6" = true ]; then
        grep -E '^[0-9a-fA-F:]+(/[0-9]+)?$' "$file" | grep ':' >> "$out_v6" 2>/dev/null || true
      fi
      ;;
    cidr)
      # Address in the first field, optionally followed by a space or '#'
      # and a comment.
      grep -E '^[0-9.]+' "$file" | cut -d' ' -f1 | cut -d'#' -f1 \
        | grep -v -e '^$' -e ':' >> "$out_v4" 2>/dev/null || true
      if [ "$ENABLE_IPV6" = true ]; then
        grep -E '^[0-9a-fA-F:]+' "$file" | cut -d' ' -f1 | cut -d'#' -f1 \
          | grep ':' >> "$out_v6" 2>/dev/null || true
      fi
      ;;
    commented)
      # Drop '#'/';' comment lines, then pull address-looking tokens out of
      # the remaining lines.
      grep -v -E '^[#;]|^$' "$file" | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?' >> "$out_v4" 2>/dev/null || true
      if [ "$ENABLE_IPV6" = true ]; then
        grep -v -E '^[#;]|^$' "$file" | grep -oE '[0-9a-fA-F:]+(/[0-9]+)?' | grep -E '^[0-9a-fA-F]{1,4}:[0-9a-fA-F:]+' >> "$out_v6" 2>/dev/null || true
      fi
      ;;
  esac
}
#######################################
# Helper for update_feeds: replace the contents of one ipset with the
# entries listed in a file.
#
# The entries are loaded into a scratch set through a single
# `ipset restore` and then swapped into place, so the live set is never
# empty. If the batch load fails, the live set is flushed and refilled one
# entry at a time.
#
# Arguments: $1 - target ipset name
#            $2 - address family (inet or inet6)
#            $3 - file with one address/network per line
#            $4 - path where the generated restore script is written
#            $5 - label used in the fallback log message
# Globals:   IPSET_PREFIX (read)
#######################################
_update_feeds_load_set() {
  local set_name="$1" family="$2" list_file="$3" restore_file="$4" label="$5"
  # ipset names are limited to 31 characters. Deriving the scratch name from
  # the target ("<target>-tmp") overflowed that limit for longer feed names,
  # so every batch load failed and fell back to the slow path. One short
  # scratch name per family is enough because sets are loaded sequentially.
  # NOTE(review): a feed literally named "swap-inet"/"swap-inet6" would
  # collide with this name.
  local scratch_set="${IPSET_PREFIX}-swap-${family}"
  local entry

  # Ensure target ipset exists for swap
  if ! ipset list "$set_name" >/dev/null 2>&1; then
    ipset create "$set_name" hash:net family "$family" hashsize 4096 maxelem 200000
  fi

  {
    echo "create $scratch_set hash:net family $family hashsize 4096 maxelem 200000"
    # A scratch set left behind by an interrupted run may still hold entries.
    echo "flush $scratch_set"
    while IFS= read -r entry; do
      [ -z "$entry" ] && continue
      echo "add $scratch_set $entry"
    done < "$list_file"
    echo "swap $set_name $scratch_set"
    echo "destroy $scratch_set"
  } > "$restore_file"

  # -exist: tolerate an already existing scratch set and duplicate entries
  # in the feed instead of aborting the whole batch.
  if ! ipset restore -exist < "$restore_file" 2>/dev/null; then
    log_message " ⚠ Batch load failed for $label, using fallback"
    ipset destroy "$scratch_set" 2>/dev/null || true
    ipset flush "$set_name" 2>/dev/null || true
    while IFS= read -r entry; do
      [ -z "$entry" ] && continue
      ipset add "$set_name" "$entry" 2>/dev/null || true
    done < "$list_file"
  fi
}

#######################################
# Download and parse every enabled feed and load the results into the
# per-feed ipsets. iptables rules are not touched.
#
# Steps: remove cache files that belong to no enabled feed, destroy ipsets
# of disabled feeds, refresh each enabled feed, then persist all ipsets.
# A feed that yields no IPv4 (or IPv6) entries keeps its previous set
# contents for that family.
#
# Globals: FEEDS_CONFIG, CACHE_DIR, IPSET_PREFIX, ENABLE_IPV6 (read)
#######################################
update_feeds() {
  log_message "Starting per-feed update (FAST ipset restore mode)..."

  local enabled name url type description
  local enabled_feeds cache_file bn fn
  local cleaned_cache=0 cleaned_ipsets=0

  # Auto-cleanup cache and ipsets for disabled feeds
  enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2)

  # Clean cache files
  for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed "$CACHE_DIR"/*-v4.restore "$CACHE_DIR"/*-v6.restore; do
    [ -f "$cache_file" ] || continue
    bn=$(basename "$cache_file")
    # Strip whichever cache suffix is present to recover the feed name.
    fn="${bn%.raw}"
    fn="${fn%-v4.parsed}"
    fn="${fn%-v6.parsed}"
    fn="${fn%-v4.restore}"
    fn="${fn%-v6.restore}"
    # Exact whole-line match: feed names may contain regex metacharacters.
    if ! grep -qxF -- "$fn" <<< "$enabled_feeds"; then
      rm -f "$cache_file" && cleaned_cache=$((cleaned_cache + 1))
    fi
  done

  # Clean ipsets for disabled feeds
  while IFS='|' read -r enabled name url type description; do
    [[ "$enabled" =~ ^#.*$ ]] && continue
    [[ -z "$enabled" ]] && continue
    [ "$enabled" = "1" ] && continue
    if ipset list "${IPSET_PREFIX}-${name}" >/dev/null 2>&1; then
      ipset destroy "${IPSET_PREFIX}-${name}" 2>/dev/null && cleaned_ipsets=$((cleaned_ipsets + 1))
    fi
    if ipset list "${IPSET_PREFIX}-${name}-v6" >/dev/null 2>&1; then
      ipset destroy "${IPSET_PREFIX}-${name}-v6" 2>/dev/null && cleaned_ipsets=$((cleaned_ipsets + 1))
    fi
  done < "$FEEDS_CONFIG"

  [ "$cleaned_cache" -gt 0 ] && log_message " Cleaned $cleaned_cache stale cache files"
  [ "$cleaned_ipsets" -gt 0 ] && log_message " Destroyed $cleaned_ipsets stale ipsets"

  local total=0 failed=0
  local raw v4 v6 c4 c6
  while IFS='|' read -r enabled name url type description; do
    [ "$enabled" = "1" ] || continue
    total=$((total + 1))
    log_message "Updating: $name"
    raw="$CACHE_DIR/${name}.raw"
    v4="$CACHE_DIR/${name}-v4.parsed"
    v6="$CACHE_DIR/${name}-v6.parsed"

    if download_feed "$url" "$raw" && parse_feed "$raw" "$type" "$v4" "$v6"; then
      c6=0
      c4=$(wc -l < "$v4" 2>/dev/null || echo 0)
      [ "$ENABLE_IPV6" = true ] && c6=$(wc -l < "$v6" 2>/dev/null || echo 0)

      # FAST IPv4: Use ipset restore
      if [ "$c4" -gt 0 ]; then
        _update_feeds_load_set "${IPSET_PREFIX}-${name}" inet \
          "$v4" "$CACHE_DIR/${name}-v4.restore" "$name IPv4"
      fi

      # FAST IPv6: Use ipset restore
      if [ "$ENABLE_IPV6" = true ] && [ "$c6" -gt 0 ]; then
        _update_feeds_load_set "${IPSET_PREFIX}-${name}-v6" inet6 \
          "$v6" "$CACHE_DIR/${name}-v6.restore" "$name IPv6"
      fi

      log_message "$name: $c4 IPv4, $c6 IPv6"
    else
      log_message " ✗ Failed: $name"
      failed=$((failed + 1))
    fi
  done < "$FEEDS_CONFIG"

  # Save ipsets (first location that can be written wins)
  ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true
  log_message "✓ Updated $total feeds ($failed failed) - FAST IPSET RESTORE MODE"
}
# Rebuild the threat-feed rules in the INPUT chain.
#
# Sequence: back up the current ruleset, delete the whitelist rule and the
# LOG/DROP pair of every feed listed in FEEDS_CONFIG (enabled or not),
# re-insert the whitelist ACCEPT at position 1, add a LOG + DROP pair per
# enabled feed, add SSH rate limiting if absent, then save the ruleset.
#
# Globals: BACKUP_DIR, FEEDS_CONFIG, IPSET_PREFIX, WHITELIST_IPSET,
#          WHITELIST_IPSET_V6, ENABLE_IPV6, SSH_PORT (read)
apply_iptables_rules() {
log_message "Applying per-feed iptables rules..."
# Backup current rules
iptables-save > "$BACKUP_DIR/iptables-save-$(date +%Y%m%d-%H%M%S).txt" 2>/dev/null || true
cleanup_old_backups
# Remove old threat feed rules
# Each -D below removes at most one matching rule; failures (rule absent)
# are ignored on purpose.
iptables -D INPUT -m set --match-set "$WHITELIST_IPSET" src -j ACCEPT 2>/dev/null || true
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
# No "enabled" filter here: rules of disabled feeds are removed as well.
iptables -D INPUT -m set --match-set "${IPSET_PREFIX}-${name}" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT:${name}] " 2>/dev/null || true
iptables -D INPUT -m set --match-set "${IPSET_PREFIX}-${name}" src -j DROP 2>/dev/null || true
done < "$FEEDS_CONFIG" 2>/dev/null || true
if [ "$ENABLE_IPV6" = true ]; then
ip6tables -D INPUT -m set --match-set "$WHITELIST_IPSET_V6" src -j ACCEPT 2>/dev/null || true
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
ip6tables -D INPUT -m set --match-set "${IPSET_PREFIX}-${name}-v6" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT-v6:${name}] " 2>/dev/null || true
ip6tables -D INPUT -m set --match-set "${IPSET_PREFIX}-${name}-v6" src -j DROP 2>/dev/null || true
done < "$FEEDS_CONFIG" 2>/dev/null || true
fi
# Add whitelist rules (highest priority)
iptables -I INPUT 1 -m set --match-set "$WHITELIST_IPSET" src -j ACCEPT
[ "$ENABLE_IPV6" = true ] && ip6tables -I INPUT 1 -m set --match-set "$WHITELIST_IPSET_V6" src -j ACCEPT
# Add per-feed rules
# "line" is the INPUT position of the next IPv4 rule; position 1 holds the
# whitelist ACCEPT inserted above.
local line=2
while IFS='|' read -r enabled name url type description; do
[[ "$enabled" =~ ^#.*$ ]] && continue
[[ -z "$enabled" ]] && continue
[ "$enabled" != "1" ] && continue
# IPv4
iptables -I INPUT $line -m set --match-set "${IPSET_PREFIX}-${name}" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT:${name}] "
line=$((line + 1))
iptables -I INPUT $line -m set --match-set "${IPSET_PREFIX}-${name}" src -j DROP
line=$((line + 1))
# IPv6
# NOTE(review): IPv6 feed rules are appended (-A) to the end of INPUT while
# the IPv4 ones are inserted at fixed positions near the top - confirm this
# difference is intended.
if [ "$ENABLE_IPV6" = true ]; then
ip6tables -A INPUT -m set --match-set "${IPSET_PREFIX}-${name}-v6" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT-v6:${name}] "
ip6tables -A INPUT -m set --match-set "${IPSET_PREFIX}-${name}-v6" src -j DROP
fi
done < "$FEEDS_CONFIG"
# SSH rate limiting
# Added only when the "--set" rule is not present yet. Both rules are
# inserted at the head of INPUT, so the DROP rule (inserted last) ends up
# ahead of the --set rule.
if ! iptables -C INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --set 2>/dev/null; then
iptables -I INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --set
iptables -I INPUT -p tcp --dport "$SSH_PORT" -m conntrack --ctstate NEW -m recent --update --seconds 60 --hitcount 4 -j DROP
fi
# Save rules
# Written to whichever of /etc/sysconfig or /etc/iptables exists (checked in
# that order); nothing is saved when neither directory is present.
if [ -d /etc/sysconfig ]; then
iptables-save > /etc/sysconfig/iptables
[ "$ENABLE_IPV6" = true ] && ip6tables-save > /etc/sysconfig/ip6tables
elif [ -d /etc/iptables ]; then
iptables-save > /etc/iptables/rules.v4
[ "$ENABLE_IPV6" = true ] && ip6tables-save > /etc/iptables/rules.v6
fi
log_message "✓ iptables rules applied (per-feed)"
}
# Install and enable a oneshot systemd unit (iptables-restore.service) that
# restores the saved ipsets first and then the IPv4 and IPv6 rulesets,
# trying the /etc/sysconfig location before the /etc/iptables one.
# The unit text is written literally (quoted here-document delimiter).
setup_iptables_persistence() {
  local unit_name="iptables-restore.service"
  local unit_path="/etc/systemd/system/${unit_name}"

  log_message "Setting up iptables persistence..."

  # Create systemd service for iptables restore
  cat > "$unit_path" <<'EOF'
[Unit]
Description=Restore iptables rules
Before=network-pre.target
Wants=network-pre.target
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/bash -c 'ipset restore -f /etc/sysconfig/ipset 2>/dev/null || ipset restore -f /etc/iptables/ipsets 2>/dev/null || true'
ExecStart=/bin/bash -c 'iptables-restore /etc/sysconfig/iptables 2>/dev/null || iptables-restore /etc/iptables/rules.v4 2>/dev/null || true'
ExecStart=/bin/bash -c 'ip6tables-restore /etc/sysconfig/ip6tables 2>/dev/null || ip6tables-restore /etc/iptables/rules.v6 2>/dev/null || true'
[Install]
WantedBy=multi-user.target
EOF

  systemctl daemon-reload
  # Enabling is best effort; a failure here is deliberately ignored.
  if ! systemctl enable "$unit_name" 2>/dev/null; then
    true
  fi
  log_message "✓ iptables persistence configured"
}
# Install a systemd service + timer pair that runs "<this script> update" on
# the schedule given by UPDATE_INTERVAL, then enable and start the timer.
# Does nothing when ENABLE_AUTO_UPDATE is false.
# Globals: ENABLE_AUTO_UPDATE, UPDATE_INTERVAL (read)
setup_auto_update() {
  if [ "$ENABLE_AUTO_UPDATE" = false ]; then
    return
  fi

  local unit_dir="/etc/systemd/system"
  local unit_base="iptables-threat-feeds-update"
  # Absolute path of the running script, used as the service's ExecStart.
  local self_path
  self_path=$(readlink -f "$0")

  cat > "${unit_dir}/${unit_base}.service" <<EOF
[Unit]
Description=Update iptables threat feeds (per-feed)
After=network-online.target
[Service]
Type=oneshot
ExecStart=$self_path update
EOF

  cat > "${unit_dir}/${unit_base}.timer" <<EOF
[Unit]
Description=Update threat feeds $UPDATE_INTERVAL
[Timer]
OnCalendar=$UPDATE_INTERVAL
Persistent=true
[Install]
WantedBy=timers.target
EOF

  systemctl daemon-reload
  systemctl enable --now "${unit_base}.timer"
}
#######################################
# Print a table with, for every enabled feed, the number of IPv4 and IPv6
# entries in its ipsets and the number of "[THREAT:<feed>]" kernel log
# lines seen in the last hour.
#
# Fix: `grep -c` already prints "0" when nothing matches but exits 1, so the
# former `|| echo 0` produced a second "0" on its own line and broke the
# table. The non-zero status is now simply ignored.
#
# Globals: FEEDS_CONFIG, IPSET_PREFIX, ENABLE_IPV6 (read)
#######################################
cmd_show_stats() {
  local enabled name url type description
  local v4 v6 blocks

  echo "Per-Feed Blocking Statistics"
  printf "%-25s %10s %10s %12s\n" "FEED" "IPv4" "IPv6" "BLOCKS(1h)"
  echo "-------------------------------------------------------------------"

  while IFS='|' read -r enabled name url type description; do
    # Comment, blank and disabled lines never have "1" in the first field.
    [ "$enabled" = "1" ] || continue

    v6=0
    # Member lines start with an address character.
    v4=$(ipset list "${IPSET_PREFIX}-${name}" 2>/dev/null | grep -c '^[0-9.]' || true)
    if [ "$ENABLE_IPV6" = true ]; then
      v6=$(ipset list "${IPSET_PREFIX}-${name}-v6" 2>/dev/null | grep -c '^[0-9a-fA-F:]' || true)
    fi
    # Fixed-string match so feed names are never interpreted as a regex.
    blocks=$(journalctl -k --since "1 hour ago" 2>/dev/null | grep -cF "[THREAT:${name}]" || true)

    printf "%-25s %10s %10s %12s\n" "$name" "${v4:-0}" "${v6:-0}" "${blocks:-0}"
  done < "$FEEDS_CONFIG"
}
# Print a STATUS / NAME / DESC table of every feed in FEEDS_CONFIG.
# Comment lines and blank lines are skipped; a feed is shown as ENABLED only
# when its first field is exactly "1".
cmd_list_feeds() {
  local status
  printf "%-10s %-25s %s\n" "STATUS" "NAME" "DESC"
  while IFS='|' read -r enabled name url type description; do
    case "$enabled" in
      '#'* | '') continue ;;
    esac
    if [ "$enabled" = "1" ]; then
      status="ENABLED"
    else
      status="DISABLED"
    fi
    printf "%-10s %-25s %s\n" "$status" "$name" "$description"
  done < "$FEEDS_CONFIG"
}
#######################################
# Add WHITELIST_IP to the IPv4 or IPv6 whitelist ipset (chosen by the
# presence of ':' in the value) and persist the ipsets.
#
# Fix: the former `add && log || fail` chain also took the failure branch -
# and exited 1 - when the add succeeded but logging failed. Success and
# failure are now separate branches.
#
# Globals: WHITELIST_IP, WHITELIST_IPSET, WHITELIST_IPSET_V6 (read)
# Returns: exits 1 when no address was given or ipset rejects it
#######################################
cmd_whitelist_add() {
  [ -z "$WHITELIST_IP" ] && { echo "Usage: $0 whitelist-add <IP|CIDR>"; exit 1; }

  local target_set family_label
  if [[ "$WHITELIST_IP" == *:* ]]; then
    target_set="$WHITELIST_IPSET_V6"
    family_label="IPv6"
  else
    target_set="$WHITELIST_IPSET"
    family_label="IPv4"
  fi

  if ! ipset add "$target_set" "$WHITELIST_IP" 2>/dev/null; then
    echo "Failed to add $WHITELIST_IP" >&2
    exit 1
  fi
  log_message "✓ Added to $family_label whitelist: $WHITELIST_IP"

  # Persist to whichever location is writable; failure is tolerated.
  ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true
}
# Seed the whitelist ipsets with private, link-local and loopback ranges
# (IPv6 only when ENABLE_IPV6 is true), reporting each network as it is
# processed, then persist the ipsets.
# Globals: WHITELIST_IPSET, WHITELIST_IPSET_V6, ENABLE_IPV6 (read)
cmd_whitelist_init() {
  log_message "Initializing whitelist with private networks..."

  local -a v4_ranges=("10.0.0.0/8" "172.16.0.0/12" "192.168.0.0/16" "169.254.0.0/16" "127.0.0.0/8")
  local -a v6_ranges=("fc00::/7" "fe80::/10" "::1")
  local net

  echo "Adding IPv4 private networks to whitelist..."
  for net in "${v4_ranges[@]}"; do
    if ! ipset add "$WHITELIST_IPSET" "$net" 2>/dev/null; then
      echo " - $net (already exists or error)"
      continue
    fi
    echo "$net"
  done

  if [ "$ENABLE_IPV6" = true ]; then
    echo "Adding IPv6 private networks to whitelist..."
    for net in "${v6_ranges[@]}"; do
      if ! ipset add "$WHITELIST_IPSET_V6" "$net" 2>/dev/null; then
        echo " - $net (already exists or error)"
        continue
      fi
      echo "$net"
    done
  fi

  ipset save > /etc/sysconfig/ipset 2>/dev/null || ipset save > /etc/iptables/ipsets 2>/dev/null || true
  log_message "✓ Whitelist initialized with RFC1918/private networks"
}
# Print the members of the IPv4 whitelist ipset and, when ENABLE_IPV6 is
# true, of the IPv6 one. "No entries" is shown when a set has no member
# lines or cannot be listed.
cmd_whitelist_list() {
  local bar="=========================================="

  printf '%s\n' "$bar" "IPv4 Whitelist ($WHITELIST_IPSET)" "$bar"
  ipset list "$WHITELIST_IPSET" 2>/dev/null | grep '^[0-9]' || echo "No entries"

  [ "$ENABLE_IPV6" = true ] || return 0

  printf '%s\n' "" "$bar" "IPv6 Whitelist ($WHITELIST_IPSET_V6)" "$bar"
  ipset list "$WHITELIST_IPSET_V6" 2>/dev/null | grep '^[0-9a-fA-F:]' || echo "No entries"
}
#######################################
# Append a custom, enabled, "plain"-type feed to FEEDS_CONFIG.
#
# Improvements over the previous version:
#  - The name is restricted to letters, digits, '.', '_' and '-', and the
#    URL may not contain '|' or whitespace. The config is pipe-delimited and
#    the name becomes part of ipset names and iptables rules, so other
#    characters would corrupt the file or break later commands.
#  - Names whose ipset name would exceed ipset's 31-character limit are
#    rejected up front.
#  - The duplicate check compares the name field exactly instead of using
#    the name as a regular expression.
#
# Globals: FEED_NAME, FEED_URL, FEEDS_CONFIG, IPSET_PREFIX (read)
# Returns: exits 1 on missing/invalid arguments or when the feed exists
#######################################
cmd_add_feed() {
  if [ -z "$FEED_NAME" ] || [ -z "$FEED_URL" ]; then
    echo "Usage: $0 add-feed <NAME> <URL>"
    exit 1
  fi

  local name_re='^[A-Za-z0-9][A-Za-z0-9._-]*$'
  if [[ ! "$FEED_NAME" =~ $name_re ]]; then
    echo "Invalid feed name: $FEED_NAME (allowed: letters, digits, '.', '_', '-')" >&2
    exit 1
  fi
  if [[ "$FEED_URL" == *'|'* || "$FEED_URL" == *[[:space:]]* ]]; then
    echo "Invalid feed URL: must not contain '|' or whitespace" >&2
    exit 1
  fi

  local set_name="${IPSET_PREFIX}-${FEED_NAME}"
  if [ "${#set_name}" -gt 31 ]; then
    echo "Feed name too long: ipset name '$set_name' exceeds 31 characters" >&2
    exit 1
  fi

  if awk -F'|' -v feed="$FEED_NAME" \
    '($1 == "0" || $1 == "1") && $2 == feed { found = 1 } END { exit !found }' \
    "$FEEDS_CONFIG" 2>/dev/null; then
    echo "Feed exists"
    exit 1
  fi

  echo "1|${FEED_NAME}|${FEED_URL}|plain|Custom: ${FEED_NAME}" >> "$FEEDS_CONFIG"
  log_message "✓ Added feed: $FEED_NAME"
}
#######################################
# Remove a feed: delete its line from FEEDS_CONFIG, remove its iptables
# rules, destroy its ipsets and re-apply the remaining rules.
#
# Fixes:
#  - Only the line whose NAME field equals FEED_NAME is deleted. The old
#    `sed "/|NAME|/d"` matched any field, so removing a feed called "plain"
#    deleted every feed of type "plain".
#  - The feed's LOG/DROP rules are deleted explicitly before its ipsets are
#    destroyed. apply_iptables_rules only cleans up feeds still listed in
#    the config, so these rules used to stay behind, and a set referenced by
#    a rule cannot be destroyed.
#
# Globals: FEED_NAME, FEEDS_CONFIG, IPSET_PREFIX, ENABLE_IPV6 (read)
#######################################
cmd_remove_feed() {
  [ -z "$FEED_NAME" ] && { echo "Usage: $0 remove-feed <NAME>"; exit 1; }

  local v4_set="${IPSET_PREFIX}-${FEED_NAME}"
  local v6_set="${IPSET_PREFIX}-${FEED_NAME}-v6"
  local remaining

  # Keep every line except the feed entry with an exactly matching name.
  remaining=$(awk -F'|' -v feed="$FEED_NAME" \
    '!(($1 == "0" || $1 == "1") && $2 == feed)' "$FEEDS_CONFIG") || {
    echo "Failed to read $FEEDS_CONFIG" >&2
    exit 1
  }
  # Rewriting in place keeps the file's owner and mode.
  printf '%s\n' "$remaining" > "$FEEDS_CONFIG"

  # Remove the feed's rules (same rule specs apply_iptables_rules creates)
  iptables -D INPUT -m set --match-set "$v4_set" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT:${FEED_NAME}] " 2>/dev/null || true
  iptables -D INPUT -m set --match-set "$v4_set" src -j DROP 2>/dev/null || true
  if [ "$ENABLE_IPV6" = true ]; then
    ip6tables -D INPUT -m set --match-set "$v6_set" src -m limit --limit 5/min -j LOG --log-prefix "[THREAT-v6:${FEED_NAME}] " 2>/dev/null || true
    ip6tables -D INPUT -m set --match-set "$v6_set" src -j DROP 2>/dev/null || true
  fi

  # Remove ipsets (now unreferenced)
  ipset destroy "$v4_set" 2>/dev/null || true
  ipset destroy "$v6_set" 2>/dev/null || true

  log_message "✓ Removed feed: $FEED_NAME"
  log_message "Reapplying rules..."
  apply_iptables_rules
}
#######################################
# Mark a feed as enabled in FEEDS_CONFIG and create its ipsets if missing.
# Rules are not applied and no data is downloaded; the closing log message
# tells the operator which commands to run next.
#
# Fixes:
#  - The feed is located by exact comparison of the NAME field. The old sed
#    expression used the name as a regex, so enabling "a.b" could rewrite
#    the line of a feed called "axb".
#  - An unknown feed is reported as an error instead of being logged as
#    enabled and getting orphan ipsets created.
#
# Globals: FEED_NAME, FEEDS_CONFIG, IPSET_PREFIX, ENABLE_IPV6 (read)
#######################################
cmd_enable_feed() {
  [ -z "$FEED_NAME" ] && { echo "Usage: $0 enable-feed <NAME>"; exit 1; }

  if ! awk -F'|' -v feed="$FEED_NAME" \
    '($1 == "0" || $1 == "1") && $2 == feed { found = 1 } END { exit !found }' \
    "$FEEDS_CONFIG" 2>/dev/null; then
    echo "Feed not found: $FEED_NAME" >&2
    exit 1
  fi

  # Flip the ENABLED field of the matching line only; other lines pass
  # through unchanged.
  local updated
  updated=$(awk -F'|' -v OFS='|' -v feed="$FEED_NAME" \
    '$1 == "0" && $2 == feed { $1 = "1" } { print }' "$FEEDS_CONFIG") || exit 1
  printf '%s\n' "$updated" > "$FEEDS_CONFIG"
  log_message "✓ Enabled: $FEED_NAME"

  # Create ipsets if they don't exist
  if ! ipset list "${IPSET_PREFIX}-${FEED_NAME}" >/dev/null 2>&1; then
    ipset create "${IPSET_PREFIX}-${FEED_NAME}" hash:net family inet hashsize 4096 maxelem 200000
  fi
  if [ "$ENABLE_IPV6" = true ] && ! ipset list "${IPSET_PREFIX}-${FEED_NAME}-v6" >/dev/null 2>&1; then
    ipset create "${IPSET_PREFIX}-${FEED_NAME}-v6" hash:net family inet6 hashsize 4096 maxelem 200000
  fi
  log_message "Run 'update' to download IPs, then 'apply-rules' to add firewall rules"
}
#######################################
# Mark a feed as disabled in FEEDS_CONFIG, re-apply the firewall rules and
# destroy the feed's ipsets.
#
# Fixes:
#  - The feed is located by exact comparison of the NAME field instead of
#    using the name as a sed regex (which could rewrite another feed's line).
#  - An unknown feed is reported as an error.
#  - Rules are re-applied BEFORE the ipsets are destroyed:
#    apply_iptables_rules removes the rules of every feed listed in the
#    config and only re-adds enabled ones. While a rule still references a
#    set, `ipset destroy` fails, so the former order left the sets in place.
#
# Globals: FEED_NAME, FEEDS_CONFIG, IPSET_PREFIX (read)
#######################################
cmd_disable_feed() {
  [ -z "$FEED_NAME" ] && { echo "Usage: $0 disable-feed <NAME>"; exit 1; }

  if ! awk -F'|' -v feed="$FEED_NAME" \
    '($1 == "0" || $1 == "1") && $2 == feed { found = 1 } END { exit !found }' \
    "$FEEDS_CONFIG" 2>/dev/null; then
    echo "Feed not found: $FEED_NAME" >&2
    exit 1
  fi

  # Flip the ENABLED field of the matching line only.
  local updated
  updated=$(awk -F'|' -v OFS='|' -v feed="$FEED_NAME" \
    '$1 == "1" && $2 == feed { $1 = "0" } { print }' "$FEEDS_CONFIG") || exit 1
  printf '%s\n' "$updated" > "$FEEDS_CONFIG"

  log_message "✓ Disabled: $FEED_NAME"
  log_message "Reapplying rules..."
  apply_iptables_rules

  # Destroy ipsets to clear metrics (unreferenced once the rules are gone)
  ipset destroy "${IPSET_PREFIX}-${FEED_NAME}" 2>/dev/null || true
  ipset destroy "${IPSET_PREFIX}-${FEED_NAME}-v6" 2>/dev/null || true
}
# Full installation: run every setup step in order, then print a summary
# with the number of enabled feeds, the config and log locations and a few
# follow-up commands.
cmd_install() {
  local rule="=========================================="
  local step

  log_message "Installing per-feed mode..."

  # The order matters: requirements and directories first, rules only after
  # the ipsets exist and have been filled.
  for step in \
    check_requirements \
    create_directory_structure \
    initialize_feeds_config \
    setup_ipsets \
    update_feeds \
    apply_iptables_rules \
    setup_iptables_persistence \
    setup_auto_update; do
    "$step"
  done

  printf '%s\n' \
    "" \
    "$rule" \
    "✓ Per-feed installation complete" \
    "$rule" \
    "Feeds: $(grep -c '^1|' "$FEEDS_CONFIG")" \
    "Config: $FEEDS_CONFIG" \
    "Log: $LOG_FILE" \
    "" \
    "Commands:" \
    " $0 show-stats" \
    " $0 list-feeds" \
    " $0 update" \
    " $0 whitelist-add <IP>" \
    "$rule"
}
# Dry run: print the iptables/ip6tables rules that apply-rules would create
# for the currently enabled feeds, without touching the firewall.
# Globals: FEEDS_CONFIG, IPSET_PREFIX, WHITELIST_IPSET, WHITELIST_IPSET_V6,
#          ENABLE_IPV6 (read)
cmd_test_rules() {
  local banner="=========================================="
  local -a active=()
  local feed pos idx

  log_message "Testing iptables rule generation (dry-run mode)..."
  printf '%s\n' "$banner" "Rule Generation Test" "$banner" ""

  # Collect the enabled feeds once; comment, blank and disabled lines never
  # have "1" in the first field.
  while IFS='|' read -r enabled name url type description; do
    if [ "$enabled" = "1" ]; then
      active+=("$name")
    fi
  done < "$FEEDS_CONFIG"

  echo "✓ Found ${#active[@]} enabled feeds"
  echo ""

  # IPv4: position 1 is the whitelist, each feed then takes two positions.
  echo "IPv4 rules that would be created:"
  echo " 1. Whitelist bypass: -I INPUT 1 -m set --match-set $WHITELIST_IPSET src -j ACCEPT"
  pos=2
  for feed in "${active[@]}"; do
    echo " $pos. [${feed}] LOG: -I INPUT $pos -m set --match-set ${IPSET_PREFIX}-${feed} src -m limit --limit 5/min -j LOG"
    pos=$((pos + 1))
    echo " $pos. [${feed}] DROP: -I INPUT $pos -m set --match-set ${IPSET_PREFIX}-${feed} src -j DROP"
    pos=$((pos + 1))
  done
  echo ""
  echo "Total IPv4 rules: $((pos - 1))"

  if [ "$ENABLE_IPV6" = true ]; then
    echo ""
    echo "IPv6 rules that would be created:"
    echo " 1. Whitelist bypass: -I INPUT 1 -m set --match-set $WHITELIST_IPSET_V6 src -j ACCEPT"
    idx=0
    for feed in "${active[@]}"; do
      idx=$((idx + 1))
      echo " $((idx * 2)). [${feed}] LOG: -A INPUT -m set --match-set ${IPSET_PREFIX}-${feed}-v6 src -j LOG"
      echo " $((idx * 2 + 1)). [${feed}] DROP: -A INPUT -m set --match-set ${IPSET_PREFIX}-${feed}-v6 src -j DROP"
    done
    echo ""
    echo "Total IPv6 rules: $((idx * 2 + 1))"
  fi

  printf '%s\n' \
    "" \
    "$banner" \
    "✓ Test passed - rules would be generated successfully" \
    " To apply these rules, run: $0 apply-rules" \
    "$banner"
}
# Entry point: parse the command line and dispatch on COMMAND.
# "update" and "apply-rules" are multi-step sequences; every other command
# maps to a handler named cmd_<command>, with dashes turned into
# underscores. An unrecognised COMMAND does nothing.
main() {
  parse_args "$@"

  case "$COMMAND" in
    update)
      check_requirements
      create_directory_structure
      update_feeds
      # DO NOT apply rules here - only update ipsets
      # To regenerate rules, use: apply-rules, enable-feed, disable-feed, or remove-feed
      ;;
    apply-rules)
      check_requirements
      apply_iptables_rules
      ;;
    install | test-rules | list-feeds | show-stats \
      | add-feed | remove-feed | enable-feed | disable-feed \
      | whitelist-add | whitelist-init | whitelist-list)
      "cmd_${COMMAND//-/_}"
      ;;
  esac
}
# Script entry point: hand every command-line argument to main unchanged.
main "$@"
+565
View File
@@ -0,0 +1,565 @@
#!/bin/bash
##############################################################################
#### Promtail to Grafana Alloy Migration Script ####
#### ####
#### Detects OS, reads existing Promtail config for Loki URL/hostname, ####
#### generates equivalent Alloy River config, installs Alloy, and ####
#### handles the cutover from Promtail to Alloy. ####
#### ####
#### Supports: Ubuntu, Debian, RHEL, CentOS, Rocky, Alma, Amazon Linux ####
#### ####
#### Author: Phil Connor ####
#### License: MIT ####
#### Contact: contact@mylinux.work ####
#### Version: 1.0.0-030326 ####
##############################################################################
# Strict mode: abort on command failure, unset variables, and failures
# anywhere in a pipeline.
set -euo pipefail

# Script identity. The name is derived with parameter expansion and made
# readonly in a separate step, so no command status is hidden behind the
# `readonly` builtin.
SCRIPT_NAME="${0##*/}"
readonly SCRIPT_NAME
readonly SCRIPT_VERSION="1.0.0-030326"

# Defaults (each can be overridden by a command-line option)
ALLOY_CONFIG_DIR="/etc/alloy"
ALLOY_CONFIG_FILE="/etc/alloy/config.alloy"
PROMTAIL_CONFIG="/etc/promtail/config.yml"
LOKI_URL=""
CUSTOM_HOSTNAME=""
DRY_RUN=false
GENERATE_ONLY=false
SKIP_INSTALL=false
SKIP_CUTOVER=false
KEEP_PROMTAIL=true
VERBOSE=false
INCLUDE_JOURNAL=true
INCLUDE_NGINX=false
INCLUDE_APACHE=false

# Colors (ANSI escape sequences, interpreted by `echo -e` in the log helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Logging helpers. Each takes the message as $1 and prefixes a colored tag.
# log/warn write to stdout; error and debug write to stderr.
log() { echo -e "${GREEN}[INFO]${NC} $1"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
error() { echo -e "${RED}[ERROR]${NC} $1" >&2; }

# Print a debug line only when VERBOSE is "true".
# - Always returns 0: a `[[ ... ]] && echo` one-liner returns 1 when VERBOSE is
#   false, and that status aborts the whole script under `set -e` at the call
#   site.
# - Writes to stderr so that debug text emitted inside a `$(...)` capture is
#   never mixed into the captured data.
debug() {
    if [[ "$VERBOSE" == true ]]; then
        echo -e "${BLUE}[DEBUG]${NC} $1" >&2
    fi
    return 0
}
# Print the usage/help text to stdout.
# Reads: SCRIPT_NAME (interpolated into the usage line and the examples).
show_help() {
# Unquoted here-document delimiter: $SCRIPT_NAME is expanded, everything else
# is emitted literally.
cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]
Migrate from Promtail to Grafana Alloy. Generates an Alloy config that
maintains Promtail-compatible labels so existing dashboards keep working.
OPTIONS:
--loki-url URL Loki push URL (default: extracted from Promtail config)
--hostname NAME Override hostname (default: auto-detect or from Promtail)
--promtail-config F Path to existing Promtail config (default: /etc/promtail/config.yml)
--output FILE Alloy config output path (default: /etc/alloy/config.alloy)
--generate-only Only generate the Alloy config, don't install or cutover
--skip-install Skip Alloy installation (already installed)
--skip-cutover Generate config and install, but don't stop Promtail
--no-journal Skip systemd journal collection
--include-nginx Include nginx log collection
--include-apache Include Apache log collection
--remove-promtail Remove Promtail package after cutover (default: keep)
--dry-run Show what would be done without making changes
--verbose Enable verbose output
--version Show version
--help, -h Show this help message
EXAMPLES:
# Auto-detect everything from existing Promtail config
sudo $SCRIPT_NAME
# Specify Loki URL and hostname
sudo $SCRIPT_NAME --loki-url http://loki.example.com:3100 --hostname web-01
# Generate config only (don't install or cutover)
$SCRIPT_NAME --generate-only --loki-url http://loki:3100 --output /tmp/config.alloy
# Full migration with nginx logs
sudo $SCRIPT_NAME --include-nginx --remove-promtail
# Dry run to see what would happen
sudo $SCRIPT_NAME --dry-run
EOF
}
# Parse command-line options into the global settings variables.
# Arguments: the script's positional parameters ("$@").
# Writes:    LOKI_URL, CUSTOM_HOSTNAME, PROMTAIL_CONFIG, ALLOY_CONFIG_FILE,
#            GENERATE_ONLY, SKIP_INSTALL, SKIP_CUTOVER, INCLUDE_JOURNAL,
#            INCLUDE_NGINX, INCLUDE_APACHE, KEEP_PROMTAIL, DRY_RUN, VERBOSE.
# Exits:     0 after --version / --help; 1 on an unknown option or when an
#            option that takes a value is given without one.
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --loki-url | --hostname | --promtail-config | --output)
                # These options consume the next word. Check that it exists
                # first: reading a missing "$2" is an "unbound variable" crash
                # under `set -u`, and `shift 2` fails when only one word is left.
                if [[ $# -lt 2 ]]; then
                    error "Option $1 requires a value"
                    show_help
                    exit 1
                fi
                case "$1" in
                    --loki-url) LOKI_URL="$2" ;;
                    --hostname) CUSTOM_HOSTNAME="$2" ;;
                    --promtail-config) PROMTAIL_CONFIG="$2" ;;
                    --output) ALLOY_CONFIG_FILE="$2" ;;
                esac
                shift 2
                ;;
            --generate-only) GENERATE_ONLY=true; shift ;;
            --skip-install) SKIP_INSTALL=true; shift ;;
            --skip-cutover) SKIP_CUTOVER=true; shift ;;
            --no-journal) INCLUDE_JOURNAL=false; shift ;;
            --include-nginx) INCLUDE_NGINX=true; shift ;;
            --include-apache) INCLUDE_APACHE=true; shift ;;
            --remove-promtail) KEEP_PROMTAIL=false; shift ;;
            --dry-run) DRY_RUN=true; shift ;;
            --verbose) VERBOSE=true; shift ;;
            --version) echo "$SCRIPT_NAME version $SCRIPT_VERSION"; exit 0 ;;
            --help | -h) show_help; exit 0 ;;
            *) error "Unknown option: $1"; show_help; exit 1 ;;
        esac
    done
}
# Identify the distribution from /etc/os-release.
# Writes: OS (the os-release ID) and OS_PRETTY (the os-release PRETTY_NAME).
# Exits:  1 when /etc/os-release does not exist.
detect_os() {
    if [[ ! -f /etc/os-release ]]; then
        error "Cannot detect OS"
        exit 1
    fi

    # shellcheck source=/dev/null
    . /etc/os-release

    # ID and PRETTY_NAME are optional fields. Fall back to the defaults that
    # os-release(5) specifies instead of crashing under `set -u`.
    OS="${ID:-linux}"
    OS_PRETTY="${PRETTY_NAME:-Linux}"
    debug "Detected OS: $OS_PRETTY"
}
# Decide which hostname label the generated config will carry.
# Precedence: --hostname value, then the first `host:` label in the Promtail
# config, then the system hostname.
# Reads:  CUSTOM_HOSTNAME, PROMTAIL_CONFIG
# Writes: DETECTED_HOSTNAME
detect_hostname() {
    if [[ -n "$CUSTOM_HOSTNAME" ]]; then
        DETECTED_HOSTNAME="$CUSTOM_HOSTNAME"
        debug "Using custom hostname: $DETECTED_HOSTNAME"
        return
    fi

    # Try to extract from Promtail config
    if [[ -f "$PROMTAIL_CONFIG" ]]; then
        local pt_host
        # The key must be anchored at the start of the line: a bare 'host:'
        # pattern also matches "localhost:3100" inside the clients URL line,
        # which would produce the bogus hostname "url:".
        pt_host=$(grep -m1 -E '^[[:space:]]*host:[[:space:]]' "$PROMTAIL_CONFIG" 2>/dev/null \
            | awk '{print $2}' | tr -d "\"'" || true)
        if [[ -n "$pt_host" ]]; then
            DETECTED_HOSTNAME="$pt_host"
            debug "Extracted hostname from Promtail config: $DETECTED_HOSTNAME"
            return
        fi
    fi

    DETECTED_HOSTNAME=$(hostname -f 2>/dev/null || hostname)
    debug "Using system hostname: $DETECTED_HOSTNAME"
}
# Determine the Loki base URL (without the /loki/api/v1/push suffix).
# Precedence: --loki-url value, then the first `url:` entry in the Promtail
# config.
# Reads:  LOKI_URL, PROMTAIL_CONFIG
# Writes: LOKI_URL
# Exits:  1 when no URL can be determined.
detect_loki_url() {
    if [[ -n "$LOKI_URL" ]]; then
        debug "Using provided Loki URL: $LOKI_URL"
        return
    fi

    # Extract from Promtail config
    if [[ -f "$PROMTAIL_CONFIG" ]]; then
        local raw_url
        # Accept both "url: X" and the list-item form "- url: X" used under
        # `clients:`. Taking the second whitespace-separated field of the line
        # would return the literal "url:" for the list form.
        raw_url=$(awk '
            /^[ \t]*(-[ \t]+)?url:[ \t]/ {
                sub(/^[ \t]*(-[ \t]+)?url:[ \t]*/, "")
                print $1
                exit
            }' "$PROMTAIL_CONFIG" 2>/dev/null || true)

        # Strip optional YAML quotes, then the push endpoint path.
        raw_url=${raw_url//\"/}
        raw_url=${raw_url//\'/}
        raw_url=${raw_url%/}
        LOKI_URL=${raw_url%/loki/api/v1/push}

        if [[ -n "$LOKI_URL" ]]; then
            debug "Extracted Loki URL from Promtail config: $LOKI_URL"
            return
        fi
    fi

    error "Could not determine Loki URL"
    error "Provide with --loki-url or ensure Promtail config exists at $PROMTAIL_CONFIG"
    exit 1
}
# Record in PROMTAIL_RUNNING ("true"/"false") whether the promtail systemd
# unit is currently active.
check_promtail_status() {
    PROMTAIL_RUNNING=false

    if ! systemctl is-active --quiet promtail 2>/dev/null; then
        debug "Promtail is not running"
        return
    fi

    PROMTAIL_RUNNING=true
    log "Promtail is currently running"
}
# Emit an Alloy loki.source.file block on stdout.
# The block is produced when running with --dry-run, when the path contains a
# "*" wildcard, or when the log file exists; otherwise nothing is printed.
# Arguments:
#   $1 - component label (the quoted name after loki.source.file)
#   $2 - log file path or glob
#   $3 - value of the "job" label
#   $4 - optional extra label lines, appended verbatim after the "host" label
# Reads:   DRY_RUN, DETECTED_HOSTNAME
# Returns: 0 in every case. Callers capture the output with `$(...)` under
#          `set -e`, so a non-zero status for a skipped file would abort the
#          whole script.
generate_file_source() {
    local label="$1"
    local path="$2"
    local job="$3"
    local extra_labels="${4:-}"

    if [[ "$DRY_RUN" != true && ! -f "$path" && "$path" != *"*"* ]]; then
        # Keep diagnostics off stdout (it is captured into the config) and
        # never let the helper's status leak out of this function.
        debug "Skipping $path (file does not exist)" >&2 || true
        return 0
    fi

    # The leading blank line keeps the block from being glued onto whatever
    # the caller appended last (for example a "// ..." comment line), because
    # command substitution strips the trailing newline of the previous chunk.
    cat << EOF

loki.source.file "$label" {
targets = [
{
"__path__" = "$path",
"job" = "$job",
"host" = "$DETECTED_HOSTNAME",${extra_labels}
},
]
forward_to = [loki.write.default.receiver]
}
EOF
}
# Assemble the complete Alloy configuration text in memory.
# Reads:  OS, OS_PRETTY, DETECTED_HOSTNAME, LOKI_URL, INCLUDE_JOURNAL,
#         INCLUDE_NGINX, INCLUDE_APACHE
# Writes: GENERATED_CONFIG (the full config as a single string)
# Calls:  log, generate_file_source (once per candidate log file; its stdout is
#         appended to the config via command substitution)
generate_alloy_config() {
log "Generating Alloy config for $OS ($DETECTED_HOSTNAME)..."
# Value of the "os" label attached to journal entries. Debian is labelled
# "ubuntu" and every RHEL-like ID is folded into "rhel-family"; any other ID is
# used as-is.
local os_label
case "$OS" in
ubuntu|debian) os_label="ubuntu" ;;
rhel|centos|rocky|almalinux|amzn) os_label="rhel-family" ;;
*) os_label="$OS" ;;
esac
# Accumulator for the generated text; sections are appended in output order.
local config=""
# Header
config+="// Grafana Alloy Configuration for $DETECTED_HOSTNAME
// Migrated from Promtail on $(date +%Y-%m-%d)
// OS: $OS_PRETTY
// Labels maintained for Promtail dashboard compatibility
logging {
level = \"info\"
}
"
# Journal source
# Emits a loki.source.journal component that forwards to a loki.relabel
# component, which copies three journal fields into the labels unit, priority
# and hostname before forwarding to loki.write.default.
if [[ "$INCLUDE_JOURNAL" == true ]]; then
config+="
// System logs via systemd journal
loki.source.journal \"systemd_journal\" {
max_age = \"12h\"
labels = {
job = \"systemd-journal\",
host = \"$DETECTED_HOSTNAME\",
os = \"$os_label\",
}
forward_to = [loki.relabel.journal_relabel.receiver]
}
loki.relabel \"journal_relabel\" {
forward_to = [loki.write.default.receiver]
rule {
source_labels = [\"__journal__systemd_unit\"]
target_label = \"unit\"
}
rule {
source_labels = [\"__journal_priority\"]
target_label = \"priority\"
}
rule {
source_labels = [\"__journal__hostname\"]
target_label = \"hostname\"
}
}
"
fi
# OS-specific file sources
# The fourth argument of generate_file_source is a multi-line string: it starts
# with a newline so each extra label lands on its own line inside the block.
case "$OS" in
ubuntu|debian)
config+="
// Ubuntu/Debian system logs"
config+=$(generate_file_source "syslog" "/var/log/syslog" "messages" "
\"os\" = \"ubuntu\",")
config+=$(generate_file_source "auth" "/var/log/auth.log" "auth" "
\"log_type\" = \"authentication\",")
config+=$(generate_file_source "kern" "/var/log/kern.log" "kernel" "")
config+=$(generate_file_source "cron" "/var/log/cron.log" "cron" "")
config+=$(generate_file_source "mail" "/var/log/mail.log" "mail" "")
config+=$(generate_file_source "apt" "/var/log/apt/history.log" "packages" "
\"package_manager\" = \"apt\",")
config+=$(generate_file_source "boot" "/var/log/boot.log" "boot" "")
;;
rhel|centos|rocky|almalinux|amzn)
config+="
// RHEL/CentOS/Rocky/Alma/Amazon Linux system logs"
config+=$(generate_file_source "messages" "/var/log/messages" "messages" "
\"os\" = \"rhel-family\",")
config+=$(generate_file_source "secure" "/var/log/secure" "auth" "
\"log_type\" = \"authentication\",")
config+=$(generate_file_source "cron" "/var/log/cron" "cron" "")
config+=$(generate_file_source "maillog" "/var/log/maillog" "mail" "")
config+=$(generate_file_source "yum" "/var/log/yum.log" "packages" "
\"package_manager\" = \"yum\",")
config+=$(generate_file_source "boot" "/var/log/boot.log" "boot" "")
;;
*)
config+="
// Generic system logs"
config+=$(generate_file_source "syslog" "/var/log/syslog" "messages" "")
config+=$(generate_file_source "auth" "/var/log/auth.log" "auth" "
\"log_type\" = \"authentication\",")
;;
esac
# Application wildcard
# NOTE(review): this glob also covers files such as auth.log and kern.log that
# have their own source above — confirm the overlap is intended.
config+=$(generate_file_source "application_logs" "/var/log/*.log" "application" "")
# Nginx
if [[ "$INCLUDE_NGINX" == true ]]; then
config+="
// Nginx logs"
config+=$(generate_file_source "nginx_access" "/var/log/nginx/access.log" "nginx" "
\"log_type\" = \"access\",")
config+=$(generate_file_source "nginx_error" "/var/log/nginx/error.log" "nginx" "
\"log_type\" = \"error\",")
fi
# Apache
# Both the Debian-style (/var/log/apache2) and RHEL-style (/var/log/httpd)
# locations are offered to generate_file_source.
if [[ "$INCLUDE_APACHE" == true ]]; then
config+="
// Apache logs"
config+=$(generate_file_source "apache_access" "/var/log/apache2/access.log" "apache" "
\"log_type\" = \"access\",")
config+=$(generate_file_source "apache_error" "/var/log/apache2/error.log" "apache" "
\"log_type\" = \"error\",")
config+=$(generate_file_source "httpd_access" "/var/log/httpd/access_log" "apache" "
\"log_type\" = \"access\",")
config+=$(generate_file_source "httpd_error" "/var/log/httpd/error_log" "apache" "
\"log_type\" = \"error\",")
fi
# Loki write endpoint
# LOKI_URL is the base URL; the push API path is appended here.
config+="
// Write to Loki
loki.write \"default\" {
endpoint {
url = \"${LOKI_URL}/loki/api/v1/push\"
}
}
"
# Publish the result for write_config.
GENERATED_CONFIG="$config"
}
# Persist GENERATED_CONFIG to the given path, or just print it with --dry-run.
# Arguments: $1 - destination file
# Reads:     DRY_RUN, GENERATED_CONFIG
# An existing destination file is first copied to <file>.bak.<timestamp>.
write_config() {
    local target="$1"

    if [[ "$DRY_RUN" != true ]]; then
        local target_dir
        target_dir=$(dirname "$target")
        mkdir -p "$target_dir"

        # Backup existing config
        if [[ -f "$target" ]]; then
            local backup="${target}.bak.$(date +%Y%m%d%H%M%S)"
            cp "$target" "$backup"
            log "Backed up existing config to $backup"
        fi

        echo "$GENERATED_CONFIG" > "$target"
        log "Alloy config written to $target"
        return
    fi

    # Dry run: show the config instead of touching the filesystem.
    log "DRY RUN: Would write config to $target"
    echo "--- Generated config.alloy ---"
    echo "$GENERATED_CONFIG"
    echo "--- End config ---"
}
# Install the Grafana Alloy package from Grafana's package repository.
# Reads: DRY_RUN, OS
# No-op with --dry-run or when an `alloy` command is already available.
# Exits 1 on an OS without an automatic installation path.
install_alloy() {
    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would install Grafana Alloy"
        return
    fi

    # Check if already installed
    if command -v alloy >/dev/null 2>&1; then
        log "Alloy is already installed: $(alloy --version 2>&1 | head -1)"
        return
    fi

    log "Installing Grafana Alloy..."
    case "$OS" in
        ubuntu|debian)
            # Register Grafana's APT repository with its signing key, then install.
            apt-get install -y apt-transport-https software-properties-common
            mkdir -p /etc/apt/keyrings/
            wget -q -O - https://apt.grafana.com/gpg.key | gpg --dearmor > /etc/apt/keyrings/grafana.gpg
            echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" | tee /etc/apt/sources.list.d/grafana.list
            apt-get update -qq
            apt-get install -y alloy
            ;;
        rhel|centos|rocky|almalinux|amzn)
            # Register Grafana's RPM repository, then install with dnf when
            # present and yum otherwise.
            cat > /etc/yum.repos.d/grafana.repo << 'REPO'
[grafana]
name=grafana
baseurl=https://rpm.grafana.com
repo_gpgcheck=1
enabled=1
gpgcheck=1
gpgkey=https://rpm.grafana.com/gpg.key
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt
REPO
            local rpm_tool=yum
            if command -v dnf >/dev/null 2>&1; then
                rpm_tool=dnf
            fi
            "$rpm_tool" install -y alloy
            ;;
        *)
            error "Unsupported OS for automatic installation: $OS"
            error "Install Alloy manually: https://grafana.com/docs/alloy/latest/set-up/install/"
            exit 1
            ;;
    esac

    log "Alloy installed: $(alloy --version 2>&1 | head -1)"
}
# Syntax-check the written config by running `alloy fmt` on it.
# Reads: DRY_RUN, ALLOY_CONFIG_FILE
# Skipped with --dry-run or when no `alloy` command is available.
# Exits 1 when `alloy fmt` reports a failure.
validate_config() {
    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would validate config with 'alloy fmt'"
        return
    fi

    if ! command -v alloy >/dev/null 2>&1; then
        warn "Alloy not installed, skipping validation"
        return
    fi

    log "Validating Alloy config..."
    if ! alloy fmt "$ALLOY_CONFIG_FILE" >/dev/null 2>&1; then
        error "Config validation failed. Check $ALLOY_CONFIG_FILE for syntax errors"
        error "Run: alloy fmt $ALLOY_CONFIG_FILE"
        exit 1
    fi

    log "Config validation passed"
}
# Switch log shipping from Promtail to Alloy.
# Steps: stop and disable Promtail if it is active, add the alloy user to the
# adm and systemd-journal groups when those groups exist, enable and start
# Alloy, and optionally remove the Promtail package.
# Reads: DRY_RUN, KEEP_PROMTAIL, OS
# Exits: 1 when Alloy does not come up. In that case Alloy is stopped and
#        disabled again, and Promtail is restarted only if this function was
#        the one that stopped it.
perform_cutover() {
    if [[ "$DRY_RUN" == true ]]; then
        log "DRY RUN: Would stop Promtail and start Alloy"
        return
    fi

    # Stop Promtail, remembering whether it was running so that a rollback
    # restores exactly the previous state.
    local promtail_was_active=false
    if systemctl is-active --quiet promtail 2>/dev/null; then
        promtail_was_active=true
        log "Stopping Promtail..."
        systemctl stop promtail
        systemctl disable promtail
        log "Promtail stopped and disabled"
    fi

    # Add alloy user to required groups (best effort: the groups or the user
    # may not exist on every system).
    local grp
    for grp in adm systemd-journal; do
        if getent group "$grp" >/dev/null 2>&1; then
            usermod -a -G "$grp" alloy 2>/dev/null || true
        fi
    done

    # Start Alloy. The status is captured explicitly: under `set -e` a bare
    # failing `systemctl enable --now` would terminate the script right here,
    # with Promtail already stopped and no rollback.
    log "Starting Alloy..."
    local alloy_started=true
    systemctl enable --now alloy || alloy_started=false
    if [[ "$alloy_started" == true ]]; then
        sleep 2
        systemctl is-active --quiet alloy || alloy_started=false
    fi

    if [[ "$alloy_started" != true ]]; then
        error "Alloy failed to start. Check: journalctl -u alloy --no-pager -n 30"
        # Do not leave a broken unit enabled next to a restored Promtail.
        systemctl disable --now alloy 2>/dev/null || true
        if [[ "$promtail_was_active" == true ]]; then
            error "Rolling back — restarting Promtail"
            systemctl enable --now promtail 2>/dev/null || true
        fi
        exit 1
    fi
    log "Alloy is running"

    # Remove Promtail if requested
    if [[ "$KEEP_PROMTAIL" == false ]]; then
        log "Removing Promtail package..."
        local removed=true
        case "$OS" in
            ubuntu|debian)
                apt-get remove -y promtail 2>/dev/null || removed=false
                ;;
            *)
                yum remove -y promtail 2>/dev/null \
                    || dnf remove -y promtail 2>/dev/null \
                    || removed=false
                ;;
        esac
        if [[ "$removed" == true ]]; then
            log "Promtail removed"
        else
            warn "Could not remove the Promtail package; remove it manually"
        fi
    else
        log "Promtail package kept (use 'systemctl start promtail' to rollback)"
    fi
}
# Print the end-of-run report to stdout.
# Reads: OS_PRETTY, DETECTED_HOSTNAME, LOKI_URL, ALLOY_CONFIG_FILE, DRY_RUN,
#        GENERATE_ONLY
# The service status and verify/rollback hints appear only for a real
# (non-dry-run, non-generate-only) migration.
print_summary() {
    local rule="=========================================="

    cat << EOF

$rule
 Migration Summary
$rule
 OS: $OS_PRETTY
 Hostname: $DETECTED_HOSTNAME
 Loki URL: $LOKI_URL
 Alloy config: $ALLOY_CONFIG_FILE
EOF

    if ! [[ "$DRY_RUN" == true || "$GENERATE_ONLY" == true ]]; then
        cat << EOF

 Alloy status: $(systemctl is-active alloy 2>/dev/null || echo 'not checked')

 Verify:
 systemctl status alloy
 journalctl -u alloy -f
 curl http://localhost:12345 (Alloy UI)

 Rollback:
 sudo systemctl stop alloy
 sudo systemctl start promtail
EOF
    fi

    if [[ "$GENERATE_ONLY" == true ]]; then
        cat << EOF

 Config generated. Review and deploy manually.
EOF
    fi

    echo "$rule"
    echo ""
}
# Orchestrate the migration: parse options, detect the environment, generate
# and write the config, then (unless told otherwise) install Alloy, validate
# the config and cut over from Promtail.
main() {
    parse_arguments "$@"

    log "Promtail → Alloy Migration Script v${SCRIPT_VERSION}"
    echo ""

    # Root is required only when the run can actually change the system.
    if ! [[ "$GENERATE_ONLY" == true || "$DRY_RUN" == true || "$EUID" -eq 0 ]]; then
        error "This script must be run as root (or use --generate-only)"
        exit 1
    fi

    # Discovery phase
    detect_os
    detect_hostname
    detect_loki_url
    check_promtail_status

    # Config generation phase
    generate_alloy_config
    write_config "$ALLOY_CONFIG_FILE"

    # --generate-only stops once the file has been written.
    if [[ "$GENERATE_ONLY" == true ]]; then
        print_summary
        exit 0
    fi

    # Deployment phase; installation and cutover can each be skipped.
    [[ "$SKIP_INSTALL" == true ]] || install_alloy
    validate_config
    [[ "$SKIP_CUTOVER" == true ]] || perform_cutover

    print_summary
}
main "$@"
+305
View File
@@ -0,0 +1,305 @@
#!/bin/bash
#############################################################
#### Grafana Mimir Install Script for Oracle Linux, ####
#### Centos/Redhat and Debian/Ubuntu Servers. ####
#### ####
#### Author: Phil Connor 01/09/2025 ####
#### License: MIT ####
#### Contact: contact@mylinux.work ####
#### Version 1.00.010925 ####
#### ####
#### To use this script chmod it to 755 ####
#### or simply type bash <filename.sh> ####
#############################################################
########################
### System Variables ###
########################
# Lower-cased distribution name: taken from lsb_release when available,
# otherwise the first word of PRETTY_NAME in /etc/os-release.
if command -v lsb_release >/dev/null 2>&1; then
    OS=$(lsb_release -i | awk '{print $3}' | tr '[:upper:]' '[:lower:]')
else
    OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
fi

domain=mylinux.work         # DNS domain used for the nginx server_name
bindir=/usr/local/bin       # where the mimir binary is installed
mimirdir=/etc/prometheus    # directory holding mimir.yml
datadir=/mimir              # root of Mimir's on-disk data

# Unit files created by the administrator belong in /etc/systemd/system.
# The path is now unconditional: the earlier fallback picked
# /usr/lib/systemd/system exactly when that directory did not exist, so the
# unit file could not be written there.
psdir='/etc/systemd/system'
#########################
### Check permissions ###
#########################
# Everything below creates users, installs packages and writes under /etc.
if [[ $EUID -ne 0 ]]; then
    echo ''
    echo "$(basename "$0") This script must be run as root! Login as root, or sudo/su."
    echo ''
    exit 1
fi

######################
### Package Manager ##
######################
# Debian uses apt just like Ubuntu; everything else is treated as yum-based.
pkgmgr="yum -y"
case "$OS" in
    ubuntu | debian) pkgmgr="apt -y" ;;
esac

#################################
#### Add Mimir User/Group    ####
#################################
# Exact account lookup: `grep mimir /etc/passwd` also matches any line that
# merely contains the word and prints the match to the terminal.
if ! getent passwd mimir >/dev/null 2>&1; then
    # The group may already exist from an earlier partial run.
    getent group mimir >/dev/null 2>&1 || groupadd --system mimir
    case "$OS" in
        ubuntu | debian)
            useradd -s /sbin/nologin --system -g mimir mimir
            ;;
        *)
            useradd -m -s /bin/false mimir -g mimir
            ;;
    esac
fi

#########################################
#### Check for wget, curl and unzip  ####
#########################################
for tool in wget curl unzip; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        # $pkgmgr is intentionally unquoted: it holds a command plus its flag.
        # shellcheck disable=SC2086
        $pkgmgr install "$tool"
    fi
done
##########################
### Install Mimir ###
##########################
# Install Grafana Mimir as a single-instance systemd service.
# Steps: create the config and data directories, download the latest release
# binary, write mimir.yml and the systemd unit, then enable and start it.
# Reads:  mimirdir, datadir, bindir, psdir
# Writes: SERVER_IP (first address reported by `hostname -I`)
# Exits:  1 when a directory cannot be created or the download/installation of
#         the binary fails.
install_mimir() {
    # Create config and data directories (mkdir -p is a no-op when present)
    mkdir -p "$mimirdir" || { echo "Failed to create $mimirdir directory"; exit 1; }
    mkdir -p "$datadir" || { echo "Failed to create $datadir directory"; exit 1; }

    # Create Mimir subdirectories
    mkdir -p "$datadir"/{tsdb-sync,data/tsdb,mimir-tsdb,compactor,mimir-ruler}
    chown -R mimir:mimir "$datadir"

    # Download and install Mimir.
    # The release publishes the server as a bare executable named
    # mimir-linux-amd64. Fetch exactly that asset (not every asset whose name
    # contains "linux-amd64") and install it with the executable bit set.
    echo "Downloading latest Grafana Mimir..."
    local release_url="https://github.com/grafana/mimir/releases/latest/download/mimir-linux-amd64"
    local tmp_bin
    tmp_bin=$(mktemp) || { echo "Failed to create temporary file"; exit 1; }
    if ! curl -fsSL -o "$tmp_bin" "$release_url"; then
        echo "Failed to download Mimir"
        rm -f -- "$tmp_bin"
        exit 1
    fi
    if ! install -m 0755 -o mimir -g mimir "$tmp_bin" "$bindir/mimir"; then
        echo "Failed to install Mimir to $bindir/mimir"
        rm -f -- "$tmp_bin"
        exit 1
    fi
    rm -f -- "$tmp_bin"

    # Get server IP address
    SERVER_IP=$(hostname -I | awk '{print $1}')

    # Create Mimir config. YAML nesting is significant, so the document is
    # written from a here-document that preserves the indentation.
    cat > "$mimirdir/mimir.yml" << EOF
# Mimir Configuration - Single Instance Mode
multitenancy_enabled: false

blocks_storage:
  backend: filesystem
  bucket_store:
    sync_dir: $datadir/tsdb-sync
  filesystem:
    dir: $datadir/data/tsdb
  tsdb:
    dir: $datadir/mimir-tsdb
    retention_period: 720h

compactor:
  data_dir: $datadir/compactor
  sharding_ring:
    kvstore:
      store: inmemory

distributor:
  ring:
    kvstore:
      store: inmemory

ingester:
  ring:
    kvstore:
      store: inmemory
    replication_factor: 1

ruler_storage:
  backend: filesystem
  filesystem:
    dir: $datadir/mimir-ruler

server:
  http_listen_port: 9009
  log_level: info

memberlist:
  abort_if_cluster_join_fails: false
  bind_port: 7946
  advertise_addr: $SERVER_IP
  join_members: []

store_gateway:
  sharding_ring:
    replication_factor: 1
    kvstore:
      store: inmemory

limits:
  max_global_series_per_user: 0
  max_global_exemplars_per_user: 100000
EOF
    chown mimir:mimir "$mimirdir/mimir.yml"

    # Create systemd service (\$MAINPID is escaped so systemd expands it)
    cat > "$psdir/mimir.service" << EOF
[Unit]
Description=Grafana Mimir
Documentation=https://grafana.com/docs/mimir/
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=mimir
Group=mimir
ExecStart=$bindir/mimir -config.file=$mimirdir/mimir.yml
ExecReload=/bin/kill -HUP \$MAINPID
TimeoutStopSec=20s
SendSIGKILL=no

# Output to journal
StandardOutput=journal
StandardError=journal
SyslogIdentifier=mimir

# Restart
Restart=on-failure
RestartSec=5s

# Security
NoNewPrivileges=yes
PrivateTmp=yes
ProtectSystem=full
ProtectHome=yes
ReadWritePaths=$datadir

# Resource limits
LimitNOFILE=1048576
LimitNPROC=1048576

# Environment
Environment=GOMAXPROCS=4

[Install]
WantedBy=multi-user.target
EOF

    systemctl daemon-reload
    systemctl enable --now mimir

    echo ""
    echo "=========================================="
    echo "Mimir installation complete!"
    echo "=========================================="
    echo "Mimir UI: http://localhost:9009"
    echo "Config: $mimirdir/mimir.yml"
    echo "Data: $datadir"
    echo ""
    echo "Add to Prometheus remote_write:"
    echo "  remote_write:"
    echo "    - url: http://localhost:9009/api/v1/push"
    echo ""
}
################################
### Install and Config Nginx ###
################################
# Install nginx and publish Mimir (localhost:9009) as http://mimir.<domain>.
# Reads:   pkgmgr, domain
# Returns: 1 when no known nginx configuration directory exists or when
#          `nginx -t` rejects the resulting configuration; 0 otherwise.
install_nginx() {
    # $pkgmgr is intentionally unquoted: it holds a command plus its flag.
    # shellcheck disable=SC2086
    $pkgmgr install nginx

    # Debian-style layouts use sites-available/sites-enabled; others use conf.d.
    local sites_available="" sites_enabled=""
    if [ -d "/etc/nginx/sites-available" ]; then
        sites_available=/etc/nginx/sites-available
        sites_enabled=/etc/nginx/sites-enabled/
    elif [ -d "/etc/nginx/conf.d" ]; then
        sites_available=/etc/nginx/conf.d
    else
        # With no directory selected the config would be written to
        # "/mimir.conf" in the filesystem root.
        echo "No nginx configuration directory found (sites-available or conf.d)"
        return 1
    fi

    # nginx's own variables ($host, ...) are escaped so the shell leaves them
    # alone; only $domain is expanded here.
    cat > "$sites_available/mimir.conf" << EOF
server {
    listen 80;
    listen [::]:80;

    server_name mimir.$domain;

    location / {
        proxy_pass http://localhost:9009/;
        proxy_set_header Host \$host;
        proxy_set_header X-Real-IP \$remote_addr;
        proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto \$scheme;
        proxy_read_timeout 300s;
        proxy_connect_timeout 75s;
    }
}
EOF

    if [ -n "$sites_enabled" ]; then
        # An already existing link from a previous run is fine.
        ln -s "$sites_available/mimir.conf" "$sites_enabled" 2>/dev/null || true
    fi

    if nginx -t; then
        systemctl restart nginx
        echo "Nginx configured for Mimir at mimir.$domain"
    else
        echo "Nginx configuration test failed"
        return 1
    fi
}
######################
### Function Calls ###
######################
# Run the installation.
install_mimir

# Uncomment to install nginx reverse proxy
# install_nginx

#############################################################
# Final report: installed version and current service state.
printf '%s\n' \
    "" \
    "==========================================" \
    "Installation Summary" \
    "=========================================="
echo "Mimir version: $(mimir --version 2>&1 | head -1)"
echo "Status: $(systemctl is-active mimir)"
printf '%s\n' \
    "" \
    "Check status: systemctl status mimir" \
    "View logs: journalctl -u mimir -f" \
    ""
File diff suppressed because it is too large Load Diff
+263
View File
@@ -0,0 +1,263 @@
#!/bin/bash
#############################################################
#### ntfy Desktop Client Setup for Linux ####
#### Subscribe to ntfy push notifications with desktop ####
#### alerts via systemd user service ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 1.0 ####
#### ####
#### Usage: ./ntfy-client-setup-linux.sh ####
#############################################################
# Strict mode: abort on command failure, unset variables, and failures
# anywhere in a pipeline.
set -euo pipefail

NTFY_VERSION="2.11.0"

# ── Detect the actual desktop user ─────────────────────────
# Handles both sudo and non-sudo execution. Under sudo the invoking user's
# account is the target, not root's.
if [ -n "${SUDO_USER:-}" ]; then
    DESKTOP_USER="$SUDO_USER"
    DESKTOP_HOME=$(getent passwd "$SUDO_USER" | cut -d: -f6)
else
    # USER and HOME are not guaranteed to be set (minimal containers, cron);
    # under `set -u` a bare reference would abort the script, so fall back to
    # the account database.
    DESKTOP_USER="${USER:-$(id -un)}"
    DESKTOP_HOME="${HOME:-$(getent passwd "$DESKTOP_USER" | cut -d: -f6)}"
fi

# Without a home directory every path below would resolve under "/".
if [ -z "$DESKTOP_HOME" ]; then
    echo "Could not determine the home directory of $DESKTOP_USER" >&2
    exit 1
fi

CONFIG_DIR="$DESKTOP_HOME/.config/ntfy"
SYSTEMD_DIR="$DESKTOP_HOME/.config/systemd/user"
# ── Helper functions ───────────────────────────────────────
# Message helpers. All arguments are joined with spaces and backslash escapes
# in the text are interpreted (echo -e). info/warn write to stdout, error to
# stderr.
info() {
    local text="$*"
    echo -e "$text"
}

warn() {
    local text="$*"
    echo -e "$text"
}

error() {
    local text="$*"
    echo -e "$text" >&2
}
# Run the given command as the desktop user.
# When the script runs as root on behalf of another user the command goes
# through `sudo -u`; in every other case it is executed directly.
run_as_user() {
    if [ "$(id -u)" -ne 0 ] || [ "$DESKTOP_USER" = "root" ]; then
        "$@"
        return
    fi

    sudo -u "$DESKTOP_USER" "$@"
}
# Install one package with the first package manager found (apt, dnf, pacman).
# Arguments:
#   $1 - package name for apt
#   $2 - package name for dnf    (defaults to $1)
#   $3 - package name for pacman (defaults to $1)
# Returns 1 when none of the three package managers is available.
install_package() {
    local apt_name="$1"
    local dnf_name="${2:-$1}"
    local pacman_name="${3:-$1}"

    if command -v apt &> /dev/null; then
        sudo apt install -y "$apt_name"
        return
    fi

    if command -v dnf &> /dev/null; then
        sudo dnf install -y "$dnf_name"
        return
    fi

    if command -v pacman &> /dev/null; then
        sudo pacman -S --noconfirm "$pacman_name"
        return
    fi

    error "Could not detect package manager. Please install '$apt_name' manually."
    return 1
}
# ── Banner ─────────────────────────────────────────────────
# Intro banner showing which account the client is being set up for.
cat << EOF

===========================================
 ntfy Desktop Client Setup for Linux
===========================================

 User: $DESKTOP_USER
 Home: $DESKTOP_HOME

EOF

# ── Step 1: Install dependencies ───────────────────────────
# notify-send (from libnotify) and curl are installed only when missing.
echo "── Checking dependencies ──────────────────"
echo ""

if command -v notify-send &> /dev/null; then
    info "notify-send already available"
else
    echo " Installing libnotify for desktop notifications..."
    install_package libnotify-bin libnotify libnotify
    info "libnotify installed"
fi

if command -v curl &> /dev/null; then
    info "curl already available"
else
    echo " Installing curl..."
    install_package curl curl curl
    info "curl installed"
fi

echo ""
# ── Step 2: Install ntfy binary ────────────────────────────
echo "── Installing ntfy client ─────────────────"
echo ""

# Determine install location based on privileges: system-wide when root or
# when passwordless sudo works, otherwise inside the user's home.
if [ "$(id -u)" -eq 0 ] || sudo -n true 2>/dev/null; then
    NTFY_BIN="/usr/local/bin/ntfy"
    INSTALL_SYSTEM=true
else
    NTFY_BIN="$DESKTOP_HOME/.local/bin/ntfy"
    INSTALL_SYSTEM=false
fi

if [ -x "$NTFY_BIN" ]; then
    info "ntfy already installed at $NTFY_BIN"
else
    # Detect architecture and map it to the release asset naming
    ARCH=$(uname -m)
    case "$ARCH" in
        x86_64) NTFY_ARCH="amd64" ;;
        aarch64) NTFY_ARCH="arm64" ;;
        armv7l) NTFY_ARCH="armv7" ;;
        *)
            error "Unsupported architecture: $ARCH"
            exit 1
            ;;
    esac

    DOWNLOAD_URL="https://github.com/binwiederhier/ntfy/releases/download/v${NTFY_VERSION}/ntfy_${NTFY_VERSION}_linux_${NTFY_ARCH}.tar.gz"
    echo " Downloading ntfy v${NTFY_VERSION} (${NTFY_ARCH})..."

    TEMP_DIR=$(mktemp -d)
    trap 'rm -rf "$TEMP_DIR"' EXIT

    # -f makes curl fail on an HTTP error instead of saving the error page,
    # which tar would then reject with a misleading message.
    if ! curl -fsSL -o "$TEMP_DIR/ntfy.tar.gz" "$DOWNLOAD_URL"; then
        error "Download failed: $DOWNLOAD_URL"
        exit 1
    fi
    tar -xzf "$TEMP_DIR/ntfy.tar.gz" -C "$TEMP_DIR"

    # The binary sits in a versioned sub-directory of the archive.
    EXTRACTED_BIN=$(find "$TEMP_DIR" -name "ntfy" -type f -print -quit)
    if [ -z "$EXTRACTED_BIN" ]; then
        error "ntfy binary not found in the downloaded archive"
        exit 1
    fi

    if [ "$INSTALL_SYSTEM" = true ]; then
        if [ "$(id -u)" -eq 0 ]; then
            # Already root: sudo is unnecessary and may not be installed.
            install -m 0755 "$EXTRACTED_BIN" "$NTFY_BIN"
        else
            sudo install -m 0755 "$EXTRACTED_BIN" "$NTFY_BIN"
        fi
    else
        mkdir -p "$(dirname "$NTFY_BIN")"
        install -m 0755 "$EXTRACTED_BIN" "$NTFY_BIN"
    fi

    rm -rf "$TEMP_DIR"
    trap - EXIT
    info "ntfy installed to $NTFY_BIN"
fi

echo ""
# ── Step 3: Interactive configuration ──────────────────────
echo "── Configuration ──────────────────────────"
echo ""

read -rp " Server URL [https://ntfy.example.com]: " INPUT_SERVER
SERVER_URL="${INPUT_SERVER:-https://ntfy.example.com}"
# Drop one trailing slash so "${SERVER_URL}/<topic>" never contains "//".
SERVER_URL="${SERVER_URL%/}"

echo ""
read -rp " Access token (leave empty for public topics): " ACCESS_TOKEN

echo ""
echo " Enter topics to subscribe to (space-separated)."
echo " Examples: alerts monitoring backup-status"
read -rp " Topics: " TOPICS

if [ -z "$TOPICS" ]; then
    error "At least one topic is required."
    exit 1
fi

# Only report whether a token was entered. Chaining ${VAR:+(set)} with
# ${VAR:-(none)} prints "(set)" followed by the token itself whenever the
# token is non-empty.
if [ -n "$ACCESS_TOKEN" ]; then
    TOKEN_STATUS="(set)"
else
    TOKEN_STATUS="(none)"
fi

echo ""
echo " Server: $SERVER_URL"
echo " Topics: $TOPICS"
echo " Token: $TOKEN_STATUS"
echo ""
# ── Step 4: Create client config ───────────────────────────
echo "── Creating configuration files ────────────"
echo ""

run_as_user mkdir -p "$CONFIG_DIR"
run_as_user mkdir -p "$SYSTEMD_DIR"

# Build the subscribe section for client.yml: one list item per topic, with
# the token as a sibling key of "topic" inside the same item.
read -ra TOPIC_LIST <<< "$TOPICS"
SUBSCRIBE_BLOCK=""
for topic in "${TOPIC_LIST[@]}"; do
    SUBSCRIBE_BLOCK+="  - topic: ${SERVER_URL}/${topic}"$'\n'
    if [ -n "$ACCESS_TOKEN" ]; then
        SUBSCRIBE_BLOCK+="    token: ${ACCESS_TOKEN}"$'\n'
    fi
done

# Write client.yml.
# printf is used instead of a here-document: SUBSCRIBE_BLOCK already ends in a
# newline, and a line of the form "${SUBSCRIBE_BLOCK}EOF" is not a valid
# here-document terminator, so the document would run on into the following
# script lines.
{
    printf '%s\n' \
        '# ntfy client configuration' \
        '# Documentation: https://docs.ntfy.sh/subscribe/cli/' \
        'subscribe:'
    printf '%s' "$SUBSCRIBE_BLOCK"
} > "$CONFIG_DIR/client.yml"
# The file may contain an access token: keep it private to its owner.
chmod 600 "$CONFIG_DIR/client.yml"

# Fix ownership if running as root. "user:" (empty group) selects the user's
# login group, which need not be named after the user.
if [ "$(id -u)" -eq 0 ] && [ "$DESKTOP_USER" != "root" ]; then
    chown -R "$DESKTOP_USER:" "$CONFIG_DIR"
fi
info "Config saved to $CONFIG_DIR/client.yml"

# ── Step 5: Create systemd user service ────────────────────
cat > "$SYSTEMD_DIR/ntfy-subscribe.service" << EOF
[Unit]
Description=ntfy desktop notification subscriber
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
ExecStart=${NTFY_BIN} subscribe --from-config
Restart=on-failure
RestartSec=10

[Install]
WantedBy=default.target
EOF

# Fix ownership if running as root
if [ "$(id -u)" -eq 0 ] && [ "$DESKTOP_USER" != "root" ]; then
    chown -R "$DESKTOP_USER:" "$SYSTEMD_DIR"
fi
info "Systemd user service created"
echo ""
# ── Done ───────────────────────────────────────────────────
# Closing instructions: how to enable the user service, day-to-day commands,
# and a test publish to the first configured topic (${TOPICS%% *}).
cat << EOF
===========================================
 Setup Complete
===========================================

 To start receiving notifications, run as $DESKTOP_USER
 from a graphical desktop session:

 systemctl --user daemon-reload
 systemctl --user enable --now ntfy-subscribe

 Useful commands:

 Status: systemctl --user status ntfy-subscribe
 Logs: journalctl --user -u ntfy-subscribe -f
 Restart: systemctl --user restart ntfy-subscribe
 Stop: systemctl --user stop ntfy-subscribe
 Disable: systemctl --user disable --now ntfy-subscribe

 Test with:

 curl -d 'Hello from ntfy!' ${SERVER_URL}/${TOPICS%% *}

EOF
+236
View File
@@ -0,0 +1,236 @@
#############################################################
#### ntfy Desktop Client Setup for Windows ####
#### Subscribe to ntfy push notifications with Windows ####
#### toast notifications ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 1.0 ####
#### ####
#### Usage: .\ntfy-client-setup-windows.ps1 ####
#############################################################
$ErrorActionPreference = "Stop"
# --- Configuration ---
$NtfyVersion = "2.8.0"
$InstallDir = "$env:LOCALAPPDATA\ntfy"
$ConfigDir = "$env:APPDATA\ntfy"
# --- Interactive Prompts ---
Write-Host ""
Write-Host "=== ntfy Desktop Notifications Setup ===" -ForegroundColor Cyan
Write-Host "Installing for user: $env:USERNAME"
Write-Host ""
# Server URL
$ServerUrl = Read-Host "Enter your ntfy server URL (e.g. https://ntfy.example.com)"
$ServerUrl = $ServerUrl.TrimEnd("/")
if ([string]::IsNullOrWhiteSpace($ServerUrl)) {
Write-Host "ERROR: Server URL is required." -ForegroundColor Red
exit 1
}
# Access token (optional — some servers allow anonymous access)
$Token = Read-Host "Enter your access token (leave blank if not required)"
# Topics
$topicInput = Read-Host "Enter topics to subscribe to, comma-separated (e.g. alerts-critical,alerts-all)"
if ([string]::IsNullOrWhiteSpace($topicInput)) {
Write-Host "ERROR: At least one topic is required." -ForegroundColor Red
exit 1
}
$Topics = $topicInput -split "," | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" }
Write-Host ""
Write-Host "Server: $ServerUrl" -ForegroundColor White
Write-Host "Topics: $($Topics -join ', ')" -ForegroundColor White
Write-Host "Token: $(if ($Token) { '********' } else { '(none)' })" -ForegroundColor White
Write-Host ""
# --- Create directories ---
New-Item -ItemType Directory -Force -Path $InstallDir | Out-Null
New-Item -ItemType Directory -Force -Path $ConfigDir | Out-Null
# --- Download ntfy if not already installed ---
if (Test-Path "$InstallDir\ntfy.exe") {
Write-Host "ntfy already installed at: $InstallDir\ntfy.exe" -ForegroundColor Green
} else {
Write-Host "Downloading ntfy v$NtfyVersion..."
$downloadUrl = "https://github.com/binwiederhier/ntfy/releases/download/v$NtfyVersion/ntfy_${NtfyVersion}_windows_amd64.zip"
$zipPath = "$env:TEMP\ntfy.zip"
Invoke-WebRequest -Uri $downloadUrl -OutFile $zipPath
Write-Host "Extracting..."
$extractPath = "$env:TEMP\ntfy_extract"
Remove-Item -Path $extractPath -Recurse -Force -ErrorAction SilentlyContinue
Expand-Archive -Path $zipPath -DestinationPath $extractPath -Force
Remove-Item $zipPath
# Find the exe (may be in a subfolder)
$ntfyExe = Get-ChildItem -Path $extractPath -Recurse -Filter "ntfy.exe" | Select-Object -First 1
if ($ntfyExe) {
Copy-Item -Path $ntfyExe.FullName -Destination "$InstallDir\ntfy.exe" -Force
} else {
Write-Host "ERROR: Could not find ntfy.exe in downloaded archive." -ForegroundColor Red
exit 1
}
Remove-Item -Path $extractPath -Recurse -Force -ErrorAction SilentlyContinue
Write-Host "Installed to: $InstallDir\ntfy.exe" -ForegroundColor Green
}
Write-Host ""
# --- Create client.yml config ---
$clientYml = @"
default-host: $ServerUrl
"@
if ($Token) {
$clientYml += "`ndefault-token: $Token"
}
$clientYmlPath = "$ConfigDir\client.yml"
$clientYml | Out-File -FilePath $clientYmlPath -Encoding UTF8
Write-Host "Client config saved to: $clientYmlPath" -ForegroundColor Green
# --- Build topic URLs ---
$topicUrls = @()
foreach ($topic in $Topics) {
$topicUrls += "$ServerUrl/$topic"
}
$topicUrlsString = $topicUrls -join " "
# --- Create PowerShell notification script ---
# The here-string below is the complete text of run-subscribe.ps1.
# Backtick-escaped variables (`$name) are written out literally and evaluated
# when the generated script runs; unescaped ones ($tokenLine, $InstallDir,
# $topicUrlsString) are expanded now, at setup time.
# NOTE(review): the topic URLs are passed to "ntfy subscribe" as separate
# space-separated arguments. The ntfy CLI documents "subscribe TOPIC [COMMAND]",
# so with more than one topic the second URL may be treated as COMMAND -
# confirm against the ntfy CLI documentation.

# Build the token environment line only if a token was provided.
# The token is emitted as a single-quoted literal (embedded ' doubled) so that
# characters such as $, backtick or " inside it cannot change the generated code.
$tokenLine = ""
if ($Token) {
    $tokenLine = "`$env:NTFY_TOKEN = '" + ($Token -replace "'", "''") + "'"
}
$psScriptContent = @"
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing
# Create a persistent notification icon in the system tray
`$global:notifyIcon = New-Object System.Windows.Forms.NotifyIcon
`$global:notifyIcon.Icon = [System.Drawing.SystemIcons]::Information
`$global:notifyIcon.Visible = `$true
`$global:notifyIcon.Text = "ntfy alerts"
function Show-Notification {
param([string]`$Title, [string]`$Message, [int]`$Priority)
# Map ntfy priority levels to Windows balloon icon types
# 1 (min), 2 (low) -> None
# 3 (default) -> Info
# 4 (high), 5 (max) -> Error
`$icon = [System.Windows.Forms.ToolTipIcon]::Info
if (`$Priority -ge 4) { `$icon = [System.Windows.Forms.ToolTipIcon]::Error }
elseif (`$Priority -le 2) { `$icon = [System.Windows.Forms.ToolTipIcon]::None }
`$global:notifyIcon.BalloonTipIcon = `$icon
`$global:notifyIcon.BalloonTipTitle = `$Title
`$global:notifyIcon.BalloonTipText = `$Message
`$global:notifyIcon.ShowBalloonTip(30000)
}
# Set access token if configured
$tokenLine
`$ntfyExe = "$InstallDir\ntfy.exe"
# Subscribe and process JSON output line by line
& `$ntfyExe subscribe $topicUrlsString 2>&1 | ForEach-Object {
`$line = `$_
if (`$line -match '"event":"message"') {
try {
`$json = `$line | ConvertFrom-Json
`$title = if (`$json.title) { `$json.title } else { `$json.topic }
`$message = `$json.message
`$priority = if (`$json.priority) { `$json.priority } else { 3 }
Show-Notification -Title `$title -Message `$message -Priority `$priority
} catch { }
}
}
`$global:notifyIcon.Dispose()
"@
$psScriptPath = "$ConfigDir\run-subscribe.ps1"
$psScriptContent | Out-File -FilePath $psScriptPath -Encoding UTF8
Write-Host "Notification script saved to: $psScriptPath" -ForegroundColor Green
# --- Create VBS wrapper for hidden startup (no console window) ---
# WshShell.Run is called with window style 0 so PowerShell starts without a
# visible window. The doubled quotes ("") are VBScript's escape for a literal
# double quote, keeping the script path intact when it contains spaces.
$vbsContent = @"
Set WshShell = CreateObject("WScript.Shell")
WshShell.Run "powershell -ExecutionPolicy Bypass -WindowStyle Hidden -File ""$psScriptPath""", 0
Set WshShell = Nothing
"@
$vbsPath = "$ConfigDir\run-subscribe-hidden.vbs"
# Write the launcher as UTF-16 LE ("Unicode"), which Windows Script Host reads
# natively. ASCII encoding would replace any non-ASCII character in the embedded
# script path (e.g. an accented user name) with "?", producing a broken path.
$vbsContent | Out-File -FilePath $vbsPath -Encoding Unicode
Write-Host "Hidden launcher saved to: $vbsPath" -ForegroundColor Green
Write-Host ""
# --- Create startup shortcut ---
# Places a .lnk in the current user's Startup folder that runs the hidden VBS
# launcher through wscript.exe.
Write-Host "Creating startup shortcut..."
$startupPath = "$env:APPDATA\Microsoft\Windows\Start Menu\Programs\Startup"
$shortcutPath = "$startupPath\ntfy-subscribe.lnk"
$shortcut = (New-Object -ComObject WScript.Shell).CreateShortcut($shortcutPath)
$shortcut.Description = "ntfy notification subscriber"
$shortcut.WorkingDirectory = $ConfigDir
$shortcut.TargetPath = "wscript.exe"
$shortcut.Arguments = "`"$vbsPath`""
$shortcut.WindowStyle = 7 # Minimized
$shortcut.Save()
Write-Host "Startup shortcut created at: $shortcutPath" -ForegroundColor Green
Write-Host ""
# --- Start the subscriber now ---
Write-Host "Starting ntfy subscriber..."
# Stop any existing ntfy or subscriber processes
Stop-Process -Name ntfy -ErrorAction SilentlyContinue
# Other PowerShell processes (never this one) are stopped only when their
# command line mentions run-subscribe, i.e. they are an earlier subscriber.
Get-Process powershell -ErrorAction SilentlyContinue | Where-Object { $_.Id -ne $PID } | ForEach-Object {
    try {
        $cmdLine = (Get-CimInstance Win32_Process -Filter "ProcessId = $($_.Id)" -ErrorAction SilentlyContinue).CommandLine
        if ($cmdLine -like "*run-subscribe*") { Stop-Process -Id $_.Id -Force -ErrorAction SilentlyContinue }
    } catch {}
}
Start-Sleep -Seconds 1
# Start-Process joins the -ArgumentList elements with spaces and does not quote
# them, so the script path is wrapped in escaped double quotes; otherwise a
# path containing spaces would be split and -File would receive only a fragment.
$process = Start-Process -FilePath "powershell" `
    -ArgumentList @("-ExecutionPolicy", "Bypass", "-WindowStyle", "Hidden", "-File", "`"$psScriptPath`"") `
    -WindowStyle Hidden `
    -PassThru
# Give the new process a moment to fail fast before its state is inspected.
Start-Sleep -Seconds 2
# --- Print status and management commands ---
# The subscriber counts as running when Start-Process returned a process
# object and that process has not exited yet.
$subscriberAlive = $process -and -not $process.HasExited
Write-Host ""
if ($subscriberAlive) {
    Write-Host "=== Setup Complete ===" -ForegroundColor Green
    Write-Host ""
    Write-Host "ntfy is running and will start automatically on login." -ForegroundColor Green
    Write-Host "You should see Windows toast notifications when messages arrive."
    Write-Host ""
    Write-Host "Management commands (run in PowerShell):" -ForegroundColor Cyan
    $managementHints = @(
        " Check status: Get-Process ntfy -ErrorAction SilentlyContinue",
        " Stop: Stop-Process -Name ntfy",
        " Start manually: wscript.exe '$vbsPath'",
        " Edit config: notepad '$clientYmlPath'",
        " Edit topics: notepad '$psScriptPath'"
    )
    foreach ($hint in $managementHints) {
        Write-Host $hint
    }
} else {
    Write-Host "WARNING: ntfy may not have started correctly." -ForegroundColor Yellow
    Write-Host "Try running manually: wscript.exe '$vbsPath'"
}
Write-Host ""
# Show a ready-to-paste test command using the first configured topic.
Write-Host "To test, send a notification from another machine:" -ForegroundColor Cyan
Write-Host " curl -d 'Test message' $ServerUrl/$($Topics[0])"
Write-Host ""
+990
View File
@@ -0,0 +1,990 @@
#!/bin/bash
################################################################################
# Script Name: postfix-metrics.sh
# Description: Prometheus exporter for Postfix mail server metrics
#
# Usage:
# # Output to stdout
# ./postfix-metrics.sh
#
# # Textfile collector mode (atomic write)
# ./postfix-metrics.sh --textfile
#
# # Custom output file
# ./postfix-metrics.sh -o /path/to/metrics.prom
#
################################################################################
# ============================================================================
# CONFIGURATION VARIABLES
# ============================================================================
# Directory used for --textfile mode (node_exporter textfile collector).
TEXTFILE_DIR='/var/lib/node_exporter'
# Destination file for the metrics; empty by default, set by --textfile or -o/--output.
OUTPUT_FILE=''
# HTTP server mode flag (--http) and the port it uses (-p/--port).
HTTP_MODE='false'
HTTP_PORT='9192'
# Postfix spool directory whose per-queue subdirectories are inspected.
QUEUE_DIR='/var/spool/postfix'
# Mail log that the log-derived metrics are read from.
LOG_FILE='/var/log/mail.log'
# Value emitted in the hostname="..." label of every metric.
HOSTNAME="$(hostname)"
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
#######################################
# Print the command-line help text and exit.
# Globals:   TEXTFILE_DIR, HTTP_PORT (read; interpolated into the help text)
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
show_usage() {
  # The --textfile path shown here must match the file name parse_args
  # actually assigns ($TEXTFILE_DIR/postfix.prom).
  cat <<EOF
Usage: $0 [OPTIONS]
Export Postfix statistics as Prometheus metrics.
MODES:
--textfile Write to node_exporter textfile collector
(writes to $TEXTFILE_DIR/postfix.prom)
--http Run HTTP server on port $HTTP_PORT
OPTIONS:
-p, --port HTTP port (default: $HTTP_PORT)
-o, --output Output file path (for custom locations)
-h, --help Show this help message
EXAMPLES:
$0 # Output to stdout
$0 --textfile # Write to textfile collector
$0 --http # Run HTTP server on port $HTTP_PORT
$0 --http -p 9192 # Run HTTP server on custom port
$0 -o /tmp/postfix.prom # Write to custom file
EOF
  exit 0
}
#######################################
# Parse command-line options into the script's global settings.
# Globals:   OUTPUT_FILE, HTTP_MODE, HTTP_PORT (written); TEXTFILE_DIR (read)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   error messages on stderr
# Returns:   0 when all arguments were consumed; exits 1 on an unknown option
#            or an option that is missing its value; -h/--help exits through
#            show_usage
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_usage
        ;;
      --textfile)
        OUTPUT_FILE="$TEXTFILE_DIR/postfix.prom"
        shift
        ;;
      --http)
        HTTP_MODE=true
        shift
        ;;
      -p|--port|-o|--output)
        # "shift 2" fails without shifting when only one argument is left,
        # which would make this loop spin forever; reject the call instead.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires an argument" >&2
          exit 1
        fi
        case "$1" in
          -p|--port) HTTP_PORT="$2" ;;
          *) OUTPUT_FILE="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}
# Count matching lines via "grep -c"; every argument is passed through to grep.
# Prints the count on stdout, or 0 whenever grep exits non-zero
# (no matching line, or grep could not read its input).
grep_count() {
  local matches
  if ! matches=$(grep -c "$@" 2>/dev/null); then
    matches=0
  fi
  echo "$matches"
}
# ============================================================================
# METRIC GENERATION
# ============================================================================
generate_metrics() {
local START_TIME
START_TIME=$(date +%s.%N)
# Queue sizes
echo "# HELP postfix_queue_size Number of messages in each Postfix queue"
echo "# TYPE postfix_queue_size gauge"
for queue in incoming active deferred hold corrupt; do
count=$(find "${QUEUE_DIR}/${queue}" -type f 2>/dev/null | wc -l)
echo "postfix_queue_size{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Oldest message in queue (seconds)
echo "# HELP postfix_queue_oldest_seconds Age of oldest message in queue"
echo "# TYPE postfix_queue_oldest_seconds gauge"
for queue in deferred hold; do
oldest=$(find "${QUEUE_DIR}/${queue}" -type f -printf '%T@\n' 2>/dev/null | sort -n | head -1)
if [[ -n "$oldest" ]]; then
age=$(echo "$(date +%s) - ${oldest%.*}" | bc)
else
age=0
fi
echo "postfix_queue_oldest_seconds{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${age}"
done
# Message counters by status
echo "# HELP postfix_messages_total Total messages by status"
echo "# TYPE postfix_messages_total counter"
for status in sent bounced deferred expired; do
count=$(grep_count "status=${status}" "$LOG_FILE")
echo "postfix_messages_total{status=\"${status}\",hostname=\"${HOSTNAME}\"} ${count}"
done
rejected=$(grep_count 'reject:' "$LOG_FILE")
echo "postfix_messages_total{status=\"rejected\",hostname=\"${HOSTNAME}\"} ${rejected}"
# SMTP connections
echo "# HELP postfix_smtp_connections SMTP connection stats"
echo "# TYPE postfix_smtp_connections counter"
connections=$(grep_count 'connect from' "$LOG_FILE")
disconnections=$(grep_count 'disconnect from' "$LOG_FILE")
echo "postfix_smtp_connections{type=\"connect\",hostname=\"${HOSTNAME}\"} ${connections}"
echo "postfix_smtp_connections{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${disconnections}"
# Connection timeouts
echo "# HELP postfix_timeout_total Connection timeout events"
echo "# TYPE postfix_timeout_total counter"
timeout_count=$(grep_count 'timeout after' "$LOG_FILE")
echo "postfix_timeout_total{hostname=\"${HOSTNAME}\"} ${timeout_count}"
# SASL authentication
echo "# HELP postfix_sasl_auth_total SASL authentication attempts"
echo "# TYPE postfix_sasl_auth_total counter"
sasl_success=$(grep_count 'sasl_username=' "$LOG_FILE")
sasl_fail=$(grep_count 'authentication failed' "$LOG_FILE")
echo "postfix_sasl_auth_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${sasl_success}"
echo "postfix_sasl_auth_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${sasl_fail}"
# Message sizes (bytes)
echo "# HELP postfix_message_size_bytes_total Total bytes of messages processed"
echo "# TYPE postfix_message_size_bytes_total counter"
total_bytes=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
echo "postfix_message_size_bytes_total{hostname=\"${HOSTNAME}\"} ${total_bytes}"
echo "# HELP postfix_message_size_bytes_avg Average message size"
echo "# TYPE postfix_message_size_bytes_avg gauge"
avg_size=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) print int(sum/count); else print 0}')
echo "postfix_message_size_bytes_avg{hostname=\"${HOSTNAME}\"} ${avg_size}"
echo "# HELP postfix_message_size_bytes_max Largest message size"
echo "# TYPE postfix_message_size_bytes_max gauge"
max_size=$(grep -oP 'size=\K\d+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
echo "postfix_message_size_bytes_max{hostname=\"${HOSTNAME}\"} ${max_size:-0}"
# Per-recipient domain stats (top domains)
echo "# HELP postfix_recipient_domain_total Messages per recipient domain"
echo "# TYPE postfix_recipient_domain_total counter"
grep -oP 'to=<[^@]+@\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count domain; do
echo "postfix_recipient_domain_total{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Sender domain stats
echo "# HELP postfix_sender_domain_total Messages per sender domain"
echo "# TYPE postfix_sender_domain_total counter"
grep -oP 'from=<[^@]+@\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count domain; do
echo "postfix_sender_domain_total{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Bounce reasons
echo "# HELP postfix_bounce_reason_total Bounces by reason"
echo "# TYPE postfix_bounce_reason_total counter"
bounce_user=$(grep_count 'User unknown' "$LOG_FILE")
bounce_quota=$(grep_count -i 'over quota\|mailbox full' "$LOG_FILE")
bounce_spam=$(grep_count -i 'blocked\|spam\|blacklist' "$LOG_FILE")
bounce_dns=$(grep_count 'Host or domain name not found' "$LOG_FILE")
bounce_refused=$(grep_count 'Connection refused' "$LOG_FILE")
echo "postfix_bounce_reason_total{reason=\"user_unknown\",hostname=\"${HOSTNAME}\"} ${bounce_user}"
echo "postfix_bounce_reason_total{reason=\"over_quota\",hostname=\"${HOSTNAME}\"} ${bounce_quota}"
echo "postfix_bounce_reason_total{reason=\"spam_blocked\",hostname=\"${HOSTNAME}\"} ${bounce_spam}"
echo "postfix_bounce_reason_total{reason=\"dns_error\",hostname=\"${HOSTNAME}\"} ${bounce_dns}"
echo "postfix_bounce_reason_total{reason=\"connection_refused\",hostname=\"${HOSTNAME}\"} ${bounce_refused}"
# Relay stats
echo "# HELP postfix_relay_total Messages by relay"
echo "# TYPE postfix_relay_total counter"
grep -oP 'relay=\K[^,\[]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | while read -r count relay; do
echo "postfix_relay_total{relay=\"${relay}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Client connections (top IPs)
echo "# HELP postfix_client_connections_total Connections per client IP"
echo "# TYPE postfix_client_connections_total counter"
grep -oP 'connect from \S+\[\K[^\]]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -10 | while read -r count ip; do
echo "postfix_client_connections_total{client_ip=\"${ip}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# TLS stats
echo "# HELP postfix_tls_connections_total TLS connection statistics"
echo "# TYPE postfix_tls_connections_total counter"
tls_in=$(grep_count 'Anonymous TLS connection established from' "$LOG_FILE")
tls_out=$(grep_count 'Anonymous TLS connection established to' "$LOG_FILE")
verified_in=$(grep_count 'Trusted TLS connection established from' "$LOG_FILE")
verified_out=$(grep_count 'Trusted TLS connection established to' "$LOG_FILE")
untrusted_in=$(grep_count 'Untrusted TLS connection established from' "$LOG_FILE")
untrusted_out=$(grep_count 'Untrusted TLS connection established to' "$LOG_FILE")
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"anonymous\",hostname=\"${HOSTNAME}\"} ${tls_in}"
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"anonymous\",hostname=\"${HOSTNAME}\"} ${tls_out}"
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"trusted\",hostname=\"${HOSTNAME}\"} ${verified_in}"
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"trusted\",hostname=\"${HOSTNAME}\"} ${verified_out}"
echo "postfix_tls_connections_total{direction=\"inbound\",verified=\"untrusted\",hostname=\"${HOSTNAME}\"} ${untrusted_in}"
echo "postfix_tls_connections_total{direction=\"outbound\",verified=\"untrusted\",hostname=\"${HOSTNAME}\"} ${untrusted_out}"
# TLS protocol versions
echo "# HELP postfix_tls_protocol_total TLS protocol version usage"
echo "# TYPE postfix_tls_protocol_total counter"
for proto in TLSv1 TLSv1.1 TLSv1.2 TLSv1.3; do
count=$(grep_count "${proto} with cipher" "$LOG_FILE")
echo "postfix_tls_protocol_total{protocol=\"${proto}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Delay stats (queue time)
echo "# HELP postfix_delay_seconds_total Total delay time in seconds"
echo "# TYPE postfix_delay_seconds_total counter"
total_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1} END {print sum+0}')
echo "postfix_delay_seconds_total{hostname=\"${HOSTNAME}\"} ${total_delay}"
echo "# HELP postfix_delay_seconds_avg Average delivery delay"
echo "# TYPE postfix_delay_seconds_avg gauge"
avg_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
echo "postfix_delay_seconds_avg{hostname=\"${HOSTNAME}\"} ${avg_delay}"
echo "# HELP postfix_delay_seconds_max Maximum delivery delay"
echo "# TYPE postfix_delay_seconds_max gauge"
max_delay=$(grep -oP 'delay=\K[\d.]+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
echo "postfix_delay_seconds_max{hostname=\"${HOSTNAME}\"} ${max_delay:-0}"
# Postfix process count
echo "# HELP postfix_processes Number of running postfix processes"
echo "# TYPE postfix_processes gauge"
proc_count=$(pgrep -c -f "postfix" 2>/dev/null) || proc_count=0
echo "postfix_processes{hostname=\"${HOSTNAME}\"} ${proc_count}"
# Mail loop detection
echo "# HELP postfix_mail_loop_total Detected mail loops"
echo "# TYPE postfix_mail_loop_total counter"
loops=$(grep_count 'mail forwarding loop' "$LOG_FILE")
echo "postfix_mail_loop_total{hostname=\"${HOSTNAME}\"} ${loops}"
# Service status
echo "# HELP postfix_up Postfix service status (1=running, 0=stopped)"
echo "# TYPE postfix_up gauge"
if postfix status &>/dev/null || systemctl is-active postfix &>/dev/null; then
echo "postfix_up{hostname=\"${HOSTNAME}\"} 1"
else
echo "postfix_up{hostname=\"${HOSTNAME}\"} 0"
fi
# Queue age distribution (messages by age bucket)
echo "# HELP postfix_queue_age_bucket Messages in deferred queue by age"
echo "# TYPE postfix_queue_age_bucket gauge"
now=$(date +%s)
for mins in 5 15 60 360 1440; do
count=$(find "${QUEUE_DIR}/deferred" -type f -mmin +${mins} 2>/dev/null | wc -l)
echo "postfix_queue_age_bucket{le=\"${mins}m\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Delivery attempts (retries)
echo "# HELP postfix_delivery_attempts_total Delivery attempts by result"
echo "# TYPE postfix_delivery_attempts_total counter"
first_attempt=$(grep_count 'delay=.*delays=0/' "$LOG_FILE")
retry_attempt=$(grep -c 'status=deferred.*will be retried' "$LOG_FILE" 2>/dev/null) || retry_attempt=0
echo "postfix_delivery_attempts_total{type=\"first\",hostname=\"${HOSTNAME}\"} ${first_attempt}"
echo "postfix_delivery_attempts_total{type=\"retry\",hostname=\"${HOSTNAME}\"} ${retry_attempt}"
# DSN status codes breakdown
echo "# HELP postfix_dsn_total Delivery Status Notification codes"
echo "# TYPE postfix_dsn_total counter"
for dsn in "2.0.0" "4.7.1" "5.1.1" "5.1.2" "5.2.1" "5.2.2" "5.4.1" "5.7.1"; do
count=$(grep_count "dsn=${dsn}" "$LOG_FILE")
echo "postfix_dsn_total{code=\"${dsn}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Delay breakdown by phase
echo "# HELP postfix_delay_phase_seconds_total Delay time by phase"
echo "# TYPE postfix_delay_phase_seconds_total counter"
grep -oP 'delays=\K[\d.]+/[\d.]+/[\d.]+/[\d.]+' "$LOG_FILE" 2>/dev/null | awk -F'/' '{
before_qmgr+=$1; in_qmgr+=$2; conn_setup+=$3; transmission+=$4
} END {
print "before_qmgr " before_qmgr+0
print "in_qmgr " in_qmgr+0
print "conn_setup " conn_setup+0
print "transmission " transmission+0
}' | while read -r phase total; do
echo "postfix_delay_phase_seconds_total{phase=\"${phase}\",hostname=\"${HOSTNAME}\"} ${total}"
done
# RBL rejections (per blocklist)
echo "# HELP postfix_rbl_reject_total Rejections by RBL"
echo "# TYPE postfix_rbl_reject_total counter"
for rbl in "zen.spamhaus.org" "bl.spamcop.net" "b.barracudacentral.org" "dnsbl.sorbs.net"; do
count=$(grep_count "${rbl}" "$LOG_FILE")
echo "postfix_rbl_reject_total{rbl=\"${rbl}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Invalid HELO/EHLO attempts
echo "# HELP postfix_helo_invalid_total Invalid HELO/EHLO attempts"
echo "# TYPE postfix_helo_invalid_total counter"
helo_invalid=$(grep_count 'Helo command rejected' "$LOG_FILE")
echo "postfix_helo_invalid_total{hostname=\"${HOSTNAME}\"} ${helo_invalid}"
# Anvil rate limiting
echo "# HELP postfix_rate_limited_total Anvil rate limit events"
echo "# TYPE postfix_rate_limited_total counter"
rate_conn=$(grep_count 'anvil.*connection rate' "$LOG_FILE")
rate_msg=$(grep_count 'anvil.*message rate' "$LOG_FILE")
rate_rcpt=$(grep_count 'anvil.*recipient rate' "$LOG_FILE")
echo "postfix_rate_limited_total{type=\"connection\",hostname=\"${HOSTNAME}\"} ${rate_conn}"
echo "postfix_rate_limited_total{type=\"message\",hostname=\"${HOSTNAME}\"} ${rate_msg}"
echo "postfix_rate_limited_total{type=\"recipient\",hostname=\"${HOSTNAME}\"} ${rate_rcpt}"
# Milter/content filter rejections
echo "# HELP postfix_milter_reject_total Milter rejection events"
echo "# TYPE postfix_milter_reject_total counter"
milter_reject=$(grep_count 'milter-reject' "$LOG_FILE")
echo "postfix_milter_reject_total{hostname=\"${HOSTNAME}\"} ${milter_reject}"
# Header/body checks rejections
echo "# HELP postfix_header_checks_reject_total Header/body check rejections"
echo "# TYPE postfix_header_checks_reject_total counter"
header_reject=$(grep_count 'header_checks:' "$LOG_FILE")
body_reject=$(grep_count 'body_checks:' "$LOG_FILE")
echo "postfix_header_checks_reject_total{type=\"header\",hostname=\"${HOSTNAME}\"} ${header_reject}"
echo "postfix_header_checks_reject_total{type=\"body\",hostname=\"${HOSTNAME}\"} ${body_reject}"
# Policy daemon deferrals
echo "# HELP postfix_policyd_total Policy daemon events"
echo "# TYPE postfix_policyd_total counter"
policyd_defer=$(grep_count 'policy.*DEFER' "$LOG_FILE")
policyd_reject=$(grep_count 'policy.*REJECT' "$LOG_FILE")
echo "postfix_policyd_total{action=\"defer\",hostname=\"${HOSTNAME}\"} ${policyd_defer}"
echo "postfix_policyd_total{action=\"reject\",hostname=\"${HOSTNAME}\"} ${policyd_reject}"
# DKIM signing (if OpenDKIM is used)
echo "# HELP postfix_dkim_total DKIM signing/verification results"
echo "# TYPE postfix_dkim_total counter"
dkim_signed=$(grep_count 'DKIM-Signature field added' "$LOG_FILE")
dkim_pass=$(grep_count 'dkim=pass' "$LOG_FILE")
dkim_fail=$(grep_count 'dkim=fail' "$LOG_FILE")
echo "postfix_dkim_total{action=\"signed\",hostname=\"${HOSTNAME}\"} ${dkim_signed}"
echo "postfix_dkim_total{result=\"pass\",hostname=\"${HOSTNAME}\"} ${dkim_pass}"
echo "postfix_dkim_total{result=\"fail\",hostname=\"${HOSTNAME}\"} ${dkim_fail}"
# SPF results
echo "# HELP postfix_spf_total SPF check results"
echo "# TYPE postfix_spf_total counter"
for result in pass fail softfail neutral none permerror temperror; do
count=$(grep_count -i "spf=${result}\|SPF: ${result}" "$LOG_FILE")
echo "postfix_spf_total{result=\"${result}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# DMARC results (if OpenDMARC is used)
# OpenDMARC logs: "opendmarc[PID]: QUEUEID: domain.com pass/fail/none"
echo "# HELP postfix_dmarc_total DMARC check results"
echo "# TYPE postfix_dmarc_total counter"
for result in pass fail none; do
count=$(grep -cE "opendmarc\[.*\]: [A-F0-9]+: [^ ]+ ${result}$" "$LOG_FILE" 2>/dev/null) || count=0
echo "postfix_dmarc_total{result=\"${result}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Hourly volume (traffic patterns)
echo "# HELP postfix_hourly_volume Messages processed per hour"
echo "# TYPE postfix_hourly_volume gauge"
current_date=$(date +%b" "%d)
for hour in $(seq -w 0 23); do
count=$(grep_count "^${current_date} ${hour}:" "$LOG_FILE" | grep -c 'status=sent' 2>/dev/null) || count=0
count=$(grep "^${current_date} ${hour}:" "$LOG_FILE" 2>/dev/null | grep -c 'status=sent') || count=0
echo "postfix_hourly_volume{hour=\"${hour}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Recent throughput (last 5/15/60 minutes)
echo "# HELP postfix_messages_recent Messages sent in recent time windows"
echo "# TYPE postfix_messages_recent gauge"
for mins in 5 15 60; do
since=$(date -d "${mins} minutes ago" '+%b %d %H:%M' 2>/dev/null) || since=""
if [[ -n "$since" ]]; then
count=$(awk -v since="$since" '$0 >= since && /status=sent/' "$LOG_FILE" 2>/dev/null | wc -l)
else
count=0
fi
echo "postfix_messages_recent{window=\"${mins}m\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Active SMTP sessions estimate
echo "# HELP postfix_smtp_sessions_active Estimated active SMTP sessions"
echo "# TYPE postfix_smtp_sessions_active gauge"
smtp_procs=$(pgrep -c -x smtp 2>/dev/null) || smtp_procs=0
smtpd_procs=$(pgrep -c -x smtpd 2>/dev/null) || smtpd_procs=0
echo "postfix_smtp_sessions_active{type=\"outbound\",hostname=\"${HOSTNAME}\"} ${smtp_procs}"
echo "postfix_smtp_sessions_active{type=\"inbound\",hostname=\"${HOSTNAME}\"} ${smtpd_procs}"
# Qmgr active recipients
echo "# HELP postfix_qmgr_recipients Active recipients in queue manager"
echo "# TYPE postfix_qmgr_recipients gauge"
active_recipients=$(find "${QUEUE_DIR}/active" -type f -exec cat {} \; 2>/dev/null | wc -l) || active_recipients=0
echo "postfix_qmgr_recipients{hostname=\"${HOSTNAME}\"} ${active_recipients}"
# Estimated queue memory usage (based on file sizes)
echo "# HELP postfix_queue_size_bytes Total size of queue files in bytes"
echo "# TYPE postfix_queue_size_bytes gauge"
for queue in incoming active deferred hold; do
size=$(du -sb "${QUEUE_DIR}/${queue}" 2>/dev/null | cut -f1) || size=0
echo "postfix_queue_size_bytes{queue=\"${queue}\",hostname=\"${HOSTNAME}\"} ${size}"
done
# Warnings and fatal errors
echo "# HELP postfix_log_events_total Log events by severity"
echo "# TYPE postfix_log_events_total counter"
warnings=$(grep_count 'warning:' "$LOG_FILE")
fatals=$(grep_count 'fatal:' "$LOG_FILE")
panics=$(grep_count 'panic:' "$LOG_FILE")
echo "postfix_log_events_total{level=\"warning\",hostname=\"${HOSTNAME}\"} ${warnings}"
echo "postfix_log_events_total{level=\"fatal\",hostname=\"${HOSTNAME}\"} ${fatals}"
echo "postfix_log_events_total{level=\"panic\",hostname=\"${HOSTNAME}\"} ${panics}"
# SMTP response codes
echo "# HELP postfix_smtp_response_total SMTP response codes"
echo "# TYPE postfix_smtp_response_total counter"
smtp_2xx=$(grep_count 'status=sent' "$LOG_FILE")
smtp_4xx=$(grep_count 'status=deferred' "$LOG_FILE")
smtp_5xx=$(grep_count 'status=bounced' "$LOG_FILE")
echo "postfix_smtp_response_total{code=\"2xx\",hostname=\"${HOSTNAME}\"} ${smtp_2xx}"
echo "postfix_smtp_response_total{code=\"4xx\",hostname=\"${HOSTNAME}\"} ${smtp_4xx}"
echo "postfix_smtp_response_total{code=\"5xx\",hostname=\"${HOSTNAME}\"} ${smtp_5xx}"
# Specific SMTP error codes (check multiple patterns)
# Postfix logs SMTP errors in various formats:
# - "said: 550 5.1.1 User unknown"
# - "status=bounced (host ... said: 550 ...)"
# - "dsn=5.1.1" (DSN codes start with same digit)
# - Remote server responses with just the code
echo "# HELP postfix_smtp_error_code_total Specific SMTP error codes"
echo "# TYPE postfix_smtp_error_code_total counter"
for code in 421 450 451 452 500 501 502 503 504 550 551 552 553 554; do
# Multiple patterns: "said: 550", "(550 ", "smtp.*550", host responses
count=$(grep -cE "(said: ${code}|said:${code}|\(${code} |host .*\[.*\].*${code} |smtp.*${code}[^0-9])" "$LOG_FILE" 2>/dev/null) || count=0
echo "postfix_smtp_error_code_total{code=\"${code}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# TLS cipher suites (top 10)
# Requires smtpd_tls_loglevel=1 and smtp_tls_loglevel=1 in main.cf
# Postfix logs: "TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)"
echo "# HELP postfix_tls_cipher_total TLS cipher suite usage"
echo "# TYPE postfix_tls_cipher_total counter"
cipher_output=$({
grep -oP 'with cipher \K[A-Za-z0-9_-]+' "$LOG_FILE" 2>/dev/null
grep -oP 'cipher=\K[A-Za-z0-9_-]+' "$LOG_FILE" 2>/dev/null
} | sort | uniq -c | sort -rn | head -10)
if [[ -n "$cipher_output" ]]; then
echo "$cipher_output" | while read -r count cipher; do
[[ -n "$cipher" ]] && echo "postfix_tls_cipher_total{cipher=\"${cipher}\",hostname=\"${HOSTNAME}\"} ${count}"
done
else
echo "postfix_tls_cipher_total{cipher=\"unknown\",hostname=\"${HOSTNAME}\"} 0"
fi
# TLS certificate expiry (check multiple locations)
echo "# HELP postfix_cert_expiry_seconds Seconds until TLS certificate expires"
echo "# TYPE postfix_cert_expiry_seconds gauge"
CERT_FILE=""
for cert in "/etc/ssl/certs/postfix.pem" \
"/home/user-data/ssl/ssl_certificate.pem" \
"/etc/letsencrypt/live/$(hostname)/fullchain.pem" \
"/etc/letsencrypt/live/$(hostname -f)/fullchain.pem" \
"/etc/ssl/certs/ssl-cert-snakeoil.pem"; do
if [[ -f "$cert" ]]; then
CERT_FILE="$cert"
break
fi
done
cert_seconds=0
if [[ -n "$CERT_FILE" ]] && command -v openssl &>/dev/null; then
expiry=$(openssl x509 -enddate -noout -in "$CERT_FILE" 2>/dev/null | cut -d= -f2)
if [[ -n "$expiry" ]]; then
expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null) || expiry_epoch=0
now=$(date +%s)
cert_seconds=$((expiry_epoch - now))
fi
fi
echo "postfix_cert_expiry_seconds{hostname=\"${HOSTNAME}\"} ${cert_seconds}"
# LMTP delivery metrics (Postfix side)
# Matches: "postfix/lmtp[PID]: ... status=sent"
echo "# HELP postfix_lmtp_delivery_total LMTP delivery stats"
echo "# TYPE postfix_lmtp_delivery_total counter"
lmtp_sent=$(grep_count 'postfix/lmtp\[.*status=sent' "$LOG_FILE")
lmtp_deferred=$(grep_count 'postfix/lmtp\[.*status=deferred' "$LOG_FILE")
lmtp_bounced=$(grep_count 'postfix/lmtp\[.*status=bounced' "$LOG_FILE")
echo "postfix_lmtp_delivery_total{status=\"sent\",hostname=\"${HOSTNAME}\"} ${lmtp_sent}"
echo "postfix_lmtp_delivery_total{status=\"deferred\",hostname=\"${HOSTNAME}\"} ${lmtp_deferred}"
echo "postfix_lmtp_delivery_total{status=\"bounced\",hostname=\"${HOSTNAME}\"} ${lmtp_bounced}"
echo "# HELP postfix_lmtp_connections_total LMTP connection events"
echo "# TYPE postfix_lmtp_connections_total counter"
lmtp_connect=$(grep_count 'postfix/lmtp\[.*connect' "$LOG_FILE")
lmtp_disconnect=$(grep_count 'postfix/lmtp\[.*disconnect' "$LOG_FILE")
lmtp_timeout=$(grep_count 'postfix/lmtp\[.*timeout' "$LOG_FILE")
lmtp_refused=$(grep_count 'postfix/lmtp\[.*Connection refused' "$LOG_FILE")
echo "postfix_lmtp_connections_total{type=\"connect\",hostname=\"${HOSTNAME}\"} ${lmtp_connect}"
echo "postfix_lmtp_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${lmtp_disconnect}"
echo "postfix_lmtp_connections_total{type=\"timeout\",hostname=\"${HOSTNAME}\"} ${lmtp_timeout}"
echo "postfix_lmtp_connections_total{type=\"refused\",hostname=\"${HOSTNAME}\"} ${lmtp_refused}"
echo "# HELP postfix_lmtp_delay_seconds LMTP delivery delay stats"
echo "# TYPE postfix_lmtp_delay_seconds gauge"
lmtp_avg_delay=$(grep 'postfix/lmtp\[' "$LOG_FILE" 2>/dev/null | grep -oP 'delay=\K[\d.]+' | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
lmtp_max_delay=$(grep 'postfix/lmtp\[' "$LOG_FILE" 2>/dev/null | grep -oP 'delay=\K[\d.]+' | sort -rn | head -1)
echo "postfix_lmtp_delay_seconds{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${lmtp_avg_delay}"
echo "postfix_lmtp_delay_seconds{stat=\"max\",hostname=\"${HOSTNAME}\"} ${lmtp_max_delay:-0}"
# Dovecot LMTP/LDA delivery stats (check multiple log locations)
DOVECOT_LOG=""
for log in "/var/log/dovecot.log" "/var/log/mail.log" "/var/log/syslog"; do
if [[ -f "$log" ]] && grep -q 'dovecot' "$log" 2>/dev/null; then
DOVECOT_LOG="$log"
break
fi
done
if [[ -n "$DOVECOT_LOG" ]]; then
echo "# HELP postfix_dovecot_delivery_total Dovecot local delivery stats"
echo "# TYPE postfix_dovecot_delivery_total counter"
lmtp_delivered=$(grep_count 'lmtp.*saved mail' "$DOVECOT_LOG")
lda_delivered=$(grep_count 'lda.*saved mail' "$DOVECOT_LOG")
echo "postfix_dovecot_delivery_total{type=\"lmtp\",hostname=\"${HOSTNAME}\"} ${lmtp_delivered}"
echo "postfix_dovecot_delivery_total{type=\"lda\",hostname=\"${HOSTNAME}\"} ${lda_delivered}"
echo "# HELP postfix_dovecot_sieve_total Dovecot sieve filter actions"
echo "# TYPE postfix_dovecot_sieve_total counter"
sieve_fileinto=$(grep_count 'sieve.*fileinto' "$DOVECOT_LOG")
sieve_discard=$(grep_count 'sieve.*discard' "$DOVECOT_LOG")
sieve_redirect=$(grep_count 'sieve.*redirect' "$DOVECOT_LOG")
echo "postfix_dovecot_sieve_total{action=\"fileinto\",hostname=\"${HOSTNAME}\"} ${sieve_fileinto}"
echo "postfix_dovecot_sieve_total{action=\"discard\",hostname=\"${HOSTNAME}\"} ${sieve_discard}"
echo "postfix_dovecot_sieve_total{action=\"redirect\",hostname=\"${HOSTNAME}\"} ${sieve_redirect}"
echo "# HELP postfix_dovecot_auth_total Dovecot authentication attempts"
echo "# TYPE postfix_dovecot_auth_total counter"
auth_success=$(grep_count 'auth.*successful' "$DOVECOT_LOG")
auth_fail=$(grep_count 'auth.*failed' "$DOVECOT_LOG")
echo "postfix_dovecot_auth_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${auth_success}"
echo "postfix_dovecot_auth_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${auth_fail}"
echo "# HELP postfix_dovecot_imap_connections_total Dovecot IMAP connections"
echo "# TYPE postfix_dovecot_imap_connections_total counter"
imap_login=$(grep_count 'imap-login:.*Login' "$DOVECOT_LOG")
imap_disconnect=$(grep_count 'imap.*Disconnected' "$DOVECOT_LOG")
echo "postfix_dovecot_imap_connections_total{type=\"login\",hostname=\"${HOSTNAME}\"} ${imap_login}"
echo "postfix_dovecot_imap_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${imap_disconnect}"
echo "# HELP postfix_dovecot_pop3_connections_total Dovecot POP3 connections"
echo "# TYPE postfix_dovecot_pop3_connections_total counter"
pop3_login=$(grep_count 'pop3-login:.*Login' "$DOVECOT_LOG")
pop3_disconnect=$(grep_count 'pop3.*Disconnected' "$DOVECOT_LOG")
echo "postfix_dovecot_pop3_connections_total{type=\"login\",hostname=\"${HOSTNAME}\"} ${pop3_login}"
echo "postfix_dovecot_pop3_connections_total{type=\"disconnect\",hostname=\"${HOSTNAME}\"} ${pop3_disconnect}"
fi
# SpamAssassin metrics (supports spamd, spampd, and amavis)
SPAM_LOG="/var/log/mail.log"
# Detect which spam daemon is in use (check spampd first as it's more specific)
if grep -q 'spampd' "$SPAM_LOG" 2>/dev/null; then
SPAM_DAEMON="spampd"
elif grep -q 'spamd\[' "$SPAM_LOG" 2>/dev/null; then
SPAM_DAEMON="spamd"
elif grep -q 'amavis' "$SPAM_LOG" 2>/dev/null; then
SPAM_DAEMON="amavis"
else
SPAM_DAEMON=""
fi
if [[ -n "$SPAM_DAEMON" ]]; then
echo "# HELP postfix_spamassassin_total SpamAssassin scan results"
echo "# TYPE postfix_spamassassin_total counter"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
# spampd format: "clean message <...> (SCORE/THRESHOLD)" or "identified spam <...> (SCORE/THRESHOLD)"
spam_identified=$(grep_count 'spampd.*identified spam' "$SPAM_LOG")
ham_clean=$(grep_count 'spampd.*clean message' "$SPAM_LOG")
elif [[ "$SPAM_DAEMON" == "amavis" ]]; then
spam_identified=$(grep_count 'amavis.*Blocked SPAM' "$SPAM_LOG")
ham_clean=$(grep_count 'amavis.*Passed CLEAN' "$SPAM_LOG")
else
spam_identified=$(grep_count 'spamd.*identified spam' "$SPAM_LOG")
ham_clean=$(grep_count 'spamd.*clean message' "$SPAM_LOG")
fi
echo "postfix_spamassassin_total{result=\"spam\",hostname=\"${HOSTNAME}\"} ${spam_identified}"
echo "postfix_spamassassin_total{result=\"ham\",hostname=\"${HOSTNAME}\"} ${ham_clean}"
echo "# HELP postfix_spamassassin_score_total SpamAssassin score distribution"
echo "# TYPE postfix_spamassassin_score_total counter"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
# spampd format: (SCORE/THRESHOLD) like (-0.30/5.00) or (15.2/5.0)
score_neg=$(grep -oP 'spampd.*\(\K-[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | wc -l)
score_0_5=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}')
score_5_10=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}')
score_10_plus=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}')
elif [[ "$SPAM_DAEMON" == "amavis" ]]; then
score_neg=$(grep -oP 'amavis.*Hits: \K-[\d.]+' "$SPAM_LOG" 2>/dev/null | wc -l)
score_0_5=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}')
score_5_10=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}')
score_10_plus=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}')
else
score_neg=0
score_0_5=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 0 && $1 < 5 {count++} END {print count+0}')
score_5_10=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 5 && $1 < 10 {count++} END {print count+0}')
score_10_plus=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '$1 >= 10 {count++} END {print count+0}')
fi
echo "postfix_spamassassin_score_total{bucket=\"negative\",hostname=\"${HOSTNAME}\"} ${score_neg:-0}"
echo "postfix_spamassassin_score_total{bucket=\"0-5\",hostname=\"${HOSTNAME}\"} ${score_0_5}"
echo "postfix_spamassassin_score_total{bucket=\"5-10\",hostname=\"${HOSTNAME}\"} ${score_5_10}"
echo "postfix_spamassassin_score_total{bucket=\"10+\",hostname=\"${HOSTNAME}\"} ${score_10_plus}"
echo "# HELP postfix_spamassassin_score_avg Average SpamAssassin score"
echo "# TYPE postfix_spamassassin_score_avg gauge"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
avg_score=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
elif [[ "$SPAM_DAEMON" == "amavis" ]]; then
avg_score=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
else
avg_score=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
fi
echo "postfix_spamassassin_score_avg{hostname=\"${HOSTNAME}\"} ${avg_score}"
echo "# HELP postfix_spamassassin_score_max Maximum SpamAssassin score seen"
echo "# TYPE postfix_spamassassin_score_max gauge"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
max_score=$(grep -oP 'spampd.*\(\K-?[\d.]+(?=/)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
elif [[ "$SPAM_DAEMON" == "amavis" ]]; then
max_score=$(grep -oP 'amavis.*Hits: \K-?[\d.]+' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
else
max_score=$(grep -oP 'spamd.*score=\K[\d.]+' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
fi
echo "postfix_spamassassin_score_max{hostname=\"${HOSTNAME}\"} ${max_score:-0}"
# Messages scanned total
echo "# HELP postfix_spamassassin_scanned_total Total messages scanned"
echo "# TYPE postfix_spamassassin_scanned_total counter"
scanned_total=$((spam_identified + ham_clean))
echo "postfix_spamassassin_scanned_total{hostname=\"${HOSTNAME}\"} ${scanned_total}"
echo "# HELP postfix_spamassassin_scan_time_seconds SpamAssassin scan time stats"
echo "# TYPE postfix_spamassassin_scan_time_seconds gauge"
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
# spampd format: "in 2.15s"
avg_time=$(grep -oP 'spampd.* in \K[\d.]+(?=s)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
max_time=$(grep -oP 'spampd.* in \K[\d.]+(?=s)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
else
avg_time=$(grep -oP "${SPAM_DAEMON}.* in \K[\d.]+(?= seconds)" "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.2f", sum/count; else print 0}')
max_time=$(grep -oP "${SPAM_DAEMON}.* in \K[\d.]+(?= seconds)" "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
fi
echo "postfix_spamassassin_scan_time_seconds{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_time:-0}"
echo "postfix_spamassassin_scan_time_seconds{stat=\"max\",hostname=\"${HOSTNAME}\"} ${max_time:-0}"
# spampd-specific: message size stats
if [[ "$SPAM_DAEMON" == "spampd" ]]; then
echo "# HELP postfix_spamassassin_message_size_bytes SpamAssassin processed message sizes"
echo "# TYPE postfix_spamassassin_message_size_bytes gauge"
avg_size=$(grep -oP 'spampd.*, \K\d+(?= bytes)' "$SPAM_LOG" 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "%.0f", sum/count; else print 0}')
max_size=$(grep -oP 'spampd.*, \K\d+(?= bytes)' "$SPAM_LOG" 2>/dev/null | sort -rn | head -1)
echo "postfix_spamassassin_message_size_bytes{stat=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_size:-0}"
echo "postfix_spamassassin_message_size_bytes{stat=\"max\",hostname=\"${HOSTNAME}\"} ${max_size:-0}"
echo "# HELP postfix_spamassassin_threshold SpamAssassin spam threshold"
echo "# TYPE postfix_spamassassin_threshold gauge"
threshold=$(grep -oP 'spampd.*/-?\K[\d.]+(?=\))' "$SPAM_LOG" 2>/dev/null | head -1)
echo "postfix_spamassassin_threshold{hostname=\"${HOSTNAME}\"} ${threshold:-5}"
fi
# SpamAssassin rules (only available with spamd or if logging to separate file)
# NOTE: spampd (used by Mail-in-a-Box) does NOT log individual rules to mail.log
# Rules are only available if using standalone spamd with verbose logging or a separate log file
SA_RULES_LOG=""
for log in "/var/log/spamassassin.log" "/var/log/spamd.log" "$SPAM_LOG"; do
if [[ -f "$log" ]] && grep -q 'tests=' "$log" 2>/dev/null; then
SA_RULES_LOG="$log"
break
fi
done
if [[ -n "$SA_RULES_LOG" ]]; then
echo "# HELP postfix_spamassassin_rules_total Top SpamAssassin rules triggered"
echo "# TYPE postfix_spamassassin_rules_total counter"
grep -oP 'tests=\K[^,\]\s]+' "$SA_RULES_LOG" 2>/dev/null | tr ',' '\n' | tr -d ' ' | sort | uniq -c | sort -rn | head -15 | while read -r count rule; do
[[ -n "$rule" ]] && echo "postfix_spamassassin_rules_total{rule=\"${rule}\",hostname=\"${HOSTNAME}\"} ${count}"
done
fi
# Daemon status
echo "# HELP postfix_spamassassin_up SpamAssassin daemon status"
echo "# TYPE postfix_spamassassin_up gauge"
if pgrep -f "${SPAM_DAEMON}" &>/dev/null; then
echo "postfix_spamassassin_up{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} 1"
else
echo "postfix_spamassassin_up{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} 0"
fi
echo "# HELP postfix_spamassassin_processes Number of spam daemon processes"
echo "# TYPE postfix_spamassassin_processes gauge"
spam_procs=$(pgrep -c -f "${SPAM_DAEMON}" 2>/dev/null) || spam_procs=0
echo "postfix_spamassassin_processes{daemon=\"${SPAM_DAEMON}\",hostname=\"${HOSTNAME}\"} ${spam_procs}"
fi
# Greylisting stats (postgrey)
echo "# HELP postfix_greylist_total Greylisting events"
echo "# TYPE postfix_greylist_total counter"
greylist_defer=$(grep_count 'action=greylist' "$LOG_FILE")
greylist_pass=$(grep_count 'action=pass.*reason=triplet' "$LOG_FILE")
greylist_whitelist=$(grep_count 'action=pass.*reason=client whitelist\|action=pass, reason=client AWL' "$LOG_FILE")
echo "postfix_greylist_total{action=\"defer\",hostname=\"${HOSTNAME}\"} ${greylist_defer}"
echo "postfix_greylist_total{action=\"pass\",hostname=\"${HOSTNAME}\"} ${greylist_pass}"
echo "postfix_greylist_total{action=\"whitelist\",hostname=\"${HOSTNAME}\"} ${greylist_whitelist}"
echo "# HELP postfix_greylist_reason_total Greylisting by reason"
echo "# TYPE postfix_greylist_reason_total counter"
grey_new=$(grep_count 'reason=new' "$LOG_FILE")
grey_early=$(grep_count 'reason=early-retry' "$LOG_FILE")
grey_triplet=$(grep_count 'reason=triplet found' "$LOG_FILE")
echo "postfix_greylist_reason_total{reason=\"new\",hostname=\"${HOSTNAME}\"} ${grey_new}"
echo "postfix_greylist_reason_total{reason=\"early_retry\",hostname=\"${HOSTNAME}\"} ${grey_early}"
echo "postfix_greylist_reason_total{reason=\"triplet_found\",hostname=\"${HOSTNAME}\"} ${grey_triplet}"
echo "# HELP postfix_greylist_delay_seconds Greylist delay statistics"
echo "# TYPE postfix_greylist_delay_seconds gauge"
avg_delay=$(grep -oP 'delay=\K\d+' "$LOG_FILE" 2>/dev/null | grep -v '^0$' | awk '{sum+=$1; count++} END {if(count>0) printf "%.0f", sum/count; else print 0}')
max_delay=$(grep -oP 'postgrey.*delay=\K\d+' "$LOG_FILE" 2>/dev/null | sort -rn | head -1)
echo "postfix_greylist_delay_seconds{type=\"avg\",hostname=\"${HOSTNAME}\"} ${avg_delay:-0}"
echo "postfix_greylist_delay_seconds{type=\"max\",hostname=\"${HOSTNAME}\"} ${max_delay:-0}"
echo "# HELP postfix_greylist_clients_total Unique greylisted client IPs"
echo "# TYPE postfix_greylist_clients_total gauge"
grey_clients=$(grep 'action=greylist' "$LOG_FILE" 2>/dev/null | grep -oP 'client_address=\K[^,]+' | sort -u | wc -l)
echo "postfix_greylist_clients_total{hostname=\"${HOSTNAME}\"} ${grey_clients:-0}"
echo "# HELP postfix_greylist_top_senders Top greylisted sender domains"
echo "# TYPE postfix_greylist_top_senders counter"
grep 'action=greylist' "$LOG_FILE" 2>/dev/null | grep -oP 'sender=\K[^,]+' | sed 's/.*@//' | sort | uniq -c | sort -rn | head -10 | while read -r count domain; do
[[ -n "$domain" ]] && echo "postfix_greylist_top_senders{domain=\"${domain}\",hostname=\"${HOSTNAME}\"} ${count}"
done
# Cleanup daemon stats (total messages entering system)
echo "# HELP postfix_cleanup_total Messages processed by cleanup daemon"
echo "# TYPE postfix_cleanup_total counter"
cleanup_count=$(grep_count 'message-id=' "$LOG_FILE")
echo "postfix_cleanup_total{hostname=\"${HOSTNAME}\"} ${cleanup_count}"
# Virtual mailbox errors
echo "# HELP postfix_virtual_errors_total Virtual mailbox lookup errors"
echo "# TYPE postfix_virtual_errors_total counter"
virtual_not_found=$(grep_count 'mailbox not found\|User unknown in virtual' "$LOG_FILE")
echo "postfix_virtual_errors_total{hostname=\"${HOSTNAME}\"} ${virtual_not_found}"
# Address verification failures
echo "# HELP postfix_address_verify_total Address verification events"
echo "# TYPE postfix_address_verify_total counter"
verify_fail=$(grep_count 'address verification failed' "$LOG_FILE")
verify_success=$(grep_count 'address verification succeeded\|cache hit' "$LOG_FILE")
echo "postfix_address_verify_total{result=\"failed\",hostname=\"${HOSTNAME}\"} ${verify_fail}"
echo "postfix_address_verify_total{result=\"success\",hostname=\"${HOSTNAME}\"} ${verify_success}"
# Postfix master process uptime (based on pid file age)
echo "# HELP postfix_master_uptime_seconds Postfix master process uptime"
echo "# TYPE postfix_master_uptime_seconds gauge"
MASTER_PID_FILE="/var/spool/postfix/pid/master.pid"
if [[ -f "$MASTER_PID_FILE" ]]; then
master_start=$(stat -c %Y "$MASTER_PID_FILE" 2>/dev/null) || master_start=0
if [[ $master_start -gt 0 ]]; then
uptime_seconds=$(($(date +%s) - master_start))
else
uptime_seconds=0
fi
else
uptime_seconds=0
fi
echo "postfix_master_uptime_seconds{hostname=\"${HOSTNAME}\"} ${uptime_seconds}"
# DNS lookup failures
echo "# HELP postfix_dns_errors_total DNS lookup errors"
echo "# TYPE postfix_dns_errors_total counter"
dns_not_found=$(grep_count 'Host not found\|Name service error\|Host or domain name not found' "$LOG_FILE")
dns_timeout=$(grep_count 'DNS lookup.*timeout\|name server.*timeout' "$LOG_FILE")
dns_servfail=$(grep_count 'SERVFAIL\|server failure' "$LOG_FILE")
echo "postfix_dns_errors_total{type=\"not_found\",hostname=\"${HOSTNAME}\"} ${dns_not_found}"
echo "postfix_dns_errors_total{type=\"timeout\",hostname=\"${HOSTNAME}\"} ${dns_timeout}"
echo "postfix_dns_errors_total{type=\"servfail\",hostname=\"${HOSTNAME}\"} ${dns_servfail}"
# STARTTLS usage - count TLS connections vs total SMTP connections
# "used" = successful TLS connections (inbound + outbound)
# "total" = total SMTP connections for ratio calculation
echo "# HELP postfix_starttls_total STARTTLS connection counts"
echo "# TYPE postfix_starttls_total counter"
starttls_inbound=$(grep_count 'TLS connection established from' "$LOG_FILE")
starttls_outbound=$(grep_count 'TLS connection established to' "$LOG_FILE")
echo "postfix_starttls_total{type=\"inbound\",hostname=\"${HOSTNAME}\"} ${starttls_inbound}"
echo "postfix_starttls_total{type=\"outbound\",hostname=\"${HOSTNAME}\"} ${starttls_outbound}"
# Sender/recipient access rejections
echo "# HELP postfix_access_reject_total Sender/recipient access rejections"
echo "# TYPE postfix_access_reject_total counter"
sender_reject=$(grep_count 'Sender address rejected' "$LOG_FILE")
recipient_reject=$(grep_count 'Recipient address rejected' "$LOG_FILE")
client_reject=$(grep_count 'Client host rejected' "$LOG_FILE")
echo "postfix_access_reject_total{type=\"sender\",hostname=\"${HOSTNAME}\"} ${sender_reject}"
echo "postfix_access_reject_total{type=\"recipient\",hostname=\"${HOSTNAME}\"} ${recipient_reject}"
echo "postfix_access_reject_total{type=\"client\",hostname=\"${HOSTNAME}\"} ${client_reject}"
# Queue filesystem usage
echo "# HELP postfix_queue_filesystem_usage_percent Queue filesystem usage percentage"
echo "# TYPE postfix_queue_filesystem_usage_percent gauge"
queue_usage=$(df "${QUEUE_DIR}" 2>/dev/null | awk 'NR==2 {gsub(/%/,""); print $5}') || queue_usage=0
echo "postfix_queue_filesystem_usage_percent{hostname=\"${HOSTNAME}\"} ${queue_usage:-0}"
# Postfix file descriptor count (for master process)
echo "# HELP postfix_file_descriptors Open file descriptors by postfix"
echo "# TYPE postfix_file_descriptors gauge"
if [[ -f "$MASTER_PID_FILE" ]]; then
master_pid=$(tr -d '[:space:]' < "$MASTER_PID_FILE" 2>/dev/null)
if [[ -n "$master_pid" ]] && [[ -d "/proc/${master_pid}/fd" ]]; then
fd_count=$(find "/proc/${master_pid}/fd" -maxdepth 1 2>/dev/null | wc -l)
else
fd_count=0
fi
else
fd_count=0
fi
echo "postfix_file_descriptors{hostname=\"${HOSTNAME}\"} ${fd_count}"
# Script execution time
# Dovecot IMAP/POP3 login metrics
echo "# HELP dovecot_logins_total Successful logins by protocol"
echo "# TYPE dovecot_logins_total counter"
imap_logins=$(grep_count 'imap-login: Info: Login:' "$LOG_FILE")
pop3_logins=$(grep_count 'pop3-login: Info: Login:' "$LOG_FILE")
echo "dovecot_logins_total{protocol=\"imap\",hostname=\"${HOSTNAME}\"} ${imap_logins}"
echo "dovecot_logins_total{protocol=\"pop3\",hostname=\"${HOSTNAME}\"} ${pop3_logins}"
echo "# HELP dovecot_login_auth_method_total Logins by authentication method"
echo "# TYPE dovecot_login_auth_method_total counter"
for method in PLAIN LOGIN CRAM-MD5 DIGEST-MD5; do
count=$(grep_count "Login:.*method=${method}" "$LOG_FILE")
echo "dovecot_login_auth_method_total{method=\"${method}\",hostname=\"${HOSTNAME}\"} ${count}"
done
echo "# HELP dovecot_login_tls_total Logins with/without TLS"
echo "# TYPE dovecot_login_tls_total counter"
tls_logins=$(grep -c 'Login:.*TLS' "$LOG_FILE" 2>/dev/null) || tls_logins=0
notls_logins=$(grep 'Login:' "$LOG_FILE" 2>/dev/null | grep -cv 'TLS') || notls_logins=0
echo "dovecot_login_tls_total{tls=\"yes\",hostname=\"${HOSTNAME}\"} ${tls_logins}"
echo "dovecot_login_tls_total{tls=\"no\",hostname=\"${HOSTNAME}\"} ${notls_logins}"
echo "# HELP dovecot_login_failed_total Failed login attempts"
echo "# TYPE dovecot_login_failed_total counter"
imap_failed=$(grep_count 'imap-login: Info: Aborted login\|imap-login:.*auth failed' "$LOG_FILE")
pop3_failed=$(grep_count 'pop3-login: Info: Aborted login\|pop3-login:.*auth failed' "$LOG_FILE")
echo "dovecot_login_failed_total{protocol=\"imap\",hostname=\"${HOSTNAME}\"} ${imap_failed}"
echo "dovecot_login_failed_total{protocol=\"pop3\",hostname=\"${HOSTNAME}\"} ${pop3_failed}"
echo "# HELP dovecot_login_user_total Logins per user (top 20)"
echo "# TYPE dovecot_login_user_total counter"
grep -oP 'Login: user=<\K[^>]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count user; do
echo "dovecot_login_user_total{user=\"${user}\",hostname=\"${HOSTNAME}\"} ${count}"
done
echo "# HELP dovecot_login_client_ip_total Logins per client IP (top 20)"
echo "# TYPE dovecot_login_client_ip_total counter"
grep -oP 'Login:.*rip=\K[^,]+' "$LOG_FILE" 2>/dev/null | sort | uniq -c | sort -rn | head -20 | while read -r count ip; do
echo "dovecot_login_client_ip_total{client_ip=\"${ip}\",hostname=\"${HOSTNAME}\"} ${count}"
done
local END_TIME
END_TIME=$(date +%s.%N)
local DURATION
DURATION=$(echo "$END_TIME - $START_TIME" | bc)
echo "# HELP postfix_collector_duration_seconds Time taken to collect metrics"
echo "# TYPE postfix_collector_duration_seconds gauge"
echo "postfix_collector_duration_seconds{hostname=\"${HOSTNAME}\"} ${DURATION}"
echo "# HELP postfix_collector_last_run_timestamp Unix timestamp of last collection"
echo "# TYPE postfix_collector_last_run_timestamp gauge"
echo "postfix_collector_last_run_timestamp{hostname=\"${HOSTNAME}\"} $(date +%s)"
}
# ============================================================================
# HTTP SERVER MODE
# ============================================================================
# Serve metrics over HTTP with netcat, handling one connection per loop pass.
# Globals:  HTTP_PORT (read) - port passed to `nc -l -p`.
# Outputs:  a startup/error message on stderr; HTTP responses go to nc's stdin.
# Returns:  never returns normally (infinite loop); exits 1 if nc is missing.
run_http_server() {
echo "Starting Postfix metrics exporter on port $HTTP_PORT..." >&2
# Bail out early when no `nc` binary is available on PATH.
if ! command -v nc >/dev/null 2>&1; then
echo "ERROR: netcat (nc) required for HTTP mode" >&2
exit 1
fi
while true; do
# The brace group produces the complete response (status line, headers, body);
# its stdout is piped into a single listening nc instance.
# NOTE(review): `read -r request` reads from this function's stdin, not from
# nc's output, so the request line presumably does not come from the HTTP
# client — confirm how requests are expected to reach this read.
{
read -r request
if [[ "$request" =~ ^GET\ /metrics ]]; then
# Metrics response: header block followed by generate_metrics output.
echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r"
generate_metrics
else
# Anything else gets a static HTML index page, also with status 200.
echo -e "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r"
cat <<EOF
<!DOCTYPE html>
<html>
<head><title>Postfix Metrics Exporter</title></head>
<body>
<h1>Postfix Prometheus Exporter</h1>
<p><a href="/metrics">Metrics</a></p>
<h2>Available Metrics</h2>
<ul>
<li>Queue sizes and ages</li>
<li>Message counts by status</li>
<li>TLS connection stats</li>
<li>SASL authentication</li>
<li>Bounce reasons</li>
<li>SpamAssassin scores</li>
<li>Dovecot delivery stats</li>
</ul>
</body>
</html>
EOF
fi
# nc's own stderr is discarded.
# NOTE(review): `-p` and `-q 1` are not accepted by every netcat variant — confirm
# against the nc implementation installed on target hosts.
} | nc -l -p "$HTTP_PORT" -q 1 2>/dev/null
done
}
# ============================================================================
# MAIN EXECUTION
# ============================================================================
# Entry point of the exporter: parse arguments, then run in one of three modes.
#   HTTP_MODE=true   -> serve metrics through run_http_server
#   OUTPUT_FILE set  -> write metrics to that file via temp file + rename
#   otherwise        -> print metrics to stdout
# Globals:   HTTP_MODE, OUTPUT_FILE (read)
# Arguments: the script's command-line arguments, forwarded to parse_args
# Returns:   exits 1 when the output file cannot be produced
main() {
  parse_args "$@"
  if [ "$HTTP_MODE" = true ]; then
    run_http_server
  elif [ -n "$OUTPUT_FILE" ]; then
    # Textfile collector mode: write atomically using a temp file.
    local output_dir
    output_dir="$(dirname "$OUTPUT_FILE")"
    if ! mkdir -p "$output_dir"; then
      echo "ERROR: Cannot create output directory $output_dir" >&2
      exit 1
    fi
    # Temp file lives in the SAME directory so the final mv is a rename on one
    # filesystem. Without this check a failed mktemp would leave temp_file
    # empty and the redirect below would target "".
    local temp_file
    if ! temp_file=$(mktemp "${output_dir}/.postfix_metrics.XXXXXX"); then
      echo "ERROR: Cannot create temporary file in $output_dir" >&2
      exit 1
    fi
    if ! generate_metrics > "$temp_file" 2>/dev/null; then
      rm -f "$temp_file"
      echo "ERROR: Failed to generate metrics" >&2
      exit 1
    fi
    # Sanity check: refuse to replace the previous file with a near-empty one.
    local file_lines
    file_lines=$(wc -l < "$temp_file" 2>/dev/null || echo 0)
    if [ "$file_lines" -lt 10 ]; then
      rm -f "$temp_file"
      echo "ERROR: Metrics file too small ($file_lines lines), keeping previous" >&2
      exit 1
    fi
    # mktemp creates the file with restrictive permissions; open it up before
    # the rename so readers never see an unreadable file.
    chmod 644 "$temp_file"
    # Atomic rename - no gap where the file is missing. Clean up on failure so
    # hidden temp files do not accumulate in the output directory.
    if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
      rm -f "$temp_file"
      echo "ERROR: Failed to move metrics into place at $OUTPUT_FILE" >&2
      exit 1
    fi
    echo "Metrics written to $OUTPUT_FILE ($file_lines lines)" >&2
  else
    # Default: output to stdout
    generate_metrics
  fi
}
# Run main with every command-line argument passed through unchanged
# ("$@" keeps each argument as a separate word).
main "$@"
+535
View File
@@ -0,0 +1,535 @@
#!/bin/bash
################################################
#### Salt Key Manager ####
#### Automate salt-key operations ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### Version: 1.00-030526 ####
################################################
# A pipeline's status is that of the last stage that failed, not just the final one.
set -o pipefail
# Name of this script as invoked, used in help and error messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME
# Default configuration
# Age in days used by --cleanup-stale when no DAYS value is supplied.
readonly DEFAULT_STALE_DAYS=30
# Directory whose per-minion subdirectories are inspected by do_cleanup_stale.
readonly DEFAULT_CACHE_DIR="/var/cache/salt/master/minions"
# Configuration variables
# Any non-empty value enables debug_echo output; taken from the environment.
DEBUG=${DEBUG:-}
# Runtime flags (filled in by parse_arguments)
ACTION=""
TARGET_MINION=""
STALE_DAYS=$DEFAULT_STALE_DAYS
EXPORT_PATH=""
BULK_FILE=""
AUTO_YES=false
USE_COLOR=true
# Colors: empty by default so output is plain; setup_colors assigns escape
# sequences when color is enabled and stdout is a terminal.
C_GREEN=""
C_YELLOW=""
C_RED=""
C_CYAN=""
C_RESET=""
# ERR-trap handler: report where the script failed and exit with that status.
# Globals:   SCRIPT_NAME (read)
# Arguments: $1 - exit status of the failed command
#            $2 - line number at which it failed
# Outputs:   one diagnostic line on stderr
handle_error() {
  local -r status=$1 lineno=$2
  printf 'Error: %s failed at line %s with exit code %s\n' \
    "$SCRIPT_NAME" "$lineno" "$status" >&2
  exit "$status"
}
# NOTE(review): errtrace (set -E) is not enabled in the visible code, so per
# bash semantics this trap is not inherited by functions or command
# substitutions — confirm that is intended.
trap 'handle_error $? $LINENO' ERR
# Print a debug message to stderr, but only when DEBUG is non-empty.
# Globals:   DEBUG (read)
# Arguments: message words, joined with spaces
debug_echo() {
  [[ -z "$DEBUG" ]] && return 0
  printf '[DEBUG] %s\n' "$*" >&2
}
# Write an informational message to stdout with an [INFO] prefix.
# Arguments: message words, joined with spaces
log_info() {
  printf '[INFO] %s\n' "$*"
}
# Write a warning to stderr with a [WARN] prefix.
# Arguments: message words, joined with spaces
log_warn() {
  printf '[WARN] %s\n' "$*" >&2
}
# Write an error message to stderr with an [ERROR] prefix.
# Arguments: message words, joined with spaces
log_error() {
  printf '[ERROR] %s\n' "$*" >&2
}
# Populate the C_* color variables with escape sequences, but only when color
# is enabled and stdout is a terminal; otherwise leave them untouched.
# Globals: USE_COLOR (read); C_GREEN, C_YELLOW, C_RED, C_CYAN, C_RESET (written)
setup_colors() {
  [[ "$USE_COLOR" == true ]] || return 0
  [[ -t 1 ]] || return 0
  C_RESET='\033[0m'
  C_CYAN='\033[0;36m'
  C_RED='\033[0;31m'
  C_YELLOW='\033[0;33m'
  C_GREEN='\033[0;32m'
}
# Print the usage text to stdout.
# Globals: SCRIPT_NAME, DEFAULT_STALE_DAYS (read) - both are expanded inside the
#          help text because the heredoc delimiter is unquoted.
show_help() {
cat << EOF
Usage: $SCRIPT_NAME [ACTION] [OPTIONS]
Manage Salt minion keys — accept, reject, delete, verify, rotate, and
clean up stale keys.
ACTIONS:
--list List all keys by status with counts
--verify Show pending keys with fingerprints for verification
--accept-all Accept all pending keys
--accept MINION Accept a specific minion key
--reject MINION Reject a specific minion key
--delete MINION Delete a specific minion key
--rotate MINION Rotate a minion key (delete, re-accept on reconnect)
--cleanup-stale [DAYS] Delete keys for minions not seen in DAYS days (default: $DEFAULT_STALE_DAYS)
--export PATH Export all accepted key fingerprints to a file
--bulk-accept FILE Accept minions listed in a file (one per line)
OPTIONS:
--yes Skip confirmation prompts
--no-color Disable colored output
--help, -h Show this help message
ENVIRONMENT VARIABLES:
DEBUG Enable debug output
EXAMPLES:
# List all keys with status
sudo $SCRIPT_NAME --list
# Show pending keys for verification
sudo $SCRIPT_NAME --verify
# Accept all pending keys
sudo $SCRIPT_NAME --accept-all --yes
# Accept a specific minion
sudo $SCRIPT_NAME --accept web01
# Clean up minions not seen in 60 days
sudo $SCRIPT_NAME --cleanup-stale 60
# Export fingerprints for auditing
sudo $SCRIPT_NAME --export /tmp/salt-keys.txt
# Bulk accept from a file
sudo $SCRIPT_NAME --bulk-accept /tmp/new-minions.txt --yes
EOF
}
# Count the minion keys in a given state.
# Arguments: $1 - key state understood by `salt-key --list`
#                 (accepted, unaccepted, denied, rejected)
# Outputs:   exactly one integer on stdout (0 when there are no keys or
#            salt-key produced no output)
count_keys() {
  local status="$1"
  local count
  # The section header is capitalised ("Accepted Keys:") while callers pass the
  # state in lower case, so the header must be matched case-insensitively or it
  # is counted as a key. grep -c always prints a number but exits 1 when that
  # number is 0, hence the `|| true` rather than a second `echo 0`.
  count=$(salt-key --list "$status" 2>/dev/null \
    | grep -cvi -e "^${status} keys:" -e '^$') || true
  echo "${count:-0}"
}
# Print a summary of key counts per state, followed by the key names of every
# state that has at least one key.
# Globals: C_GREEN, C_YELLOW, C_RED, C_RESET (read)
# Outputs: report on stdout
do_list() {
  echo "Salt Key Status"
  echo "==============="
  echo ""

  local n_accepted n_pending n_denied n_rejected
  n_accepted=$(count_keys "accepted")
  n_pending=$(count_keys "unaccepted")
  n_denied=$(count_keys "denied")
  n_rejected=$(count_keys "rejected")

  printf ' %bAccepted:%b %d\n' "$C_GREEN" "$C_RESET" "$n_accepted"
  printf ' %bPending:%b %d\n' "$C_YELLOW" "$C_RESET" "$n_pending"
  printf ' %bDenied:%b %d\n' "$C_RED" "$C_RESET" "$n_denied"
  printf ' %bRejected:%b %d\n' "$C_RED" "$C_RESET" "$n_rejected"
  echo ""

  # One row per state: salt-key state name, heading to print, header line to
  # strip from salt-key's output, heading color, and key count.
  local -a states=(accepted unaccepted denied rejected)
  local -a titles=("Accepted Keys:" "Pending Keys:" "Denied Keys:" "Rejected Keys:")
  local -a headers=("^Accepted Keys:" "^Unaccepted Keys:" "^Denied Keys:" "^Rejected Keys:")
  local -a tints=("$C_GREEN" "$C_YELLOW" "$C_RED" "$C_RED")
  local -a totals=("$n_accepted" "$n_pending" "$n_denied" "$n_rejected")

  local i
  for i in 0 1 2 3; do
    if (( totals[i] > 0 )); then
      printf '%b%s%b\n' "${tints[i]}" "${titles[i]}" "$C_RESET"
      salt-key --list "${states[i]}" 2>/dev/null | grep -v "${headers[i]}" | sed 's/^/ /'
      echo ""
    fi
  done
}
# Show the master fingerprint and the fingerprint of every pending key so an
# operator can compare them with what each minion reports locally.
# Globals: C_CYAN, C_YELLOW, C_RESET (read)
# Outputs: report on stdout; returns 0 right away when nothing is pending
do_verify() {
  local waiting
  waiting=$(salt-key --list unaccepted 2>/dev/null \
    | grep -v '^Unaccepted Keys:$' \
    | grep -v '^$')
  if [[ -z "$waiting" ]]; then
    log_info "No pending keys to verify"
    return 0
  fi

  echo "Master Fingerprint:"
  printf ' %b' "$C_CYAN"
  # Last line of the master.pub match (plus the line after it), spaces removed.
  salt-key -F master 2>/dev/null | grep -A1 "master.pub" | tail -1 | tr -d ' '
  printf '%b\n\n' "$C_RESET"

  echo "Pending Keys with Fingerprints:"
  echo ""

  local -a entries
  mapfile -t entries <<< "$waiting"
  local entry key_id fp
  for entry in "${entries[@]}"; do
    [[ -n "$entry" ]] || continue
    key_id=$(echo "$entry" | tr -d '[:space:]')
    # Second whitespace-separated field of the first non-header line.
    fp=$(salt-key -f "$key_id" 2>/dev/null | grep -v "^Unaccepted Keys:" | awk '{print $2}' | head -1)
    printf ' %b%-30s%b %s\n' "$C_YELLOW" "$key_id" "$C_RESET" "${fp:-unknown}"
  done

  echo ""
  log_info "Verify each fingerprint matches the minion's local fingerprint:"
  log_info " (on minion) salt-call --local key.finger"
}
# Accept every pending minion key after an optional confirmation.
# Globals: AUTO_YES (read) - when true the prompt is skipped
# Returns: 0 when nothing is pending, the user aborts, or the keys are
#          accepted; 1 when salt-key reports failure
do_accept_all() {
  local pending
  pending=$(count_keys "unaccepted")
  if ((pending == 0)); then
    log_info "No pending keys to accept"
    return 0
  fi
  log_info "Accepting $pending pending key(s)..."
  if [[ "$AUTO_YES" != true ]]; then
    local confirm
    echo "Accept all $pending pending keys? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi
  # Only claim success when salt-key actually succeeded.
  if ! salt-key -A -y 2>/dev/null; then
    log_error "Failed to accept pending keys"
    return 1
  fi
  log_info "All pending keys accepted"
}
# Accept the key of a single minion, showing its fingerprint and asking for
# confirmation unless AUTO_YES is true.
# Globals:   AUTO_YES (read)
# Arguments: $1 - minion id
# Returns:   0 on success or user abort; 1 when salt-key reports failure
do_accept() {
  local minion="$1"
  log_info "Accepting key for: $minion"
  if [[ "$AUTO_YES" != true ]]; then
    local fingerprint confirm
    # Second field of the first non-header line of `salt-key -f`.
    fingerprint=$(salt-key -f "$minion" 2>/dev/null | grep -v "^Unaccepted Keys:" | awk '{print $2}' | head -1)
    echo "Fingerprint: ${fingerprint:-unknown}"
    echo "Accept key for $minion? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi
  # Only claim success when salt-key actually succeeded.
  if ! salt-key -a "$minion" -y 2>/dev/null; then
    log_error "Failed to accept key for $minion"
    return 1
  fi
  log_info "Key accepted for $minion"
}
# Reject the key of a single minion after an optional confirmation.
# Globals:   AUTO_YES (read)
# Arguments: $1 - minion id
# Returns:   0 on success or user abort; 1 when salt-key reports failure
do_reject() {
  local minion="$1"
  log_info "Rejecting key for: $minion"
  if [[ "$AUTO_YES" != true ]]; then
    local confirm
    echo "Reject key for $minion? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi
  # Only claim success when salt-key actually succeeded.
  if ! salt-key -r "$minion" -y 2>/dev/null; then
    log_error "Failed to reject key for $minion"
    return 1
  fi
  log_info "Key rejected for $minion"
}
# Delete the key of a single minion after an optional confirmation.
# Globals:   AUTO_YES (read)
# Arguments: $1 - minion id
# Returns:   0 on success or user abort; 1 when salt-key reports failure
do_delete() {
  local minion="$1"
  log_info "Deleting key for: $minion"
  if [[ "$AUTO_YES" != true ]]; then
    local confirm
    echo "Delete key for $minion? This cannot be undone. [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi
  # Only claim success when salt-key actually succeeded.
  if ! salt-key -d "$minion" -y 2>/dev/null; then
    log_error "Failed to delete key for $minion"
    return 1
  fi
  log_info "Key deleted for $minion"
}
# Rotate a minion key: delete the current key so that a new one can be accepted
# once the minion reconnects.
# Globals:   AUTO_YES (read)
# Arguments: $1 - minion id
# Returns:   0 on success or user abort; 1 when salt-key reports failure
do_rotate() {
  local minion="$1"
  log_info "Rotating key for: $minion"
  log_info "This will delete the current key — the minion must reconnect to get a new key accepted"
  if [[ "$AUTO_YES" != true ]]; then
    local confirm
    echo "Rotate key for $minion? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi
  # Do not print the follow-up instructions if the old key is still in place.
  if ! salt-key -d "$minion" -y 2>/dev/null; then
    log_error "Failed to delete key for $minion — rotation not performed"
    return 1
  fi
  log_info "Key deleted for $minion — accept the new key when the minion reconnects"
  log_info "On the minion, restart salt-minion: systemctl restart salt-minion"
}
# Delete the keys of minions whose cache directory has not been modified within
# the given number of days.
# Globals:   DEFAULT_CACHE_DIR, AUTO_YES, C_RED, C_RESET (read)
# Arguments: $1 - age threshold in days
# Returns:   0 when nothing is stale, the user aborts, or all deletions succeed;
#            1 when the cache directory is missing, the cutoff cannot be
#            computed, or any deletion fails
do_cleanup_stale() {
  local days="$1"
  log_info "Finding minions not seen in $days days..."
  if [[ ! -d "$DEFAULT_CACHE_DIR" ]]; then
    log_error "Minion cache directory not found: $DEFAULT_CACHE_DIR"
    return 1
  fi

  # Try GNU date first, then the BSD form. Without a usable cutoff every
  # comparison below would be an arithmetic error, so stop here instead.
  local cutoff now
  cutoff=$(date -d "-${days} days" +%s 2>/dev/null) \
    || cutoff=$(date -v-"${days}"d +%s 2>/dev/null) \
    || cutoff=""
  if [[ ! "$cutoff" =~ ^-?[0-9]+$ ]]; then
    log_error "Unable to compute cutoff date for $days days"
    return 1
  fi
  now=$(date +%s)

  local stale_minions=()
  local minion_dir minion_name last_modified days_ago
  while IFS= read -r minion_dir; do
    minion_name=$(basename "$minion_dir")
    # Directory mtime via GNU stat, then BSD stat; skip entries neither can read.
    last_modified=$(stat -c %Y "$minion_dir" 2>/dev/null) \
      || last_modified=$(stat -f %m "$minion_dir" 2>/dev/null) \
      || continue
    if ((last_modified < cutoff)); then
      days_ago=$(( (now - last_modified) / 86400 ))
      stale_minions+=("$minion_name")
      printf ' %b%-30s%b (last seen %d days ago)\n' "$C_RED" "$minion_name" "$C_RESET" "$days_ago"
    fi
  done < <(find "$DEFAULT_CACHE_DIR" -maxdepth 1 -mindepth 1 -type d 2>/dev/null)

  if [[ ${#stale_minions[@]} -eq 0 ]]; then
    log_info "No stale minions found"
    return 0
  fi
  echo ""
  log_info "Found ${#stale_minions[@]} stale minion(s)"
  if [[ "$AUTO_YES" != true ]]; then
    local confirm
    echo "Delete keys for all ${#stale_minions[@]} stale minions? [y/N] "
    read -r confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
      log_info "Aborted"
      return 0
    fi
  fi

  # Report each failed deletion instead of silently skipping it.
  local minion failed=0
  for minion in "${stale_minions[@]}"; do
    if salt-key -d "$minion" -y 2>/dev/null; then
      log_info "Deleted key: $minion"
    else
      log_error "Failed to delete key: $minion"
      failed=$((failed + 1))
    fi
  done
  if ((failed > 0)); then
    log_error "Stale key cleanup finished with $failed failure(s)"
    return 1
  fi
  log_info "Stale key cleanup complete"
}
# Write the fingerprints of all accepted keys to a file, preceded by a short
# comment header (generation time and master host name).
# Arguments: $1 - path of the file to create or overwrite
# Returns:   0 on success; 1 when the output file cannot be written
do_export() {
  local output_path="$1"
  log_info "Exporting accepted key fingerprints to $output_path..."
  # Verify the destination is writable first; otherwise the failed redirect
  # below would go unnoticed and a misleading "Exported 0" would be logged.
  if ! true > "$output_path"; then
    log_error "Cannot write to $output_path"
    return 1
  fi
  {
    echo "# Salt Key Fingerprint Export"
    echo "# Generated: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
    echo "# Master: $(hostname -f 2>/dev/null || hostname)"
    echo "#"
    echo "# Format: minion_id fingerprint"
    echo ""
    # Drop the section header and blank lines from salt-key's output.
    salt-key -F accepted 2>/dev/null | sed -e '/^Accepted Keys:/d' -e '/^$/d'
  } > "$output_path"
  # Count data lines: everything that is neither a comment nor blank.
  local count
  count=$(grep -cv "^#\|^$" "$output_path" 2>/dev/null) || count=0
  log_info "Exported $count key fingerprint(s) to $output_path"
}
# Accept every minion listed in a file (one id per line).
# Blank or whitespace-only lines and lines whose first non-blank character is
# '#' are ignored; anything after the first ':' on a line is dropped.
# Arguments: $1 - path of the list file
# Returns:   0 when every listed minion was accepted; 1 when the file is
#            missing or at least one accept failed
do_bulk_accept() {
  local input_file="$1"
  if [[ ! -f "$input_file" ]]; then
    log_error "File not found: $input_file"
    return 1
  fi
  local count=0
  local failed=0
  local line minion_id
  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    minion_id="${line%%:*}"
    minion_id="${minion_id//[[:space:]]/}"
    # Test after trimming so "   " and "  # note" are skipped rather than being
    # passed to salt-key as an empty or bogus id.
    [[ -z "$minion_id" || "$minion_id" == \#* ]] && continue
    # stdin is detached so salt-key can never consume the rest of the list.
    if salt-key -a "$minion_id" -y 2>/dev/null < /dev/null; then
      log_info "Accepted: $minion_id"
      count=$((count + 1))
    else
      log_error "Failed to accept: $minion_id"
      failed=$((failed + 1))
    fi
  done < "$input_file"
  log_info "Bulk accept complete: $count accepted, $failed failed"
  if ((failed > 0)); then
    return 1
  fi
  return 0
}
# Parse command-line options into the global runtime flags.
# Globals:   ACTION, TARGET_MINION, STALE_DAYS, EXPORT_PATH, BULK_FILE,
#            AUTO_YES, USE_COLOR (written)
# Arguments: the script's command-line arguments
# Returns:   exits 0 after printing help for --help/-h; exits 1 on an unknown
#            option or when an option that needs a value is given without one
parse_arguments() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --list | --verify | --accept-all)
        # The action name is the option without its leading dashes.
        ACTION="${1#--}"
        shift
        ;;
      --accept | --reject | --delete | --rotate)
        # `shift 2` with a single remaining argument fails without shifting,
        # which would spin this loop forever; reject the missing value instead.
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a minion name"
          exit 1
        fi
        ACTION="${1#--}"
        TARGET_MINION="$2"
        shift 2
        ;;
      --cleanup-stale)
        ACTION="cleanup-stale"
        # DAYS is optional: consume the next word only when it is all digits.
        if [[ -n "${2:-}" && "$2" =~ ^[0-9]+$ ]]; then
          STALE_DAYS="$2"
          shift 2
        else
          shift
        fi
        ;;
      --export)
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires an output path"
          exit 1
        fi
        ACTION="export"
        EXPORT_PATH="$2"
        shift 2
        ;;
      --bulk-accept)
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires an input file"
          exit 1
        fi
        ACTION="bulk-accept"
        BULK_FILE="$2"
        shift 2
        ;;
      --yes)
        AUTO_YES=true
        shift
        ;;
      --no-color)
        USE_COLOR=false
        shift
        ;;
      --help | -h)
        show_help
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        show_help >&2
        exit 1
        ;;
    esac
  done
}
# Check the preconditions for running an action; exit 1 with an error message
# on the first one that fails. Checks, in order: running as root, an action was
# chosen, salt-key is available, and the chosen action has its argument.
# Globals: EUID, ACTION, TARGET_MINION, EXPORT_PATH, BULK_FILE (read)
validate_requirements() {
  if (( EUID != 0 )); then
    log_error "This script must be run as root (use sudo)"
    exit 1
  fi

  if [[ "$ACTION" == "" ]]; then
    log_error "An action is required"
    show_help >&2
    exit 1
  fi

  command -v salt-key >/dev/null 2>&1 || {
    log_error "salt-key not found — is salt-master installed?"
    exit 1
  }

  # Per-action argument checks.
  case "$ACTION" in
    accept | reject | delete | rotate)
      if [[ -z "$TARGET_MINION" ]]; then
        log_error "Minion name is required for --$ACTION"
        exit 1
      fi
      ;;
    export)
      if [[ -z "$EXPORT_PATH" ]]; then
        log_error "Output path is required for --export"
        exit 1
      fi
      ;;
    bulk-accept)
      if [[ -z "$BULK_FILE" ]]; then
        log_error "Input file is required for --bulk-accept"
        exit 1
      fi
      ;;
  esac
  return 0
}
# Entry point: parse options, validate them, then dispatch the chosen action.
# Globals:   ACTION, TARGET_MINION, STALE_DAYS, EXPORT_PATH, BULK_FILE (read)
# Arguments: the script's command-line arguments
# Returns:   the exit status of the dispatched action (0 on success), so a
#            failed action is no longer reported as a successful run
main() {
  parse_arguments "$@"
  validate_requirements
  setup_colors
  local rc=0
  case "$ACTION" in
    list) do_list || rc=$? ;;
    verify) do_verify || rc=$? ;;
    accept-all) do_accept_all || rc=$? ;;
    accept) do_accept "$TARGET_MINION" || rc=$? ;;
    reject) do_reject "$TARGET_MINION" || rc=$? ;;
    delete) do_delete "$TARGET_MINION" || rc=$? ;;
    rotate) do_rotate "$TARGET_MINION" || rc=$? ;;
    cleanup-stale) do_cleanup_stale "$STALE_DAYS" || rc=$? ;;
    export) do_export "$EXPORT_PATH" || rc=$? ;;
    bulk-accept) do_bulk_accept "$BULK_FILE" || rc=$? ;;
  esac
  if ((rc != 0)); then
    debug_echo "Action '$ACTION' failed with exit code $rc"
    return "$rc"
  fi
  debug_echo "Script completed successfully"
}
# Run main only when this file is executed directly; when it is sourced,
# BASH_SOURCE[0] differs from $0 and only the definitions are loaded.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi
+1314
View File
File diff suppressed because it is too large Load Diff
+509
View File
@@ -0,0 +1,509 @@
#!/bin/bash
################################################
#### Salt Master/Minion Setup Automation ####
#### Install and configure SaltStack ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### Version: 1.00-030526 ####
################################################
# A pipeline fails when any of its stages fails.
set -o pipefail

SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME

# Built-in defaults
readonly DEFAULT_SALT_VERSION="latest" \
  DEFAULT_FILE_ROOTS="/srv/salt" \
  DEFAULT_PILLAR_ROOTS="/srv/pillar" \
  DEFAULT_MASTER_INTERFACE="0.0.0.0" \
  DEFAULT_MASTER_PORT_PUB=4505 \
  DEFAULT_MASTER_PORT_RET=4506

# Settings the environment may override; each falls back to its default
# when unset or empty.
: "${SALT_VERSION:=$DEFAULT_SALT_VERSION}"
: "${FILE_ROOTS:=$DEFAULT_FILE_ROOTS}"
: "${PILLAR_ROOTS:=$DEFAULT_PILLAR_ROOTS}"
: "${DEBUG:=}"

# State filled in by argument parsing
MODE=""
MASTER_IP=""
MINION_ID=""
AUTO_ACCEPT=false
AUTO_YES=false

# State filled in by OS detection
PKG_MANAGER=""
OS_FAMILY=""
OS_VERSION=""
handle_error() {
local exit_code=$1
local line_number=$2
echo "Error: $SCRIPT_NAME failed at line $line_number with exit code $exit_code" >&2
exit "$exit_code"
}
trap 'handle_error $? $LINENO' ERR
# Print a [DEBUG]-prefixed message on stderr, but only when DEBUG is non-empty.
# Arguments: "$@" - words of the message
debug_echo() {
  [[ -z "$DEBUG" ]] || printf '%s\n' "[DEBUG] $*" >&2
}
# Print an [INFO]-prefixed message on stdout.
# Arguments: "$@" - words of the message
log_info() {
  printf '%s\n' "[INFO] $*"
}
# Print a [WARN]-prefixed message on stderr.
# Arguments: "$@" - words of the message
log_warn() {
  printf '%s\n' "[WARN] $*" >&2
}
# Print an [ERROR]-prefixed message on stderr.
# Arguments: "$@" - words of the message
log_error() {
  printf '%s\n' "[ERROR] $*" >&2
}
# Print the usage text on stdout.
# The heredoc delimiter is unquoted, so $SCRIPT_NAME and the DEFAULT_*
# constants are expanded into the text when the function runs.
show_help() {
cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]
Automate Salt master and/or minion installation and configuration.
Supports Ubuntu/Debian and RHEL/AlmaLinux. Adds the Salt Project repository,
installs packages, configures services, creates directory structure, and
opens firewall ports.
OPTIONS:
--mode master|minion|both What to install (required)
--master-ip ADDRESS Salt master IP or hostname (required for minion/both)
--minion-id NAME Custom minion ID (default: system hostname)
--auto-accept Enable auto_accept on master (NOT for production)
--salt-version VERSION Pin Salt version (default: latest)
--yes Skip confirmation prompts
--help, -h Show this help message
ENVIRONMENT VARIABLES:
SALT_VERSION Salt version to install (default: $DEFAULT_SALT_VERSION)
FILE_ROOTS Master file_roots path (default: $DEFAULT_FILE_ROOTS)
PILLAR_ROOTS Master pillar_roots path (default: $DEFAULT_PILLAR_ROOTS)
DEBUG Enable debug output
EXAMPLES:
# Install salt-master
sudo $SCRIPT_NAME --mode master --yes
# Install salt-minion pointing to master
sudo $SCRIPT_NAME --mode minion --master-ip 10.0.0.1
# Install both on the same node
sudo $SCRIPT_NAME --mode both --master-ip localhost --yes
# Install with custom minion ID
sudo $SCRIPT_NAME --mode minion --master-ip salt.example.com --minion-id web01
# Install specific Salt version
sudo $SCRIPT_NAME --mode master --salt-version 3006 --yes
EOF
}
# Identify the distribution from /etc/os-release.
# Globals written: OS_VERSION, OS_FAMILY, PKG_MANAGER, plus every variable
#   /etc/os-release itself defines (it is sourced into the current shell).
# Exits 1 when the file is missing or the distribution ID is not recognised.
detect_os() {
  if [[ ! -f /etc/os-release ]]; then
    log_error "Cannot detect OS — /etc/os-release not found"
    exit 1
  fi

  # shellcheck disable=SC1091
  source /etc/os-release
  OS_VERSION="$VERSION_ID"

  case "$ID" in
    ubuntu|debian)
      OS_FAMILY="debian"
      PKG_MANAGER="apt"
      ;;
    rhel|centos|rocky|almalinux|ol|fedora)
      OS_FAMILY="rhel"
      # dnf is used when available, yum otherwise.
      PKG_MANAGER="yum"
      if command -v dnf >/dev/null 2>&1; then
        PKG_MANAGER="dnf"
      fi
      ;;
    *)
      log_error "Unsupported OS: $ID"
      exit 1
      ;;
  esac

  debug_echo "Detected OS: $OS_FAMILY ($PKG_MANAGER) version $OS_VERSION"
}
# Print the processor count reported by nproc, or 2 when nproc fails.
get_cpu_count() {
  if ! nproc 2>/dev/null; then
    echo 2
  fi
}
# Register the Salt Project APT repository and refresh the package index.
# Globals read: OS_VERSION, SALT_VERSION, and ID / VERSION_CODENAME as set
#   when /etc/os-release was sourced during OS detection.
# Returns: 0 on success, 1 as soon as any step fails, so a half-configured
#   repository is never reported as added.
# NOTE(review): repo.saltproject.io may have been retired in favour of a new
#   package host. Confirm these URLs still resolve before relying on them.
add_salt_repo_debian() {
  log_info "Adding Salt Project repository (Debian/Ubuntu)..."

  # The repository has one tree per distribution, so the path must follow
  # the detected ID instead of always pointing at the Ubuntu tree.
  local distro="${ID:-ubuntu}"
  local base_url="https://repo.saltproject.io/salt/py3/${distro}/${OS_VERSION}/amd64"
  local keyring="/etc/apt/keyrings/salt-archive-keyring.gpg"
  local repo_url="$base_url"

  if ! apt-get update -qq; then
    log_error "apt-get update failed"
    return 1
  fi
  if ! apt-get install -y -qq curl gnupg2 >/dev/null; then
    log_error "Failed to install curl/gnupg2"
    return 1
  fi

  if ! mkdir -p /etc/apt/keyrings; then
    log_error "Cannot create /etc/apt/keyrings"
    return 1
  fi
  if ! curl -fsSL "${base_url}/SALT-PROJECT-GPG-PUBKEY-2023.gpg" -o "$keyring"; then
    log_error "Failed to download the Salt repository signing key"
    return 1
  fi

  if [[ "$SALT_VERSION" != "latest" ]]; then
    repo_url="${repo_url}/${SALT_VERSION}"
  fi

  if ! echo "deb [signed-by=${keyring}] ${repo_url} ${VERSION_CODENAME} main" \
    > /etc/apt/sources.list.d/salt.list; then
    log_error "Cannot write /etc/apt/sources.list.d/salt.list"
    return 1
  fi

  if ! apt-get update -qq; then
    log_error "apt-get update failed after adding the Salt repository"
    return 1
  fi
  log_info "Salt repository added"
}
# Write /etc/yum.repos.d/salt.repo for the Salt Project repository and
# expire the package manager's metadata cache.
# Globals read: OS_VERSION, SALT_VERSION, PKG_MANAGER
# NOTE(review): the path uses only the major release number and the
#   "redhat" tree; whether that is valid for every distribution that OS
#   detection maps to the rhel family (e.g. fedora) is not verified here.
add_salt_repo_rhel() {
  log_info "Adding Salt Project repository (RHEL)..."

  local major_ver="${OS_VERSION%%.*}"
  local tree_url="https://repo.saltproject.io/salt/py3/redhat/${major_ver}/x86_64"
  local repo_url="$tree_url"

  # A pinned version selects a sub-directory of the tree.
  if [[ "$SALT_VERSION" != "latest" ]]; then
    repo_url="${tree_url}/${SALT_VERSION}"
  fi

  printf '%s\n' \
    "[salt]" \
    "name=Salt Project for RHEL ${major_ver}" \
    "baseurl=${repo_url}" \
    "enabled=1" \
    "gpgcheck=1" \
    "gpgkey=https://repo.saltproject.io/salt/py3/redhat/${major_ver}/x86_64/SALT-PROJECT-GPG-PUBKEY-2023.pub" \
    > /etc/yum.repos.d/salt.repo

  "$PKG_MANAGER" clean expire-cache -q
  log_info "Salt repository added"
}
# Install the salt-master package with the detected package manager.
# Globals read: PKG_MANAGER
# Returns: 0 when the package manager succeeds; 1 when it fails or when
#   PKG_MANAGER is not one of apt/dnf/yum. Success is only logged after
#   the install command actually succeeded.
install_master() {
  log_info "Installing salt-master..."
  case "$PKG_MANAGER" in
    apt)
      if ! apt-get install -y -qq salt-master >/dev/null; then
        log_error "Failed to install salt-master"
        return 1
      fi
      ;;
    dnf|yum)
      if ! "$PKG_MANAGER" install -y -q salt-master; then
        log_error "Failed to install salt-master"
        return 1
      fi
      ;;
    *)
      log_error "Unsupported package manager: ${PKG_MANAGER:-unset}"
      return 1
      ;;
  esac
  log_info "salt-master installed"
}
# Install the salt-minion package with the detected package manager.
# Globals read: PKG_MANAGER
# Returns: 0 when the package manager succeeds; 1 when it fails or when
#   PKG_MANAGER is not one of apt/dnf/yum. Success is only logged after
#   the install command actually succeeded.
install_minion() {
  log_info "Installing salt-minion..."
  case "$PKG_MANAGER" in
    apt)
      if ! apt-get install -y -qq salt-minion >/dev/null; then
        log_error "Failed to install salt-minion"
        return 1
      fi
      ;;
    dnf|yum)
      if ! "$PKG_MANAGER" install -y -q salt-minion; then
        log_error "Failed to install salt-minion"
        return 1
      fi
      ;;
    *)
      log_error "Unsupported package manager: ${PKG_MANAGER:-unset}"
      return 1
      ;;
  esac
  log_info "salt-minion installed"
}
# Write /etc/salt/master, keeping a timestamped backup of any existing file.
# Globals read: DEFAULT_MASTER_INTERFACE, FILE_ROOTS, PILLAR_ROOTS, AUTO_ACCEPT
# worker_threads is set to the value printed by get_cpu_count.
# Returns: 1 when the configuration file cannot be written.
configure_master() {
  log_info "Configuring salt-master..."
  local worker_threads
  worker_threads=$(get_cpu_count)

  if [[ -f /etc/salt/master ]]; then
    cp /etc/salt/master /etc/salt/master.bak."$(date +%Y%m%d%H%M%S)"
    log_info "Backed up existing /etc/salt/master"
  fi

  # The file is YAML: the environment name must be nested under
  # file_roots/pillar_roots and the path list nested under the environment,
  # otherwise the roots are read as empty keys.
  if ! cat > /etc/salt/master << MASTEREOF
##### Salt Master Configuration #####
##### Managed by salt-setup.sh #####
interface: ${DEFAULT_MASTER_INTERFACE}
file_roots:
  base:
    - ${FILE_ROOTS}
pillar_roots:
  base:
    - ${PILLAR_ROOTS}
worker_threads: ${worker_threads}
timeout: 30
state_events: True
presence_events: True
MASTEREOF
  then
    log_error "Cannot write /etc/salt/master"
    return 1
  fi

  if [[ "$AUTO_ACCEPT" == true ]]; then
    {
      echo ""
      echo "# WARNING: NOT recommended for production"
      echo "auto_accept: True"
    } >> /etc/salt/master
    log_warn "auto_accept enabled — NOT recommended for production"
  else
    {
      echo ""
      echo "auto_accept: False"
    } >> /etc/salt/master
  fi

  log_info "Master configuration written to /etc/salt/master"
}
# Write /etc/salt/minion, keeping a timestamped backup of any existing file.
# Globals read: MINION_ID, MASTER_IP
# The minion id is MINION_ID when set; otherwise the output of `hostname -f`,
# or plain `hostname` when that fails.
configure_minion() {
  log_info "Configuring salt-minion..."

  local minion_id="$MINION_ID"
  if [[ -z "$minion_id" ]]; then
    minion_id=$(hostname -f 2>/dev/null || hostname)
  fi

  local conf="/etc/salt/minion"
  if [[ -f "$conf" ]]; then
    cp /etc/salt/minion /etc/salt/minion.bak."$(date +%Y%m%d%H%M%S)"
    log_info "Backed up existing /etc/salt/minion"
  fi

  printf '%s\n' \
    "##### Salt Minion Configuration #####" \
    "##### Managed by salt-setup.sh #####" \
    "master: ${MASTER_IP}" \
    "id: ${minion_id}" \
    "# grains:" \
    "# role: webserver" \
    "# environment: production" \
    > /etc/salt/minion

  log_info "Minion configured (id: ${minion_id}, master: ${MASTER_IP})"
}
# Create the file_roots and pillar_roots directories and seed each with a
# starter top.sls. An existing top.sls is never overwritten.
# Globals read: FILE_ROOTS, PILLAR_ROOTS
# Returns: 1 when the directories cannot be created.
create_directory_structure() {
  log_info "Creating Salt directory structure..."
  if ! mkdir -p "${FILE_ROOTS}" "${PILLAR_ROOTS}"; then
    log_error "Cannot create ${FILE_ROOTS} / ${PILLAR_ROOTS}"
    return 1
  fi

  # top.sls is YAML: the target ('*') must be nested under the environment
  # and its (empty) state list nested under the target.
  if [[ ! -f "${FILE_ROOTS}/top.sls" ]]; then
    cat > "${FILE_ROOTS}/top.sls" << 'TOPEOF'
base:
  '*':
    []
    # - common
    # - packages
TOPEOF
    log_info "Created ${FILE_ROOTS}/top.sls"
  fi

  if [[ ! -f "${PILLAR_ROOTS}/top.sls" ]]; then
    cat > "${PILLAR_ROOTS}/top.sls" << 'PTOPEOF'
base:
  '*':
    []
    # - common
PTOPEOF
    log_info "Created ${PILLAR_ROOTS}/top.sls"
  fi
}
# Open the two Salt master TCP ports in ufw or firewalld.
# Globals read: DEFAULT_MASTER_PORT_PUB, DEFAULT_MASTER_PORT_RET
# ufw is preferred when installed; firewalld is only considered when ufw is
# absent. A firewall that is installed but inactive is left untouched. When
# neither tool exists a warning asks the operator to open the ports by hand.
open_firewall_ports() {
  log_info "Configuring firewall for Salt master ports..."

  local port
  local -a ports=("${DEFAULT_MASTER_PORT_PUB}" "${DEFAULT_MASTER_PORT_RET}")

  if command -v ufw >/dev/null 2>&1; then
    if ! ufw status | grep -q "Status: active"; then
      debug_echo "ufw not active — skipping"
      return
    fi
    for port in "${ports[@]}"; do
      ufw allow "${port}/tcp" >/dev/null
    done
    log_info "Opened ports ${DEFAULT_MASTER_PORT_PUB}/${DEFAULT_MASTER_PORT_RET} in ufw"
    return
  fi

  if command -v firewall-cmd >/dev/null 2>&1; then
    if ! firewall-cmd --state >/dev/null 2>&1; then
      debug_echo "firewalld not running — skipping"
      return
    fi
    for port in "${ports[@]}"; do
      firewall-cmd --permanent --add-port="${port}/tcp" >/dev/null
    done
    # Permanent rules only take effect after a reload.
    firewall-cmd --reload >/dev/null
    log_info "Opened ports ${DEFAULT_MASTER_PORT_PUB}/${DEFAULT_MASTER_PORT_RET} in firewalld"
    return
  fi

  log_warn "No supported firewall detected — manually open ports ${DEFAULT_MASTER_PORT_PUB} and ${DEFAULT_MASTER_PORT_RET}"
}
# Enable a systemd unit at boot, restart it, and confirm that it is active.
# Arguments: $1 - unit name
# Returns: 1 (after printing `systemctl status`) when the unit is not active
#   after the restart.
start_service() {
  local unit="$1"

  log_info "Enabling and starting ${unit}..."
  systemctl enable "$unit" >/dev/null 2>&1
  systemctl restart "$unit"

  if ! systemctl is-active "$unit" >/dev/null 2>&1; then
    log_error "${unit} failed to start"
    systemctl status "$unit" --no-pager
    return 1
  fi

  log_info "${unit} is running"
}
# Print the post-install summary on stdout.
# Globals read: MODE, FILE_ROOTS, PILLAR_ROOTS, DEFAULT_MASTER_PORT_PUB,
#   DEFAULT_MASTER_PORT_RET, MINION_ID, MASTER_IP, AUTO_ACCEPT
# The master section is printed for modes master/both, the minion section
# for modes minion/both.
show_summary() {
  local divider="============================================"
  local minion_id

  printf '%s\n' "" "$divider" " Salt Setup Complete" "$divider"

  case "$MODE" in
    master|both)
      printf '%s\n' \
        "" \
        " Master:" \
        " Config: /etc/salt/master" \
        " File roots: ${FILE_ROOTS}" \
        " Pillar roots: ${PILLAR_ROOTS}" \
        " Ports: ${DEFAULT_MASTER_PORT_PUB}, ${DEFAULT_MASTER_PORT_RET}" \
        "" \
        " Master fingerprint:"
      # Show the fingerprint lines from salt-key, or a placeholder when
      # none can be found.
      salt-key -F master 2>/dev/null | grep -A1 "master.pub" \
        || echo " (not yet generated — restart may be needed)"
      printf '%s\n' \
        "" \
        " Next steps:" \
        " salt-key -L # List pending keys" \
        " salt-key -a <minion_id> # Accept a minion key" \
        " salt '*' test.ping # Test connectivity"
      ;;
  esac

  case "$MODE" in
    minion|both)
      minion_id="$MINION_ID"
      if [[ -z "$minion_id" ]]; then
        minion_id=$(hostname -f 2>/dev/null || hostname)
      fi
      printf '%s\n' \
        "" \
        " Minion:" \
        " Config: /etc/salt/minion" \
        " Master: ${MASTER_IP}" \
        " Minion ID: ${minion_id}" \
        "" \
        " Next steps:" \
        " salt-call test.ping # Test master connectivity"
      # With auto_accept off, the key still has to be accepted on the master.
      if [[ "$AUTO_ACCEPT" != true ]]; then
        echo " (on master) salt-key -a ${minion_id}"
      fi
      ;;
  esac

  printf '%s\n' "" "$divider"
}
# Parse command-line options into the runtime globals.
# Arguments: "$@" - the script's command-line arguments
# Globals written: MODE, MASTER_IP, MINION_ID, AUTO_ACCEPT, SALT_VERSION,
#   AUTO_YES
# Exits: 0 after --help; 1 on an unknown option, an invalid --mode value,
#   or a value-taking option that is missing its value.
parse_arguments() {
  while [[ $# -gt 0 ]]; do
    # A value-taking option given as the last argument would make `shift 2`
    # fail without consuming anything, so the loop would never terminate.
    case "$1" in
      --mode|--master-ip|--minion-id|--salt-version)
        if [[ $# -lt 2 ]]; then
          log_error "Option $1 requires a value"
          exit 1
        fi
        ;;
    esac

    case "$1" in
      --mode)
        MODE="$2"
        if [[ "$MODE" != "master" && "$MODE" != "minion" && "$MODE" != "both" ]]; then
          log_error "Mode must be 'master', 'minion', or 'both'"
          exit 1
        fi
        shift 2
        ;;
      --master-ip)
        MASTER_IP="$2"
        shift 2
        ;;
      --minion-id)
        MINION_ID="$2"
        shift 2
        ;;
      --auto-accept)
        AUTO_ACCEPT=true
        shift
        ;;
      --salt-version)
        SALT_VERSION="$2"
        shift 2
        ;;
      --yes)
        AUTO_YES=true
        shift
        ;;
      --help|-h)
        show_help
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        show_help >&2
        exit 1
        ;;
    esac
  done
}
# Check preconditions for an installation run, then detect the OS.
# Globals read: EUID, MODE, MASTER_IP
# Exits 1 after logging the reason when: the caller is not root, --mode was
# not given, or a minion is being installed without --master-ip.
validate_requirements() {
  if (( EUID != 0 )); then
    log_error "This script must be run as root (use sudo)"
    exit 1
  fi

  if [[ -z "$MODE" ]]; then
    log_error "--mode is required (master, minion, or both)"
    show_help >&2
    exit 1
  fi

  # A minion needs to know which master to contact.
  case "$MODE" in
    minion|both)
      if [[ -z "$MASTER_IP" ]]; then
        log_error "--master-ip is required for minion/both modes"
        exit 1
      fi
      ;;
  esac

  detect_os
}
# Run one installation step and abort the whole script when it fails, so a
# failed step can never be followed by the "Salt Setup Complete" summary.
# Arguments: "$@" - the step function and its arguments
_run_step() {
  "$@"
  local rc=$?
  if (( rc != 0 )); then
    log_error "Step failed (exit code ${rc}): $*"
    exit "$rc"
  fi
}

# Entry point: parse and validate the command line, show the plan, wait for
# confirmation unless --yes was given, then run the steps for the selected
# mode and print the summary.
# Arguments: "$@" - the script's command-line arguments
main() {
  parse_arguments "$@"
  validate_requirements

  echo "============================================"
  echo " Salt Setup"
  echo " Mode: $MODE"
  echo " OS: $OS_FAMILY ($PKG_MANAGER)"
  if [[ -n "$MASTER_IP" ]]; then
    echo " Master: $MASTER_IP"
  fi
  echo "============================================"
  echo ""

  if [[ "$AUTO_YES" != true ]]; then
    echo "Press Enter to continue, or Ctrl+C to abort..."
    read -r
  fi

  case "$OS_FAMILY" in
    debian) _run_step add_salt_repo_debian ;;
    rhel)   _run_step add_salt_repo_rhel ;;
  esac

  if [[ "$MODE" == "master" || "$MODE" == "both" ]]; then
    _run_step install_master
    _run_step configure_master
    _run_step create_directory_structure
    _run_step open_firewall_ports
    _run_step start_service salt-master
  fi

  if [[ "$MODE" == "minion" || "$MODE" == "both" ]]; then
    _run_step install_minion
    _run_step configure_minion
    _run_step start_service salt-minion
  fi

  show_summary
  debug_echo "Script completed successfully"
}
# Run main only when this file is executed directly. When the file is
# sourced, BASH_SOURCE[0] differs from $0 and nothing is executed.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi
+513
View File
@@ -0,0 +1,513 @@
<#
.SYNOPSIS
Monitors Salt Minion service status and exports metrics for Prometheus windows_exporter.
.DESCRIPTION
This script checks the status of the Salt Minion service and creates Prometheus-formatted metrics.
The metrics are written to a text file that can be consumed by the windows_exporter.
It can also create a scheduled task to run periodically.
.PARAMETER ValidateNotNullOrEmpty
Switch to validate that the MetricsFilePath parameter is not null or empty.
.PARAMETER ValidateScript
Validate that the MetricsFilePath parameter is a valid Windows path.
.PARAMETER MetricsFilePath
The path where the Prometheus metrics file will be written.
.PARAMETER InstallScheduledTask
Switch to create a scheduled task for periodic monitoring.
.PARAMETER TaskIntervalMinutes
The interval in minutes for the scheduled task. Default is 15 minutes.
.PARAMETER TimeoutSeconds
Timeout in seconds for service status checks. Default is 30 seconds.
.PARAMETER TimeoutSeconds
Timeout in seconds for service status checks. Default is 30 seconds.
.PARAMETER SaltMasterPort
The port number for the Salt Master. Default is 4505.
.PARAMETER DryRun
Switch to output metrics to console instead of writing to file.
.PARAMETER Verbose
Switch to enable verbose debug output for troubleshooting.
.PARAMETER Quiet
Switch to suppress non-error output (useful for scheduled tasks).
.PARAMETER NoCron
Switch to skip scheduled task installation.
.PARAMETER Version
Switch to display script version and exit.
.NOTES
Version: 3.3.0-20250915
Author: Phil Connor, contact@mylinux.work
License: MIT
Created: 2025-01-24, loosely based on my salt_status.sh used on the Linux servers.
#>
# Script parameters. Every parameter has a default, so the script can run
# with no arguments.
param(
# Destination of the metrics file. The validation script rejects a path
# whose parent directory does not exist and any path that does not start
# with a drive letter followed by ":\".
[ValidateNotNullOrEmpty()]
[ValidateScript({
$parentPath = Split-Path $_ -Parent
if ($parentPath -and -not (Test-Path $parentPath)) {
throw "Directory does not exist: $parentPath"
}
if ($_ -match '^[A-Za-z]:\\') {
return $true
}
throw "Invalid file path format"
})]
[string]$MetricsFilePath = "$env:ProgramFiles\windows_exporter\textfile_inputs\salt_status.prom",
[switch]$InstallScheduledTask = $false,
[ValidateRange(1, 1440)] # Scheduled-task interval must be between 1 and 1440 minutes
[int]$TaskIntervalMinutes = 15,
[ValidateRange(1, 300)] # Service-check timeout must be between 1 and 300 seconds
[int]$TimeoutSeconds = 30,
[int]$SaltMasterPort = 4505,
[switch]$DryRun = $false, # Output metrics to console instead of file
[switch]$Verbose = $false, # Enable verbose debug output
[switch]$Quiet = $false, # Suppress non-error output
[switch]$NoCron = $false, # Skip scheduled task installation
[switch]$Version = $false # Show version and exit
)
# -Version: print the banner lines and stop before doing any work.
if ($Version) {
    $bannerLines = @(
        "Salt Status Monitor PowerShell Script",
        "Version: 3.3.0-20250915",
        "Author: Phil Connor pconnor@ara.com"
    )
    foreach ($bannerLine in $bannerLines) {
        Write-Host $bannerLine
    }
    exit 0
}
# Map -Verbose / -Quiet onto the stream preference variables.
# -Quiet is applied last, so it wins when both switches are given.
if ($Verbose) {
    $VerbosePreference = $InformationPreference = 'Continue'
}
if ($Quiet) {
    $VerbosePreference = $InformationPreference = $WarningPreference = 'SilentlyContinue'
}
# Logging functions

# Writes a timestamped [VERBOSE] line to the host in cyan; does nothing
# unless the script was started with -Verbose.
function Write-VerboseLog {
    param([string]$Message)
    if (-not $Verbose) {
        return
    }
    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    Write-Host ("[VERBOSE] {0} {1}" -f $stamp, $Message) -ForegroundColor Cyan
}
# Writes a timestamped [INFO] line to the host in green; suppressed when the
# script was started with -Quiet.
function Write-InfoLog {
    param([string]$Message)
    if ($Quiet) {
        return
    }
    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    Write-Host ("[INFO] {0} {1}" -f $stamp, $Message) -ForegroundColor Green
}
# Create a scheduled task that re-runs this script every $TaskIntervalMinutes
# minutes as SYSTEM. Skipped with -NoCron or when the task already exists.
if ($InstallScheduledTask -and -not $NoCron) {
    $taskName = "SaltMinionStatusCheck"
    $existingTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
    if (-not $existingTask) {
        # Forward the settings of this invocation; otherwise the scheduled
        # run would silently fall back to the defaults (for example a custom
        # -MetricsFilePath would be lost).
        $taskArgumentList = @(
            '-NoProfile',
            '-ExecutionPolicy', 'Bypass',
            '-File', "`"$($MyInvocation.MyCommand.Path)`"",
            '-MetricsFilePath', "`"$MetricsFilePath`"",
            '-TimeoutSeconds', $TimeoutSeconds,
            '-SaltMasterPort', $SaltMasterPort
        )
        if ($Quiet) {
            $taskArgumentList += '-Quiet'
        }
        $taskAction = New-ScheduledTaskAction -Execute "powershell.exe" -Argument ($taskArgumentList -join ' ')
        # Add validation
        if (-not $TaskIntervalMinutes -or $TaskIntervalMinutes -le 0) {
            throw "TaskIntervalMinutes must be a positive integer"
        }
        # NOTE(review): the repetition is limited to 365 days, after which
        # the trigger stops repeating. Confirm this is intended.
        $taskTrigger = New-ScheduledTaskTrigger -Once -At (Get-Date).AddMinutes(1) -RepetitionInterval (New-TimeSpan -Minutes $TaskIntervalMinutes) -RepetitionDuration (New-TimeSpan -Days 365)
        $taskPrincipal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest
        try {
            Write-InfoLog "Creating scheduled task: $taskName"
            Register-ScheduledTask -TaskName $taskName -Action $taskAction -Trigger $taskTrigger -Principal $taskPrincipal -Description "Monitors Salt Minion status every $TaskIntervalMinutes minutes"
            # Verify the task was created
            $createdTask = Get-ScheduledTask -TaskName $taskName -ErrorAction SilentlyContinue
            if (-not $createdTask) {
                throw "Failed to verify scheduled task creation"
            }
            Write-InfoLog "Successfully created scheduled task: $taskName"
        } catch {
            Write-Error "Failed to create scheduled task: $($_.Exception.Message)"
            throw
        }
    } else {
        Write-InfoLog "Scheduled task $taskName already exists. Skipping creation."
    }
}
# Returns $true when Get-Command can resolve the given command name;
# otherwise writes a warning and returns $false.
function Test-CommandAvailability {
    param([string]$Command)
    try {
        $null = Get-Command -Name $Command -ErrorAction Stop
        $true
    } catch {
        Write-Warning "Required command '$Command' is not available"
        $false
    }
}
# Function to check if the minion has a TCP connection to the salt-master.
# Returns $true when `netstat -an` lists a TCP row whose FOREIGN address ends
# in :$SaltMasterPort, $false otherwise (including on any error).
function Test-Port4505Connection {
    try {
        # netstat -an prints: Proto, Local Address, Foreign Address, State.
        # Anchor the port to the third column so that a socket merely bound
        # to the same port locally is not mistaken for a master connection.
        $portCheck = netstat -an 2>$null | Select-String "^\s*TCP\s+\S+\s+\S+:$SaltMasterPort\s+"
        if ($null -ne $portCheck) {
            Write-VerboseLog "Port $SaltMasterPort is in use and has active connections"
            return $true
        } else {
            Write-VerboseLog "No active connections found on port $SaltMasterPort"
            return $false
        }
    } catch [System.Management.Automation.ActionPreferenceStopException] {
        # Raised when the error action is Stop; treat it as "not connected"
        # and return an explicit value instead of nothing.
        return $false
    } catch {
        # Log any other unexpected errors and return failure status
        Write-Warning "Failed to check port $SaltMasterPort : $($_.Exception.Message)"
        return $false
    }
}
# Function to check if the salt-master responds to ping.
# Runs `salt-call test.ping` in a background job so a hung call can be
# abandoned after $TimeoutSeconds. Returns $true only when the output
# contains "local: True" or a bare "True" line; every failure path
# (command missing, timeout, no output, exception) returns $false.
function Test-SaltPing {
    param(
        [int]$TimeoutSeconds = $TimeoutSeconds
    )
    if (-not (Test-CommandAvailability "salt-call")) {
        Write-Warning "Salt-call command not found"
        return $false
    }
    $job = $null
    try {
        # No --local here: that switch makes salt-call run as if there were
        # no master, so the result would say nothing about the master.
        $job = Start-Job -ScriptBlock { salt-call test.ping 2>$null } -ErrorAction Stop
        $completed = $job | Wait-Job -Timeout $TimeoutSeconds
        if (-not $completed) {
            Write-Warning "Salt-call test.ping timed out after $TimeoutSeconds seconds"
            return $false
        }
        $saltTest = $job | Receive-Job -ErrorAction SilentlyContinue
        if ($null -eq $saltTest) {
            # Warning stream, so -Quiet can suppress it like the other diagnostics.
            Write-Warning "No response from salt-call test.ping"
            return $false
        }
        # Multi-line output arrives as an array; join it so the regex sees one string.
        if ($saltTest -is [array]) {
            $saltTest = $saltTest -join "`n"
        }
        if ($saltTest -match "local:\s*True" -or $saltTest -match "^\s*True\s*$") {
            Write-VerboseLog "Salt-call test.ping returned True"
            return $true
        } else {
            Write-VerboseLog "Salt-call test.ping failed or returned unexpected output: $saltTest"
            return $false
        }
    } catch {
        Write-Warning "Salt-Call failed: $($_.Exception.Message)"
        return $false
    } finally {
        # Always dispose of the job, stopping it first if it is still running.
        if ($null -ne $job) {
            try {
                if ($job.State -eq 'Running') {
                    $job | Stop-Job -Force -ErrorAction SilentlyContinue
                }
            } finally {
                $job | Remove-Job -Force -ErrorAction SilentlyContinue
            }
        }
    }
}
# Returns $true when the name matches the Prometheus metric-name grammar
# [a-zA-Z_:][a-zA-Z0-9_:]*, otherwise $false.
function Test-PrometheusMetricName {
    param([string]$MetricName)
    $isValid = $MetricName -match '^[a-zA-Z_:][a-zA-Z0-9_:]*$'
    return $isValid
}
# Appends one metric in Prometheus text format (HELP line, TYPE line, then
# "<name> <value>") to the array referenced by -MetricsArray.
# A name that fails Test-PrometheusMetricName is skipped with a warning.
function Add-PrometheusMetric {
    param(
        [string]$Name,
        [string]$Help,
        [string]$Type,
        [object]$Value,
        [ref]$MetricsArray
    )
    if (Test-PrometheusMetricName $Name) {
        $MetricsArray.Value += @(
            "# HELP $Name $Help",
            "# TYPE $Name $Type",
            "$Name $Value"
        )
    } else {
        Write-Warning "Invalid metric name: $Name"
    }
}
# Reports the state of the "salt-minion" Windows service as a number:
#   1 = running, 0 = present but not running (or the lookup threw),
#   2 = service not found.
function Test-SaltMinionService {
    try {
        $minionService = Get-Service -Name "salt-minion" -ErrorAction SilentlyContinue
        if ($null -eq $minionService) {
            Write-Warning "Salt-minion service not found"
            return 2 # Service not found
        }
        $isRunning = $minionService.Status -eq 'Running'
        if ($isRunning) { return 1 }
        return 0
    } catch {
        Write-Warning "Failed to check salt-minion service status: $($_.Exception.Message)"
        return 0
    }
}
# Returns the first "<digits>.<digits>" found in the output of
# `salt-call --version`, or "0" when salt-call is unavailable, prints
# nothing that matches, or throws.
function Get-SaltVersion {
    $fallback = "0"
    if (-not (Test-CommandAvailability "salt-call")) {
        return $fallback
    }
    try {
        $versionOutput = & salt-call --version 2>$null
        if (-not ($versionOutput -match "(\d+\.\d+)")) {
            return $fallback
        }
        return $matches[1]
    } catch {
        Write-Warning "Failed to get Salt version: $($_.Exception.Message)"
        return $fallback
    }
}
# Returns the summed WorkingSet64 (bytes) of all processes named
# "salt-minion", or 0 when there are none or the lookup throws.
function Get-SaltMemoryUsage {
    try {
        $minionProcesses = Get-Process -Name "salt-minion" -ErrorAction SilentlyContinue
        if ($null -eq $minionProcesses) {
            return 0
        }
        $workingSetBytes = 0
        foreach ($proc in $minionProcesses) {
            $workingSetBytes = $workingSetBytes + $proc.WorkingSet64
        }
        return $workingSetBytes
    } catch {
        Write-Warning "Failed to get salt-minion memory usage: $($_.Exception.Message)"
        return 0
    }
}
# Function to get last successful communication timestamp.
# Returns the current time as whole seconds since 1970-01-01T00:00:00Z when
# Test-SaltPing succeeds, otherwise 0 (also 0 when salt-call is unavailable
# or an exception occurs).
function Get-LastCommunicationTimestamp {
    if (-not (Test-CommandAvailability "salt-call")) {
        return 0
    }
    try {
        $pingResult = Test-SaltPing
        if ($pingResult) {
            # Compute the epoch offset from UTC explicitly and keep it 64-bit.
            # `Get-Date -UFormat %s` yields a fractional string that had to be
            # parsed, is reportedly computed from local time on Windows
            # PowerShell 5.1, and the former [int] cast was limited to 32 bits.
            $epoch = New-Object System.DateTime -ArgumentList 1970, 1, 1, 0, 0, 0, ([System.DateTimeKind]::Utc)
            $elapsed = [System.DateTime]::UtcNow - $epoch
            return [long][math]::Floor($elapsed.TotalSeconds)
        }
        return 0
    } catch {
        Write-Warning "Failed to get last communication timestamp: $($_.Exception.Message)"
        return 0
    }
}
# Function to count recent Salt errors in Windows Event Log.
# Returns the number of Error-level (Level 2) events written by the
# "salt-minion" provider to the Application log in the last 24 hours, or 0
# when none are found. If the query throws, falls back to counting "[ERROR]"
# lines in the last 1000 lines of the minion log file.
function Get-SaltErrorCount {
    try {
        $since = (Get-Date).AddHours(-24)
        # The FilterHashtable key for the event source is ProviderName;
        # "Source" is not one of the documented keys.
        $errorEvents = Get-WinEvent -FilterHashtable @{
            LogName      = 'Application'
            ProviderName = 'salt-minion'
            Level        = 2 # Error level
            StartTime    = $since
        } -ErrorAction SilentlyContinue
        if ($null -eq $errorEvents) {
            return 0
        }
        # @() so that a single event still has a reliable Count.
        return @($errorEvents).Count
    } catch {
        # Fallback: try to read from salt log file if it exists
        $logPath = "${env:ProgramData}\Salt Project\Salt\var\log\salt\minion"
        if (Test-Path $logPath) {
            try {
                $logContent = Get-Content $logPath -Tail 1000 -ErrorAction SilentlyContinue
                $errorLines = @($logContent | Where-Object { $_ -match "\[ERROR\]" })
                return $errorLines.Count
            } catch {
                return 0
            }
        }
        return 0
    }
}
# Function to export Prometheus metrics
# Collects every metric in a fixed order and returns them as an array of
# text lines (HELP, TYPE and value line per metric). Each metric is gathered
# in its own try/catch: a failing check records a message in $errors and
# emits the metric with value 0, so one broken check never drops the rest.
function Export-PrometheusMetrics {
#Starts the metrics export.
$startTime = Get-Date
$metrics = @()
$errors = @()
try {
# Connection status metric (port 4505)
# Value 2 means the netstat command itself is unavailable.
try {
if (-not (Test-CommandAvailability "netstat")) {
$errors += "netstat command not found"
$connectionStatus = 2
} else {
$connectionStatus = if (Test-Port4505Connection) { 1 } else { 0 }
}
Add-PrometheusMetric -Name "minion_connection_status" -Help "Shows if Salt-Minion is connected to Salt-Master." -Type "gauge" -Value $connectionStatus -MetricsArray ([ref]$metrics)
} catch {
$errors += "Port 4505 check failed: $($_.Exception.Message)"
Add-PrometheusMetric -Name "minion_connection_status" -Help "Shows if Salt-Minion is connected to Salt-Master." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
}
# Salt ping metric
# Value 2 means the salt-call command itself is unavailable.
try {
if (-not (Test-CommandAvailability "salt-call")) {
$errors += "salt-call command not found"
$pingStatus = 2
} else {
$pingStatus = if (Test-SaltPing) { 1 } else { 0 }
}
Add-PrometheusMetric -Name "minion_ping_status" -Help "Shows if Salt-Minion is able to ping Salt-Master." -Type "gauge" -Value $pingStatus -MetricsArray ([ref]$metrics)
} catch {
$errors += "Salt ping check failed: $($_.Exception.Message)"
Add-PrometheusMetric -Name "minion_ping_status" -Help "Shows if Salt-Minion is able to ping Salt-Master." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
}
# Service status metric
try {
$serviceStatus = Test-SaltMinionService
Add-PrometheusMetric -Name "minion_service_status" -Help "Shows if Salt-Minion service is active." -Type "gauge" -Value $serviceStatus -MetricsArray ([ref]$metrics)
} catch {
$errors += "Service status check failed: $($_.Exception.Message)"
Add-PrometheusMetric -Name "minion_service_status" -Help "Shows if Salt-Minion service is active." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
}
# Last communication timestamp
try {
$lastComm = Get-LastCommunicationTimestamp
Add-PrometheusMetric -Name "minion_last_communication_timestamp" -Help "Timestamp of last successful communication with Salt-Master." -Type "gauge" -Value $lastComm -MetricsArray ([ref]$metrics)
} catch {
$errors += "Last communication check failed: $($_.Exception.Message)"
Add-PrometheusMetric -Name "minion_last_communication_timestamp" -Help "Timestamp of last successful communication with Salt-Master." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
}
# Salt version metric
try {
$version = Get-SaltVersion
Add-PrometheusMetric -Name "minion_version" -Help "Salt-Minion version number." -Type "gauge" -Value $version -MetricsArray ([ref]$metrics)
} catch {
$errors += "Version check failed: $($_.Exception.Message)"
Add-PrometheusMetric -Name "minion_version" -Help "Salt-Minion version number." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
}
# Memory usage metric
try {
$memoryUsage = Get-SaltMemoryUsage
Add-PrometheusMetric -Name "minion_memory_usage_bytes" -Help "Salt-Minion process memory usage in bytes." -Type "gauge" -Value $memoryUsage -MetricsArray ([ref]$metrics)
} catch {
$errors += "Memory usage check failed: $($_.Exception.Message)"
Add-PrometheusMetric -Name "minion_memory_usage_bytes" -Help "Salt-Minion process memory usage in bytes." -Type "gauge" -Value 0 -MetricsArray ([ref]$metrics)
}
# Error count metric
try {
$errorCount = Get-SaltErrorCount
Add-PrometheusMetric -Name "minion_error_count" -Help "Number of error entries in Salt-Minion log file." -Type "counter" -Value $errorCount -MetricsArray ([ref]$metrics)
} catch {
$errors += "Error count check failed: $($_.Exception.Message)"
Add-PrometheusMetric -Name "minion_error_count" -Help "Number of error entries in Salt-Minion log file." -Type "counter" -Value 0 -MetricsArray ([ref]$metrics)
}
# Windows-specific: Script execution error count
# Counts the messages collected in $errors during this run.
Add-PrometheusMetric -Name "windows_salt_script_errors_total" -Help "Total number of errors during script execution" -Type "counter" -Value $errors.Count -MetricsArray ([ref]$metrics)
# Windows-specific: Script runtime
# Wall-clock time elapsed since $startTime, in seconds.
$scriptRuntime = (Get-Date) - $startTime
Add-PrometheusMetric -Name "windows_salt_script_runtime_seconds" -Help "Total script execution time in seconds" -Type "gauge" -Value $scriptRuntime.TotalSeconds -MetricsArray ([ref]$metrics)
} finally {
# Always report how many checks failed, even if an exception escaped above.
if ($errors.Count -gt 0) {
Write-Warning "Script completed with $($errors.Count) errors"
}
}
return $metrics
}
# Collect the metrics, then either print them (-DryRun) or write them to
# $MetricsFilePath. A write that raises an IOException is retried after
# 100 ms, up to three attempts in total; any other failure ends the script
# with exit code 1.
try {
    $exportedMetrics = Export-PrometheusMetrics
    if ($null -eq $exportedMetrics) {
        throw "Export-PrometheusMetrics returned null"
    }

    if (-not $DryRun) {
        $maxRetries = 3
        for ($attempt = 1; $attempt -le $maxRetries; $attempt++) {
            try {
                $exportedMetrics | Out-File -FilePath $MetricsFilePath -Encoding UTF8 -Force
                break
            } catch [System.IO.IOException] {
                # Give up after the last attempt and let the outer catch report it.
                if ($attempt -ge $maxRetries) {
                    throw
                }
                Start-Sleep -Milliseconds 100
            }
        }
    } else {
        Write-Host "=== DRY RUN MODE - Metrics that would be written to $MetricsFilePath ===" -ForegroundColor Yellow
        foreach ($metricLine in $exportedMetrics) {
            Write-Host $metricLine
        }
        Write-Host "=== END DRY RUN OUTPUT ===" -ForegroundColor Yellow
    }
} catch {
    Write-Error "Failed to export metrics: $($_.Exception.Message)"
    exit 1
}
# Uncomment the following line to write metrics to the console
# $exportedMetrics = Export-PrometheusMetrics
Executable
+409
View File
@@ -0,0 +1,409 @@
#!/bin/bash
#####################################################
### ###
### Description: Expose metrics from salt-minion. ###
### ###
### Phil Connor, contact@mylinux.work ###
### License: MIT ###
### Version 2.28.0.20250915 ###
### ###
#####################################################
# Strict mode: exit on any unhandled error, treat unset variables as errors, and make a
# pipeline report failure when any of its stages fails
set -euo pipefail
# Parse command line arguments
# Defaults for the command-line switches; the option loop below overrides them
DRY_RUN=false   # --dry-run
VERBOSE=false   # --verbose / -v
QUIET=false     # --quiet / -q
NO_CRON=false   # --no-cron
# Version string printed by show_version
SCRIPT_VERSION="2.28.0.20250915"
# Print the script banner (name, version, author) to stdout.
# Globals: SCRIPT_VERSION (read)
show_version() {
    printf '%s\n' \
        "Salt Status Monitor Bash Script" \
        "Version: $SCRIPT_VERSION" \
        "Author: Phil Connor pconnor@ara.com"
}
# Print the usage text to stdout; "$0" is shown as the program name.
show_help() {
    cat <<EOF
Usage: $0 [OPTIONS]
Monitor Salt minion status and export Prometheus metrics

Options:
 --dry-run Output metrics to console instead of file
 --verbose Enable verbose debug output
 --quiet Suppress non-error output
 --no-cron Skip cron job installation
 --timeout N Override timeout seconds (default: varies by operation)
 --version Show version and exit
 --help Show this help message
EOF
}
# Logging functions
# Emit a timestamped [VERBOSE] line when VERBOSE is "true".
# Globals:   VERBOSE (read)
# Arguments: $1 - message text
# Outputs:   stderr. Callers run inside $(...) captures whose stdout is a metric
#            value, so diagnostics must never share that stream.
# Returns:   always 0, so a disabled logger can never trip `set -e` in a caller.
log_verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] [VERBOSE] $1" >&2
    fi
    return 0
}
# Emit a timestamped [INFO] line unless QUIET is set.
# Globals:   QUIET (read)
# Arguments: $1 - message text
# Outputs:   stderr, because stdout carries the Prometheus metrics.
# Returns:   always 0. The previous `[[ ... ]] && echo` form returned 1 under
#            --quiet, which aborted the whole script under `set -e`.
log_info() {
    if [[ "$QUIET" == "false" ]]; then
        echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1" >&2
    fi
    return 0
}
# Consume every command-line argument, setting the option globals.
# --version and --help print and exit 0; an unknown option or a bad --timeout
# value prints a message on stderr and exits 1.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --verbose|-v)
            VERBOSE=true
            shift
            ;;
        --quiet|-q)
            QUIET=true
            shift
            ;;
        --no-cron)
            NO_CRON=true
            shift
            ;;
        --timeout)
            # ${2:-} instead of $2: with `set -u` a missing value would otherwise
            # abort with "unbound variable" before the friendly error is printed.
            if [[ "${2:-}" =~ ^[0-9]+$ ]]; then
                # NOTE(review): TIMEOUT_OVERRIDE is only assigned here; no reader is
                # visible in this part of the file.
                TIMEOUT_OVERRIDE="$2"
                shift 2
            else
                echo "Error: --timeout requires a numeric value" >&2
                exit 1
            fi
            ;;
        --version)
            show_version
            exit 0
            ;;
        -h|--help)
            show_help
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            echo "Use --help for usage information" >&2
            exit 1
            ;;
    esac
done
# Absolute path of this script; written into the cron entry
readonly SCRIPT_PATH="$(readlink -f "$0")"
# Tunables: a value already present in the environment wins, otherwise the default applies
: "${CRONTAB_USER:=root}"                        # user whose crontab receives the job
: "${NODE_EXPORTER_DIR:=/var/lib/node_exporter}" # directory holding the Prometheus metrics file
: "${PROMETHEUS_USER:=prometheus}"               # owner given to the metrics directory
: "${LOCK_DIR:=/var/run}"                        # directory for lock files
: "${UPDATE_INTERVAL:=*/10 * * * *}"             # cron schedule, every 10 minutes by default
readonly CRONTAB_USER NODE_EXPORTER_DIR PROMETHEUS_USER LOCK_DIR UPDATE_INTERVAL
# Salt master communication port
readonly SALT_MASTER_PORT=4505
# Status codes used in Prometheus metrics:
# 1 = working, 0 = failed / not responding, 2 = service or command not found
readonly STATUS_SUCCESS=1 STATUS_FAILURE=0 STATUS_NOT_FOUND=2
# Refuse to continue if a critical setting ended up empty
if [[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]]; then
    echo "ERROR: Required environment variables not set" >&2
    exit 1
fi
# Report a fatal error and terminate.
# Arguments: $1 - message, printed to stderr prefixed with "ERROR: "
#            $2 - exit code (optional, defaults to 1)
handle_error() {
    local message="$1"
    local code="${2:-1}"
    printf 'ERROR: %s\n' "$message" >&2
    exit "$code"
}
# Write one log line to stdout as "[YYYY-MM-DD HH:MM:SS] [LEVEL] message".
# Arguments: $1 - level label, $2 - message text
log() {
    local level="$1" message="$2"
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] [%s] %s\n' "$stamp" "$level" "$message"
}
# Locate an executable, first through PATH and then in fallback directories.
# Arguments: $1   - command name
#            $2.. - fallback directories, searched in order (may be omitted)
# Outputs:   the resolved command (as printed by `command -v`) or the full
#            fallback path, on stdout
# Returns:   0 when found; otherwise calls handle_error, which exits
find_command() {
    local cmd="$1"
    shift
    # Loop variables are local so the search cannot clobber a caller's variables
    local dir candidate
    # First try to find command in PATH
    if command -v "$cmd" &>/dev/null; then
        command -v "$cmd"
        return 0
    fi
    # Iterating "$@" directly stays safe under `set -u` when no fallbacks are given
    for dir in "$@"; do
        candidate="$dir/$cmd"
        # -f excludes directories, which also pass the -x test
        if [[ -f "$candidate" && -x "$candidate" ]]; then
            echo "$candidate"
            return 0
        fi
    done
    # Command not found anywhere
    handle_error "Could not find '$cmd' executable"
}
# Install a cron job that runs this script periodically for CRONTAB_USER.
# Does nothing when that user's crontab already mentions SCRIPT_PATH.
# Globals:  CRONTAB_USER, SCRIPT_PATH, UPDATE_INTERVAL, NODE_EXPORTER_DIR (read)
# Returns:  0 on success or when already installed; calls handle_error otherwise
install_cron_job() {
    local existing_cron temp_cron
    # Read the crontab of the user we install into. A missing crontab makes
    # `crontab -l` fail, which is treated as an empty crontab.
    existing_cron=$(crontab -u "$CRONTAB_USER" -l 2>/dev/null || true)
    # Fixed-string match: the path must not be interpreted as a regular expression
    if grep -qF -- "$SCRIPT_PATH" <<< "$existing_cron"; then
        return 0
    fi
    # Create temporary file for new crontab
    temp_cron=$(mktemp) || handle_error "Failed to create temporary crontab file"
    # Combine existing crontab with new job
    {
        if [[ -n "$existing_cron" ]]; then
            printf '%s\n' "$existing_cron"
        fi
        echo "$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/salt_status.prom 2>&1"
    } > "$temp_cron"
    # Install the new crontab; the temporary file is removed on both paths
    if crontab -u "$CRONTAB_USER" "$temp_cron"; then
        rm -f "$temp_cron"
        # A logger's return status must not decide the outcome of the install
        log_info "Cron job installed successfully" || true
    else
        rm -f "$temp_cron"
        handle_error "Failed to install cron job"
    fi
    return 0
}
# Take an exclusive, non-blocking lock so only one instance runs at a time.
# Globals:  LOCK_DIR (read), lockfile (written)
# Uses file descriptor 9 for the lock.
# Returns:  0 when the lock is held; calls handle_error when the lock directory
#           is missing, the lock file cannot be opened, or another instance holds it
setup_lock() {
    # Ensure lock directory exists
    [[ -d "$LOCK_DIR" ]] || handle_error "Lock directory does not exist: $LOCK_DIR"
    # Remove stale per-run lock files (mktemp-style six character suffix) older than
    # 60 minutes; the pattern does not match the shared lock file below
    find "$LOCK_DIR" -name "salt_status.??????" -type f -mmin +60 -delete 2>/dev/null || true
    # All instances must lock the SAME file: a unique file per run can always be
    # locked, which would make the flock below a no-op
    lockfile="$LOCK_DIR/salt_status.lock"
    # Open lock file on file descriptor 9 and attempt to lock it
    exec 9>"$lockfile" || handle_error "Failed to create lock file"
    flock -n 9 || handle_error "Script is already running"
    # Closing the descriptor releases the lock. The file itself is kept, because
    # deleting it would let a concurrent instance lock a fresh file of the same name.
    trap 'exec 9>&-' EXIT
}
# Make sure the Node Exporter directory exists.
# An existing directory is accepted as-is. Otherwise, when running as root, it is
# created and handed to PROMETHEUS_USER, and its writability is then verified.
# Globals:  NODE_EXPORTER_DIR, PROMETHEUS_USER (read)
# Returns:  0 on success; calls handle_error when the directory is not writable
setup_directories() {
    # Return early if directory already exists
    if [[ -d "$NODE_EXPORTER_DIR" ]]; then
        return 0
    fi
    # Create directory if running as root
    if [[ "$(id -u)" == "0" ]]; then
        mkdir -p "$NODE_EXPORTER_DIR"
        # Set ownership to prometheus user, ignore errors if user doesn't exist
        chown "$PROMETHEUS_USER:" "$NODE_EXPORTER_DIR" 2>/dev/null || true
    fi
    # Verify the directory is writable. Written as `test || fail` so the function
    # returns 0 on success; `[[ ! -w ]] && fail` returned 1 and aborted under `set -e`.
    [[ -w "$NODE_EXPORTER_DIR" ]] || handle_error "$NODE_EXPORTER_DIR is not writable"
    return 0
}
# Report whether `ss -nt` lists a TCP socket involving the Salt master port.
# Globals:  SALT_MASTER_PORT, STATUS_SUCCESS, STATUS_FAILURE (read)
# Outputs:  STATUS_SUCCESS when the port appears in the socket list, else STATUS_FAILURE
check_salt_connection() {
    local ss_bin
    ss_bin=$(find_command ss /bin /usr/bin /usr/sbin)
    log_verbose "Checking for Salt connection on port $SALT_MASTER_PORT"
    local state="$STATUS_FAILURE"
    # -n: numeric addresses and ports, -t: TCP sockets only
    if "$ss_bin" -nt | grep -q "\b$SALT_MASTER_PORT\b"; then
        state="$STATUS_SUCCESS"
        log_verbose "Found active connection on port $SALT_MASTER_PORT"
    else
        log_verbose "No active connection found on port $SALT_MASTER_PORT"
    fi
    echo "$state"
}
# Run `salt-call test.ping` and report whether its output contains the word True.
# Globals:  STATUS_SUCCESS, STATUS_FAILURE, STATUS_NOT_FOUND (read)
# Outputs:  STATUS_NOT_FOUND when salt-call cannot be located, STATUS_SUCCESS when
#           the ping output contains "True", otherwise STATUS_FAILURE
check_salt_ping() {
    local salt_call_bin
    salt_call_bin=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null) || {
        echo "$STATUS_NOT_FOUND"
        return
    }
    local outcome="$STATUS_FAILURE"
    if "$salt_call_bin" test.ping 2>/dev/null | grep -q '\bTrue\b'; then
        outcome="$STATUS_SUCCESS"
    fi
    echo "$outcome"
}
# Ask systemctl whether the salt-minion unit is active.
# Globals:  STATUS_SUCCESS, STATUS_FAILURE, STATUS_NOT_FOUND (read)
# Outputs:  STATUS_NOT_FOUND when systemctl cannot be located, STATUS_SUCCESS when
#           `systemctl is-active salt-minion` succeeds, otherwise STATUS_FAILURE
check_salt_service() {
    local systemctl_bin
    systemctl_bin=$(find_command systemctl /bin /usr/bin /sbin /usr/sbin 2>/dev/null) || {
        echo "$STATUS_NOT_FOUND"
        return
    }
    local state="$STATUS_FAILURE"
    if "$systemctl_bin" is-active salt-minion &>/dev/null; then
        state="$STATUS_SUCCESS"
    fi
    echo "$state"
}
# Print the current Unix time when `salt-call test.ping` answers True right now,
# or 0 when salt-call is missing or the ping does not succeed.
check_salt_last_communication() {
    local salt_call_bin
    salt_call_bin=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null) || {
        echo "0"
        return
    }
    # A failed ping yields 0; there is no stored timestamp to fall back on
    if ! "$salt_call_bin" test.ping 2>/dev/null | grep -q '\bTrue\b'; then
        echo "0"
        return
    fi
    date +%s
}
# Print the first "digits.digits" token from `salt-call --version` (e.g. 3006.1),
# or 0 when salt-call is missing or prints no such token.
get_salt_version() {
    local salt_call_bin detected
    salt_call_bin=$(find_command salt-call /bin /usr/bin /usr/sbin 2>/dev/null) || {
        echo "0"
        return
    }
    detected=$("$salt_call_bin" --version 2>/dev/null | grep -o '[0-9]\+\.[0-9]\+' | head -1)
    if [[ -z "$detected" ]]; then
        echo "0"
    else
        echo "$detected"
    fi
}
# Sum the RSS column of every process whose command name starts with
# "salt-minion" and print the total multiplied by 1024 (ps reports RSS in KB).
# Prints 0 when ps cannot be located or no such process is listed.
get_salt_memory_usage() {
    local ps_bin rss_kb
    ps_bin=$(find_command ps /bin /usr/bin 2>/dev/null) || {
        echo "0"
        return
    }
    rss_kb=$("$ps_bin" -eo comm,rss | grep -E '^salt-minion' | awk '{sum+=$2} END {print sum+0}' 2>/dev/null)
    if [[ -z "$rss_kb" ]]; then
        rss_kb=0
    fi
    echo "$((rss_kb * 1024))"
}
# Count the lines containing "[ERROR]" in the salt-minion log.
# The whole file is counted; no time window is applied.
# Arguments: $1 - log file path (optional, defaults to /var/log/salt/minion)
# Outputs:   the number of matching lines; 0 when the file is missing or unreadable
count_salt_errors() {
    local log_file="${1:-/var/log/salt/minion}"
    local error_count
    # Return 0 if log file doesn't exist or isn't readable
    if [[ ! -r "$log_file" ]]; then
        echo "0"
        return 0
    fi
    # grep -c prints 0 but exits 1 when nothing matches; `|| true` keeps that
    # normal case from counting as a failure under `set -e`
    error_count=$(grep -c "\[ERROR\]" -- "$log_file" 2>/dev/null || true)
    echo "${error_count:-0}"
}
# Print one metric as three exposition-format lines: HELP, TYPE, then the sample.
# Arguments: $1 - metric name, $2 - value, $3 - help text, $4 - metric type
output_metric() {
    local name="$1" value="$2" help="$3" type="$4"
    printf '# HELP %s %s\n# TYPE %s %s\n%s %s\n' \
        "$name" "$help" \
        "$name" "$type" \
        "$name" "$value"
}
# Entry point: prepare the environment (unless in dry-run mode), collect every
# Salt status value, and print them as Prometheus metrics on stdout.
# Globals: DRY_RUN, NO_CRON, SCRIPT_PATH, NODE_EXPORTER_DIR (read)
main() {
    if [[ "$DRY_RUN" != "false" ]]; then
        # Dry run: no lock, no directory setup, no cron job; just announce the mode
        echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/salt_status.prom ===" >&2
    else
        # Prevent concurrent execution, then make sure the output directory exists
        setup_lock
        setup_directories
        # Install the cron job only if the script file exists and it was not disabled
        if [[ -f "$SCRIPT_PATH" && "$NO_CRON" == "false" ]]; then
            install_cron_job
        elif [[ "$NO_CRON" == "true" ]]; then
            log_info "Skipping cron job installation (--no-cron specified)"
        fi
    fi

    # Collect Salt status metrics
    local connection_status ping_status service_status last_comm version memory_usage error_count
    connection_status=$(check_salt_connection)
    ping_status=$(check_salt_ping)
    service_status=$(check_salt_service)
    last_comm=$(check_salt_last_communication)
    version=$(get_salt_version)
    memory_usage=$(get_salt_memory_usage)
    error_count=$(count_salt_errors)

    # Output metrics in Prometheus format
    output_metric "minion_connection_status" "$connection_status" "Shows if Salt-Minion is connected to Salt-Master." "gauge"
    output_metric "minion_ping_status" "$ping_status" "Shows if Salt-Minion is able to ping Salt-Master." "gauge"
    output_metric "minion_service_status" "$service_status" "Shows if Salt-Minion service is active." "gauge"
    output_metric "minion_last_communication_timestamp" "$last_comm" "Timestamp of last successful communication with Salt-Master." "gauge"
    output_metric "minion_version" "$version" "Salt-Minion version number." "gauge"
    output_metric "minion_memory_usage_bytes" "$memory_usage" "Salt-Minion process memory usage in bytes." "gauge"
    output_metric "minion_error_count" "$error_count" "Number of error entries in Salt-Minion log file." "counter"

    if [[ "$DRY_RUN" == "true" ]]; then
        echo "=== END DRY RUN OUTPUT ===" >&2
    fi
}
# Execute main function with all script arguments
# (the option loop earlier in the file has already shifted every argument away,
# and main itself does not read its positional parameters)
main "$@"
+210
View File
@@ -0,0 +1,210 @@
#!/bin/bash
#############################################################
#### iperf3 Server Setup ####
#### Install and configure iperf3 as a systemd service ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 1.0 ####
#### ####
#### Usage: sudo ./setup-iperf3-server.sh [OPTIONS] ####
#############################################################
# Strict mode: stop on an unhandled command failure, on use of an unset variable,
# and when any stage of a pipeline fails
set -euo pipefail
# Default configuration
# LISTEN_PORT, HARDENED and UNINSTALL are overwritten by parse_args
# (--port, --hardened and --uninstall respectively)
LISTEN_PORT=9182
HARDENED=false
UNINSTALL=false
# systemd unit name and the unit file path derived from it
SERVICE_NAME="iperf3-server"
SERVICE_FILE="/etc/systemd/system/${SERVICE_NAME}.service"
# Print the usage text to stdout and terminate the script with status 0.
show_help() {
    printf '%s\n' \
        'Usage: sudo ./setup-iperf3-server.sh [OPTIONS]' \
        'Install and configure iperf3 as a systemd service.' \
        'Options:' \
        '--port PORT Set the iperf3 listen port (default: 9182)' \
        '--hardened Use the hardened service file with IP restrictions' \
        'and security settings (private networks only)' \
        '--uninstall Stop, disable, and remove the iperf3 service' \
        '--help Show this help message' \
        'Examples:' \
        'sudo ./setup-iperf3-server.sh' \
        'sudo ./setup-iperf3-server.sh --port 5201' \
        'sudo ./setup-iperf3-server.sh --hardened' \
        'sudo ./setup-iperf3-server.sh --uninstall'
    exit 0
}
# Parse the command line into LISTEN_PORT, HARDENED and UNINSTALL.
# Arguments: the script's arguments ("$@")
# Globals:   LISTEN_PORT, HARDENED, UNINSTALL (written)
# Exits 1 on an unknown option or an invalid --port value; --help prints the
# usage via show_help, which exits 0.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --port)
                if [[ -z "${2:-}" ]]; then
                    echo "ERROR: --port requires a value"
                    exit 1
                fi
                # The port is written verbatim into the unit's ExecStart line, so only
                # a plain TCP port number is accepted. 10# forces base 10, so a value
                # with leading zeros is not read as octal.
                if [[ ! "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
                    echo "ERROR: --port must be an integer between 1 and 65535 (got: $2)"
                    exit 1
                fi
                LISTEN_PORT=$(( 10#$2 ))
                shift 2
                ;;
            --hardened)
                HARDENED=true
                shift
                ;;
            --uninstall)
                UNINSTALL=true
                shift
                ;;
            --help)
                show_help
                ;;
            *)
                echo "ERROR: Unknown option: $1"
                echo "Run with --help for usage information."
                exit 1
                ;;
        esac
    done
}
# Ensure script is run as root.
# Exception: `--help` as the first argument only prints the usage text and exits,
# so it is allowed without root. Any other invocation still needs root because it
# writes under /etc/systemd/system and calls systemctl.
if [[ $EUID -ne 0 && "${1:-}" != "--help" ]]; then
    echo "ERROR: This script must be run as root (use sudo)."
    exit 1
fi
# Install the iperf3 package unless an iperf3 command is already available.
# Uses the first package manager found among apt-get, dnf and yum; exits 1 when
# none of them is present.
install_iperf3() {
    if command -v iperf3 >/dev/null 2>&1; then
        echo "iperf3 is already installed."
        return
    fi
    echo "Installing iperf3..."
    # Pick the first available package manager, in order of preference
    local manager="" candidate
    for candidate in apt-get dnf yum; do
        if command -v "$candidate" >/dev/null 2>&1; then
            manager="$candidate"
            break
        fi
    done
    case "$manager" in
        apt-get)
            apt-get update && apt-get install -y iperf3
            ;;
        dnf)
            dnf install -y iperf3
            ;;
        yum)
            yum install -y iperf3
            ;;
        *)
            echo "ERROR: Cannot install iperf3 automatically. Please install manually."
            exit 1
            ;;
    esac
}
# Write the systemd unit for the iperf3 server, then enable and start it.
# Globals: HARDENED, SERVICE_FILE, SERVICE_NAME, LISTEN_PORT (read)
# With HARDENED=true the unit additionally carries the sandboxing directives and
# the IPAddressDeny/IPAddressAllow rules shown in the first heredoc; otherwise
# the minimal unit from the second heredoc is written.
# NOTE(review): both units hard-code /usr/bin/iperf3 as the binary path — confirm
# that matches where the package manager installs iperf3.
install_service() {
echo "Installing systemd service..."
if [[ "$HARDENED" == true ]]; then
echo "Using hardened service configuration (private networks only)."
# Unquoted heredoc: ${LISTEN_PORT} is expanded now, while \$MAINPID is escaped
# so the literal $MAINPID reaches the unit file
cat > "$SERVICE_FILE" <<EOF
[Unit]
Description=iperf3 Network Performance Testing Server
After=network.target
Wants=network.target
[Service]
Type=simple
User=root
Group=root
ExecStart=/usr/bin/iperf3 -s -p ${LISTEN_PORT}
ExecReload=/bin/kill -HUP \$MAINPID
KillMode=process
Restart=on-failure
RestartSec=5s
# Security settings
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/tmp
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictRealtime=true
RestrictSUIDSGID=true
# Network settings — restrict to private networks
IPAddressDeny=any
IPAddressAllow=localhost
IPAddressAllow=192.168.0.0/16
IPAddressAllow=10.0.0.0/8
IPAddressAllow=172.16.0.0/12
[Install]
WantedBy=multi-user.target
EOF
else
# Minimal unit: same ExecStart and restart policy, without the sandboxing directives
cat > "$SERVICE_FILE" <<EOF
[Unit]
Description=iperf3 Network Performance Testing Server
After=network.target
[Service]
Type=simple
User=root
Group=root
ExecStart=/usr/bin/iperf3 -s -p ${LISTEN_PORT}
Restart=on-failure
RestartSec=5s
[Install]
WantedBy=multi-user.target
EOF
fi
# Unit file mode: owner read/write, everyone else read-only
chmod 644 "$SERVICE_FILE"
echo "Enabling and starting service..."
# Make systemd re-read unit files before enabling and starting the new unit
systemctl daemon-reload
systemctl enable "${SERVICE_NAME}.service"
systemctl start "${SERVICE_NAME}.service"
echo ""
echo "iperf3 server service installed and started!"
echo ""
# The status output is informational; `|| true` keeps its exit code from aborting the script
systemctl status "${SERVICE_NAME}.service" --no-pager || true
echo ""
echo "Service commands:"
echo " Start: sudo systemctl start ${SERVICE_NAME}"
echo " Stop: sudo systemctl stop ${SERVICE_NAME}"
echo " Status: sudo systemctl status ${SERVICE_NAME}"
echo " Logs: sudo journalctl -u ${SERVICE_NAME} -f"
echo ""
echo "Test connection from another machine:"
# Suggests the first address printed by `hostname -I` as the server address
echo " iperf3 -c $(hostname -I 2>/dev/null | awk '{print $1}') -p ${LISTEN_PORT} -t 10"
echo ""
echo "To customize settings, edit:"
echo " ${SERVICE_FILE}"
echo "Then run: sudo systemctl daemon-reload && sudo systemctl restart ${SERVICE_NAME}"
}
# Stop and disable the iperf3 unit, delete its unit file and reload systemd.
# Globals: SERVICE_NAME, SERVICE_FILE (read)
# Failures of stop/disable are ignored, so a unit that is already absent does
# not abort the removal.
uninstall_service() {
    echo "Removing iperf3 server service..."
    local action
    for action in stop disable; do
        systemctl "$action" "${SERVICE_NAME}" 2>/dev/null || true
    done
    rm -f "$SERVICE_FILE"
    systemctl daemon-reload
    echo "iperf3 server service removed."
}
# --- Main execution ---
# Parse the options, then either remove the service or install iperf3 and its unit.
parse_args "$@"
case "$UNINSTALL" in
    true)
        uninstall_service
        ;;
    *)
        echo "Setting up iperf3 server service on port ${LISTEN_PORT}..."
        install_iperf3
        install_service
        ;;
esac
+637
View File
@@ -0,0 +1,637 @@
#!/bin/bash
#############################################################
#### Speedtest Metrics Exporter ####
#### Internet & LAN speed metrics for Prometheus ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 2.1 ####
#### ####
#### Usage: ./speedtest-metrics.sh [OPTIONS] ####
#############################################################
# Strict mode: stop on an unhandled command failure, on use of an unset variable,
# and when any stage of a pipeline fails
set -euo pipefail
#########################
### Output Mode ###
#########################
# HTTP listen port: SPEEDTEST_EXPORTER_PORT when set and non-empty, otherwise 9196
LISTEN_PORT="${SPEEDTEST_EXPORTER_PORT:-9196}"
# Directory that --textfile uses to build the output path
TEXTFILE_DIR="/var/lib/node_exporter"
# Destination file; empty by default, set by --textfile or --output
OUTPUT_FILE=""
# Switched to true by --http and by --port
HTTP_MODE=false
#########################
### Parse Arguments ###
#########################
# Print the usage text to stdout. "$0" is shown as the program name and the
# current LISTEN_PORT is shown for the --http mode.
show_help() {
    printf '%s\n' \
        'Speedtest Metrics Exporter for Prometheus' \
        "Usage: $0 [OPTIONS]" \
        'MODES:' \
        '--textfile Write to node_exporter textfile collector' \
        "--http Run HTTP server on port $LISTEN_PORT" \
        '(no flag) Output to stdout (default)' \
        'OPTIONS:' \
        '-p, --port PORT HTTP port (default: 9196)' \
        '-o, --output PATH Output file path' \
        '--help Show this help' \
        'ENVIRONMENT VARIABLES:' \
        'SPEEDTEST_SERVERS Comma-separated Ookla server IDs or "auto" (default: auto)' \
        'IPERF_SERVER Local iperf3 server IP (default: 192.168.1.100)' \
        'IPERF_PORT iperf3 port (default: 9182)' \
        'SPEEDTEST_EXPORTER_PORT Same as --port' \
        'EXAMPLES:' \
        "$0 # One-shot to stdout" \
        "$0 --textfile # Write to textfile collector" \
        "$0 --http --port 9196 # Run HTTP server" \
        "$0 -o /tmp/speedtest.prom # Write to custom file"
}
# Answer a single HTTP request read from stdin and write the response to stdout.
# Only the request line is read; its second field is taken as the path.
#   /metrics -> 200 with the output of collect_metrics
#   /        -> 200 with a small HTML index page
#   other    -> 404
handle_request() {
    local request path body content_length
    read -r request || true
    path=$(echo "$request" | awk '{print $2}')
    case "$path" in
        /metrics)
            body=$(collect_metrics)
            # Command substitution strips trailing newlines, but the Prometheus text
            # format requires the last line to end with a line feed
            body+=$'\n'
            # Content-Length counts bytes; ${#body} counts characters and would be
            # too small for multi-byte label values in a UTF-8 locale
            content_length=$(( $(printf '%s' "$body" | wc -c) ))
            printf "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s" "$content_length" "$body"
            ;;
        /)
            body="<html><body><h1>Speedtest Metrics Exporter</h1><p><a href='/metrics'>Metrics</a></p></body></html>"
            # The index page is plain ASCII, so the character count equals the byte count
            content_length=${#body}
            printf "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nContent-Length: %d\r\nConnection: close\r\n\r\n%s" "$content_length" "$body"
            ;;
        *)
            printf "HTTP/1.1 404 Not Found\r\nContent-Type: text/plain\r\nContent-Length: 9\r\nConnection: close\r\n\r\nNot Found"
            ;;
    esac
}
# Parse the command line into OUTPUT_FILE, HTTP_MODE and LISTEN_PORT.
# Arguments: the script's arguments ("$@")
# Globals:   TEXTFILE_DIR (read); OUTPUT_FILE, HTTP_MODE, LISTEN_PORT (written)
# --help prints the usage and exits 0; --handle-request serves one HTTP request
# and exits 0; an unknown option or a missing/invalid value exits 1.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/speedtest.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            --port|-p)
                # ${2:-} keeps `set -u` from aborting with "unbound variable" when
                # the value is missing
                if [[ ! "${2:-}" =~ ^[0-9]+$ ]]; then
                    echo "Error: $1 requires a numeric port value" >&2
                    exit 1
                fi
                LISTEN_PORT="$2"
                # Giving a port implies HTTP mode
                HTTP_MODE=true
                shift 2
                ;;
            --output|-o)
                if [[ -z "${2:-}" ]]; then
                    echo "Error: $1 requires a file path" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            --handle-request)
                # NOTE(review): handle_request calls collect_metrics for /metrics, so
                # collect_metrics must already be defined when parse_args runs — confirm.
                handle_request
                exit 0
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                show_help >&2
                exit 1
                ;;
        esac
    done
}
# Parse the command line right away.
# NOTE(review): this call runs before collect_metrics is defined further down, yet
# --handle-request calls handle_request, whose /metrics branch invokes collect_metrics.
# Confirm that path works, or move this call below the function definitions.
parse_args "$@"
#########################
### Metrics Collection ###
#########################
collect_metrics() {
# Configuration
TEMP_FILE="/tmp/speedtest_$$"
IPERF_SERVER="${IPERF_SERVER:-192.168.1.100}" # Set to your local iperf3 server IP
IPERF_PORT="${IPERF_PORT:-9182}" # iperf3 port
# Multiple speedtest servers - add/remove server IDs as needed
# Common server IDs for major cities:
# Dallas/DFW: 5029 (AT&T), 12190 (Spectrum), 26847 (Verizon)
# New York: 3737 (Verizon), 11570 (Optimum), 17395 (Spectrum)
SPEEDTEST_SERVERS="${SPEEDTEST_SERVERS:-auto}" # Comma-separated server IDs or "auto"
cleanup() {
rm -f "$TEMP_FILE"
}
trap cleanup EXIT
# Record script start time
SCRIPT_START_TIME=$(date +%s.%N)
# Internet Speed Test - Multiple Servers
echo "# Running internet speedtest on multiple servers..." >&2
# Initialize arrays to store results for all servers
declare -a SERVER_IDS=()
declare -a PING_LATENCIES=()
declare -a PING_JITTERS=()
declare -a PING_LOWS=()
declare -a PING_HIGHS=()
declare -a DOWNLOAD_MBPS=()
declare -a UPLOAD_MBPS=()
declare -a PACKET_LOSSES=()
declare -a EXTERNAL_IPS=()
declare -a TEST_TIMESTAMPS=()
declare -a SERVER_NAMES=()
declare -a SERVER_LOCATIONS=()
declare -a SERVER_COUNTRIES=()
declare -a ISPS=()
declare -a RESULT_URLS=()
declare -a DOWNLOAD_SIZES=()
declare -a UPLOAD_SIZES=()
declare -a SUCCESSES=()
# Convert comma-separated servers to array
IFS=',' read -ra SERVERS <<< "$SPEEDTEST_SERVERS"
# Test each server
for server_id in "${SERVERS[@]}"; do
server_id=$(echo "$server_id" | xargs) # Trim whitespace
echo "# Testing server $server_id..." >&2
TEMP_SERVER_FILE="/tmp/speedtest_${server_id}_$$"
# Handle auto server selection vs specific server ID
if [[ "$server_id" == "auto" ]]; then
speedtest_cmd="speedtest --format=json"
else
speedtest_cmd="speedtest -s $server_id --format=json"
fi
if $speedtest_cmd --accept-license --accept-gdpr > "$TEMP_SERVER_FILE" 2>/dev/null; then
echo "# Server $server_id: SUCCESS" >&2
# Parse results for this server
ping_latency=$(jq -r '.ping.latency // "0"' "$TEMP_SERVER_FILE")
ping_jitter=$(jq -r '.ping.jitter // "0"' "$TEMP_SERVER_FILE")
ping_low=$(jq -r '.ping.low // "0"' "$TEMP_SERVER_FILE")
ping_high=$(jq -r '.ping.high // "0"' "$TEMP_SERVER_FILE")
download_bandwidth=$(jq -r '.download.bandwidth // "0"' "$TEMP_SERVER_FILE")
upload_bandwidth=$(jq -r '.upload.bandwidth // "0"' "$TEMP_SERVER_FILE")
packet_loss=$(jq -r '.packetLoss // "0"' "$TEMP_SERVER_FILE")
external_ip=$(jq -r '.interface.externalIp // "unknown"' "$TEMP_SERVER_FILE")
# Handle timestamp conversion
test_timestamp_raw=$(jq -r '.timestamp // "0"' "$TEMP_SERVER_FILE")
if [[ "$test_timestamp_raw" != "0" ]] && [[ "$test_timestamp_raw" != "unknown" ]]; then
test_timestamp=$(date -d "$test_timestamp_raw" +%s 2>/dev/null || echo "0")
else
test_timestamp=0
fi
server_name=$(jq -r '.server.name // "unknown"' "$TEMP_SERVER_FILE")
server_location=$(jq -r '.server.location // "unknown"' "$TEMP_SERVER_FILE")
server_country=$(jq -r '.server.country // "unknown"' "$TEMP_SERVER_FILE")
isp=$(jq -r '.isp // "unknown"' "$TEMP_SERVER_FILE")
result_url=$(jq -r '.result.url // "unknown"' "$TEMP_SERVER_FILE")
download_size=$(jq -r '.download.bytes // "0"' "$TEMP_SERVER_FILE")
upload_size=$(jq -r '.upload.bytes // "0"' "$TEMP_SERVER_FILE")
# Convert from bits to Mbps (fallback to awk if bc unavailable)
download_mbps=$(echo "scale=2; $download_bandwidth / 125000" | bc -l 2>/dev/null || echo "$download_bandwidth" | awk '{printf "%.2f", $1/125000}')
upload_mbps=$(echo "scale=2; $upload_bandwidth / 125000" | bc -l 2>/dev/null || echo "$upload_bandwidth" | awk '{printf "%.2f", $1/125000}')
success=1
else
echo "# Server $server_id: FAILED" >&2
# Set default values for failed test
ping_latency=0; ping_jitter=0; ping_low=0; ping_high=0
download_mbps=0; upload_mbps=0; packet_loss=0
external_ip="unknown"; test_timestamp=0; server_name="unknown"
server_location="unknown"; server_country="unknown"; isp="unknown"
result_url="unknown"; download_size=0; upload_size=0
success=0
fi
# Store results in arrays
SERVER_IDS+=("$server_id")
PING_LATENCIES+=("$ping_latency")
PING_JITTERS+=("$ping_jitter")
PING_LOWS+=("$ping_low")
PING_HIGHS+=("$ping_high")
DOWNLOAD_MBPS+=("$download_mbps")
UPLOAD_MBPS+=("$upload_mbps")
PACKET_LOSSES+=("$packet_loss")
EXTERNAL_IPS+=("$external_ip")
TEST_TIMESTAMPS+=("$test_timestamp")
SERVER_NAMES+=("$server_name")
SERVER_LOCATIONS+=("$server_location")
SERVER_COUNTRIES+=("$server_country")
ISPS+=("$isp")
RESULT_URLS+=("$result_url")
DOWNLOAD_SIZES+=("$download_size")
UPLOAD_SIZES+=("$upload_size")
SUCCESSES+=("$success")
# Cleanup temp file
rm -f "$TEMP_SERVER_FILE"
done
# Local Network Speed Test (iperf3) - Enhanced with additional metrics
echo "# Testing local network speed..." >&2
if command -v iperf3 >/dev/null 2>&1; then
# Test download from local server (we are client)
if local_down=$(timeout 10 iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 5 -J 2>/dev/null); then
local_download_mbps=$(echo "$local_down" | jq -r '.end.sum_received.bits_per_second // "0"' | awk '{printf "%.2f", $1/1000000}')
local_download_bytes=$(echo "$local_down" | jq -r '.end.sum_received.bytes // "0"')
local_download_retransmits=$(echo "$local_down" | jq -r '.end.sum_sent.retransmits // "0"')
local_download_rtt=$(echo "$local_down" | jq -r '.end.streams[0].sender.mean_rtt // "0"' | awk '{printf "%.3f", $1/1000}') # Convert to ms
local_download_rtt_var=$(echo "$local_down" | jq -r '.end.streams[0].sender.rtt_variance // "0"' | awk '{printf "%.3f", $1/1000}')
local_download_cpu_local=$(echo "$local_down" | jq -r '.end.cpu_utilization_percent.host_total // "0"')
local_download_cpu_remote=$(echo "$local_down" | jq -r '.end.cpu_utilization_percent.remote_total // "0"')
local_download_congestion_window=$(echo "$local_down" | jq -r '.end.streams[0].sender.max_snd_cwnd // "0"')
local_download_success=1
else
local_download_mbps=0; local_download_bytes=0; local_download_retransmits=0
local_download_rtt=0; local_download_rtt_var=0; local_download_cpu_local=0
local_download_cpu_remote=0; local_download_congestion_window=0; local_download_success=0
fi
# Test upload to local server (we are client, reverse mode)
if local_up=$(timeout 10 iperf3 -c "$IPERF_SERVER" -p "$IPERF_PORT" -t 5 -R -J 2>/dev/null); then
local_upload_mbps=$(echo "$local_up" | jq -r '.end.sum_sent.bits_per_second // "0"' | awk '{printf "%.2f", $1/1000000}')
local_upload_bytes=$(echo "$local_up" | jq -r '.end.sum_sent.bytes // "0"')
local_upload_retransmits=$(echo "$local_up" | jq -r '.end.sum_received.retransmits // "0"')
local_upload_rtt=$(echo "$local_up" | jq -r '.end.streams[0].receiver.mean_rtt // "0"' | awk '{printf "%.3f", $1/1000}')
local_upload_rtt_var=$(echo "$local_up" | jq -r '.end.streams[0].receiver.rtt_variance // "0"' | awk '{printf "%.3f", $1/1000}')
local_upload_cpu_local=$(echo "$local_up" | jq -r '.end.cpu_utilization_percent.host_total // "0"')
local_upload_cpu_remote=$(echo "$local_up" | jq -r '.end.cpu_utilization_percent.remote_total // "0"')
local_upload_congestion_window=$(echo "$local_up" | jq -r '.end.streams[0].receiver.max_snd_cwnd // "0"')
local_upload_success=1
else
local_upload_mbps=0; local_upload_bytes=0; local_upload_retransmits=0
local_upload_rtt=0; local_upload_rtt_var=0; local_upload_cpu_local=0
local_upload_cpu_remote=0; local_upload_congestion_window=0; local_upload_success=0
fi
else
echo "# iperf3 not installed, skipping local network test" >&2
local_download_mbps=0; local_upload_mbps=0; local_download_bytes=0; local_upload_bytes=0
local_download_retransmits=0; local_upload_retransmits=0; local_download_rtt=0; local_upload_rtt=0
local_download_rtt_var=0; local_upload_rtt_var=0; local_download_cpu_local=0; local_upload_cpu_local=0
local_download_cpu_remote=0; local_upload_cpu_remote=0; local_download_congestion_window=0; local_upload_congestion_window=0
local_download_success=0; local_upload_success=0
fi
# Calculate script runtime
SCRIPT_END_TIME=$(date +%s.%N)
SCRIPT_RUNTIME=$(echo "$SCRIPT_END_TIME - $SCRIPT_START_TIME" | bc -l 2>/dev/null || echo "$SCRIPT_END_TIME $SCRIPT_START_TIME" | awk '{printf "%.3f", $1-$2}')
# Output Prometheus metrics
cat <<EOF
# HELP internet_speedtest_latency Internet connection latency in milliseconds
# TYPE internet_speedtest_latency gauge
EOF
# Generate metrics for each server
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
isp="${ISPS[$i]}"
external_ip="${EXTERNAL_IPS[$i]}"
result_url="${RESULT_URLS[$i]}"
cat <<EOF
internet_speedtest_latency{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PING_LATENCIES[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_latency_low Internet connection minimum latency in milliseconds
# TYPE internet_speedtest_latency_low gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_latency_low{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PING_LOWS[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_latency_high Internet connection maximum latency in milliseconds
# TYPE internet_speedtest_latency_high gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_latency_high{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PING_HIGHS[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_jitter Internet connection jitter in milliseconds
# TYPE internet_speedtest_jitter gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_jitter{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PING_JITTERS[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_download Internet download speed in Mbps
# TYPE internet_speedtest_download gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_download{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${DOWNLOAD_MBPS[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_download_size Internet download test data size in bytes
# TYPE internet_speedtest_download_size gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_download_size{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${DOWNLOAD_SIZES[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_upload Internet upload speed in Mbps
# TYPE internet_speedtest_upload gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_upload{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${UPLOAD_MBPS[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_upload_size Internet upload test data size in bytes
# TYPE internet_speedtest_upload_size gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_upload_size{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${UPLOAD_SIZES[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_packet_loss Internet connection packet loss percentage
# TYPE internet_speedtest_packet_loss gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_packet_loss{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${PACKET_LOSSES[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_timestamp Unix timestamp when test was performed
# TYPE internet_speedtest_timestamp gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_timestamp{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${TEST_TIMESTAMPS[$i]}
EOF
done
cat <<EOF
# HELP internet_speedtest_info Internet speedtest metadata
# TYPE internet_speedtest_info gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
external_ip="${EXTERNAL_IPS[$i]}"
isp="${ISPS[$i]}"
result_url="${RESULT_URLS[$i]}"
cat <<EOF
internet_speedtest_info{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country",external_ip="$external_ip",isp="$isp",result_url="$result_url"} 1
EOF
done
cat <<EOF
# HELP internet_speedtest_success Whether internet speedtest succeeded (1=success, 0=failed)
# TYPE internet_speedtest_success gauge
EOF
for i in "${!SERVER_IDS[@]}"; do
server_id="${SERVER_IDS[$i]}"
server_name="${SERVER_NAMES[$i]}"
server_location="${SERVER_LOCATIONS[$i]}"
server_country="${SERVER_COUNTRIES[$i]}"
cat <<EOF
internet_speedtest_success{server_id="$server_id",server_name="$server_name",server_location="$server_location",server_country="$server_country"} ${SUCCESSES[$i]}
EOF
done
cat <<EOF
# HELP local_network_download Local network download speed in Mbps
# TYPE local_network_download gauge
local_network_download $local_download_mbps
# HELP local_network_upload Local network upload speed in Mbps
# TYPE local_network_upload gauge
local_network_upload $local_upload_mbps
# HELP local_network_download_bytes Total bytes downloaded in local network test
# TYPE local_network_download_bytes gauge
local_network_download_bytes $local_download_bytes
# HELP local_network_upload_bytes Total bytes uploaded in local network test
# TYPE local_network_upload_bytes gauge
local_network_upload_bytes $local_upload_bytes
# HELP local_network_download_retransmits TCP retransmissions during download test
# TYPE local_network_download_retransmits gauge
local_network_download_retransmits $local_download_retransmits
# HELP local_network_upload_retransmits TCP retransmissions during upload test
# TYPE local_network_upload_retransmits gauge
local_network_upload_retransmits $local_upload_retransmits
# HELP local_network_download_rtt Mean round-trip time during download test in milliseconds
# TYPE local_network_download_rtt gauge
local_network_download_rtt $local_download_rtt
# HELP local_network_upload_rtt Mean round-trip time during upload test in milliseconds
# TYPE local_network_upload_rtt gauge
local_network_upload_rtt $local_upload_rtt
# HELP local_network_download_rtt_variance RTT variance during download test in milliseconds
# TYPE local_network_download_rtt_variance gauge
local_network_download_rtt_variance $local_download_rtt_var
# HELP local_network_upload_rtt_variance RTT variance during upload test in milliseconds
# TYPE local_network_upload_rtt_variance gauge
local_network_upload_rtt_variance $local_upload_rtt_var
# HELP local_network_download_cpu_local Local CPU utilization during download test (percentage)
# TYPE local_network_download_cpu_local gauge
local_network_download_cpu_local $local_download_cpu_local
# HELP local_network_upload_cpu_local Local CPU utilization during upload test (percentage)
# TYPE local_network_upload_cpu_local gauge
local_network_upload_cpu_local $local_upload_cpu_local
# HELP local_network_download_cpu_remote Remote CPU utilization during download test (percentage)
# TYPE local_network_download_cpu_remote gauge
local_network_download_cpu_remote $local_download_cpu_remote
# HELP local_network_upload_cpu_remote Remote CPU utilization during upload test (percentage)
# TYPE local_network_upload_cpu_remote gauge
local_network_upload_cpu_remote $local_upload_cpu_remote
# HELP local_network_download_congestion_window Maximum TCP congestion window size during download test in bytes
# TYPE local_network_download_congestion_window gauge
local_network_download_congestion_window $local_download_congestion_window
# HELP local_network_upload_congestion_window Maximum TCP congestion window size during upload test in bytes
# TYPE local_network_upload_congestion_window gauge
local_network_upload_congestion_window $local_upload_congestion_window
# HELP local_network_test_success Whether local network test succeeded (1=success, 0=failed)
# TYPE local_network_test_success gauge
local_network_test_success $((local_download_success && local_upload_success))
# HELP speedtest_script_runtime_seconds Total script execution time in seconds
# TYPE speedtest_script_runtime_seconds gauge
speedtest_script_runtime_seconds $SCRIPT_RUNTIME
EOF
}
#########################
### Output Handling ###
#########################
# Collect the metrics once and deliver them.
# Globals:  OUTPUT_FILE (read) - destination file; empty means stdout
# Outputs:  metrics on stdout, or a confirmation line on stderr when a
#           file was written
write_output() {
    local payload
    payload=$(collect_metrics)

    # No destination configured: print and stop.
    if [[ -z "$OUTPUT_FILE" ]]; then
        echo "$payload"
        return
    fi

    # Stage next to the target and rename, so a reader never sees a
    # half-written file.
    local staging="${OUTPUT_FILE}.$$"
    mkdir -p "$(dirname "$OUTPUT_FILE")"
    echo "$payload" > "$staging"
    mv "$staging" "$OUTPUT_FILE"
    echo "Metrics written to $OUTPUT_FILE" >&2
}
# Serve metrics over HTTP by letting socat fork this script with
# --handle-request for every connection. Never returns: if socat stops
# with an error it is started again after a 5 second pause.
# Globals:  LISTEN_PORT (read)
# Exits:    1 when socat is not installed
start_server() {
    command -v socat >/dev/null 2>&1 || {
        echo "socat is required for HTTP mode. Install it first." >&2
        exit 1
    }

    echo "Starting Speedtest Metrics Exporter on port $LISTEN_PORT" >&2
    echo "Metrics available at http://localhost:$LISTEN_PORT/metrics" >&2

    while :; do
        if ! socat TCP-LISTEN:"$LISTEN_PORT",reuseaddr,fork EXEC:"$0 --handle-request" 2>/dev/null; then
            echo "Server error, restarting in 5 seconds..." >&2
            sleep 5
        fi
    done
}
# Main execution: HTTP mode takes priority, then file output, and with
# neither configured the metrics go straight to stdout.
case "$HTTP_MODE" in
    true)
        start_server
        ;;
    *)
        if [[ -z "$OUTPUT_FILE" ]]; then
            collect_metrics
        else
            write_output
        fi
        ;;
esac
+682
View File
@@ -0,0 +1,682 @@
#!/bin/bash
################################################
#### SSL Certificate Deployer ####
#### Deploy certs to multiple services ####
#### ####
#### Author: Phil Connor ####
#### License: MIT ####
#### Contact: contact@mylinux.work ####
#### Version: 1.00-030326 ####
################################################
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail
# Base name of this script; used in the help text and in error messages.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME
# Runtime variables
# CERT_FILE, KEY_FILE, CA_FILE and TARGETS hold the values given with
# --cert, --key, --ca and --targets; DRY_RUN and BACKUP are switched to
# "true" by --dry-run and --backup (see parse_arguments).
CERT_FILE=""
KEY_FILE=""
CA_FILE=""
TARGETS=""
DRY_RUN=false
BACKUP=false
# Any non-empty value enables debug_echo output; taken from the environment.
DEBUG=${DEBUG:-}
# ERR trap handler: report where the script failed and exit with the
# failing status.
# Arguments: $1 - exit status of the failed command
#            $2 - line number reported by the trap
handle_error() {
    local status=$1
    local failed_line=$2
    printf '%s\n' "Error: $SCRIPT_NAME failed at line $failed_line with exit code $status" >&2
    exit "$status"
}
# Single quotes defer expansion of $? and $LINENO until the trap fires.
trap 'handle_error $? $LINENO' ERR
# Print a [DEBUG] line on stderr, but only when DEBUG is non-empty.
debug_echo() {
    [[ -z "$DEBUG" ]] || echo "[DEBUG] $*" >&2
}
# Print an informational message on stdout.
info() {
    printf '%s\n' "[INFO] $*"
}
# Print a warning on stderr.
warn() {
    printf '%s\n' "[WARN] $*" >&2
}
# Print an error message on stderr. Does not exit; callers decide.
error() {
    printf '%s\n' "[ERROR] $*" >&2
}
# Print the usage text (options, supported targets, environment variables
# and examples) on stdout. The here-document is unquoted, so $SCRIPT_NAME
# is expanded in the usage line and in every example.
show_help() {
cat << EOF
Usage: $SCRIPT_NAME [OPTIONS]
Deploy SSL certificates to multiple service targets in a single run.
OPTIONS:
--cert FILE Path to the SSL certificate file (required)
--key FILE Path to the SSL private key file (required)
--ca FILE Path to the CA bundle file (optional)
--targets LIST Comma-separated list of targets (required)
--dry-run Show what would be done without making changes
--backup Backup existing certificates before overwriting
--help, -h Show this help message
SUPPORTED TARGETS:
nginx Copy cert+key to /etc/nginx/ssl/, reload nginx
apache Copy cert+key to /etc/httpd/ssl/ or /etc/apache2/ssl/, reload
postfix Update TLS cert/key in main.cf, reload postfix
dovecot Update ssl_cert/ssl_key in dovecot config, reload dovecot
artifactory Import cert into Artifactory Java keystore, restart
bitbucket Import cert into Bitbucket Java keystore, restart
jira Import cert into Jira Java keystore, restart
haproxy Concatenate cert+key into PEM at /etc/haproxy/certs/, reload
system Update system CA trust store
ENVIRONMENT VARIABLES:
DEBUG Enable debug output when set
EXAMPLES:
$SCRIPT_NAME --cert server.crt --key server.key --targets nginx,haproxy
$SCRIPT_NAME --cert server.crt --key server.key --ca ca-bundle.crt --targets apache,postfix,dovecot
$SCRIPT_NAME --cert server.crt --key server.key --targets artifactory,bitbucket,jira --backup
$SCRIPT_NAME --cert server.crt --key server.key --targets system --dry-run
DEBUG=1 $SCRIPT_NAME --cert server.crt --key server.key --targets nginx
EOF
}
# Check that a certificate and a private key belong together.
# The public key embedded in the certificate is compared with the public
# key derived from the private key, which works for every key type
# openssl can read (RSA, EC, ...). A file that cannot be read or parsed
# counts as a failure instead of silently comparing two empty results.
# Arguments: $1 - certificate file, $2 - private key file
# Returns:   0 on match, 1 on mismatch or when either file is unusable
validate_cert_key_match() {
    local cert="$1"
    local key="$2"
    local cert_pubkey key_pubkey

    if ! cert_pubkey=$(openssl x509 -noout -pubkey -in "$cert" 2>/dev/null) \
        || [[ -z "$cert_pubkey" ]]; then
        error "Could not read the public key from certificate: $cert"
        return 1
    fi

    if ! key_pubkey=$(openssl pkey -pubout -in "$key" 2>/dev/null) \
        || [[ -z "$key_pubkey" ]]; then
        error "Could not read the public key from key file: $key"
        return 1
    fi

    if [[ "$cert_pubkey" != "$key_pubkey" ]]; then
        error "Certificate and key do not match (public key mismatch)"
        debug_echo "Certificate: $cert"
        debug_echo "Key: $key"
        return 1
    fi

    debug_echo "Certificate and key match"
    return 0
}
# Make a timestamped copy (<file>.bak.YYYYmmddHHMMSS) of an existing file.
# Nothing happens when the file does not exist; in dry-run mode the
# action is only announced.
# Globals:   DRY_RUN (read)
# Arguments: $1 - file to back up
# Returns:   0 on success or when there was nothing to do, 1 when the
#            copy failed
backup_file() {
    local file="$1"

    [[ -f "$file" ]] || return 0

    local backup_name
    backup_name="${file}.bak.$(date +%Y%m%d%H%M%S)"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would backup $file -> $backup_name"
        return 0
    fi

    # Report the failure: callers must not overwrite a file whose backup
    # does not exist.
    if ! cp -a -- "$file" "$backup_name"; then
        error "Failed to back up $file -> $backup_name"
        return 1
    fi
    info "Backed up $file -> $backup_name"
}
# Copy a file into place and restrict it to mode 600.
# With BACKUP=true an existing destination is backed up first; with
# DRY_RUN=true the copy is only announced.
# Globals:   BACKUP, DRY_RUN (read)
# Arguments: $1 - source file, $2 - destination path
# Returns:   0 on success, 1 when the backup, the copy or the chmod failed
copy_file() {
    local src="$1"
    local dest="$2"

    if [[ "$BACKUP" == true ]]; then
        # Never overwrite when the requested backup could not be made.
        backup_file "$dest" || return 1
    fi

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would copy $src -> $dest"
        return 0
    fi

    if ! cp -a -- "$src" "$dest"; then
        error "Failed to copy $src -> $dest"
        return 1
    fi
    if ! chmod 600 -- "$dest"; then
        error "Failed to set permissions on $dest"
        return 1
    fi
    info "Copied $src -> $dest"
}
# Reload a systemd service when it is currently active.
# An inactive service is skipped with a warning (still a success); in
# dry-run mode the reload is only announced.
# Globals:   DRY_RUN (read)
# Arguments: $1 - service name
# Returns:   0 on success or skip, 1 when `systemctl reload` failed
reload_service() {
    local service="$1"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would reload $service"
        return 0
    fi

    if ! systemctl is-active --quiet "$service" 2>/dev/null; then
        warn "Service $service is not active, skipping reload"
        return 0
    fi

    # A failed reload must reach the caller so the target is not reported OK.
    if ! systemctl reload "$service"; then
        error "Failed to reload $service"
        return 1
    fi
    info "Reloaded $service"
}
# Restart a systemd service; in dry-run mode the restart is only announced.
# Globals:   DRY_RUN (read)
# Arguments: $1 - service name
# Returns:   0 on success, 1 when `systemctl restart` failed
restart_service() {
    local service="$1"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would restart $service"
        return 0
    fi

    # A failed restart must reach the caller so the target is not reported OK.
    if ! systemctl restart "$service"; then
        error "Failed to restart $service"
        return 1
    fi
    info "Restarted $service"
}
# Print the Java keystore password on stdout.
# Sources are tried in order until one yields a non-empty value:
#   1. HTTP GET on the given URL, reading .data.password from the JSON reply
#   2. `vault kv get -field=password secret/keystore`
#   3. the literal default "changeit"
# Arguments: $1 - password URL; may be empty to skip step 1
get_keystore_password() {
    local source_url="$1"
    local secret=""

    if [[ -n "$source_url" ]]; then
        debug_echo "Retrieving keystore password from $source_url"
        secret=$(curl -sf -X GET "$source_url" 2>/dev/null | jq -r '.data.password // empty' 2>/dev/null || true)
    fi

    if [[ -z "$secret" ]]; then
        debug_echo "Falling back to Vault CLI for keystore password"
        secret=$(vault kv get -field=password secret/keystore 2>/dev/null || true)

        if [[ -z "$secret" ]]; then
            debug_echo "Using default keystore password"
            secret="changeit"
        fi
    fi

    echo "$secret"
}
# Locate a Java installation that has both keytool and a cacerts store.
# Search order: $JAVA_HOME, a list of well-known install locations
# (entries may be glob patterns), then the keytool found on PATH paired
# with a system-wide cacerts file.
# The two output variables are written only when a complete pair was
# found, so a failed search leaves the caller's values untouched.
# Arguments: $1 - NAME of the variable receiving the directory holding keytool
#            $2 - NAME of the variable receiving the cacerts path
# Returns:   0 when found, 1 otherwise
find_java_keystore() {
    local -n java_bin_ref=$1
    local -n keystore_ref=$2
    local path_pattern java_dir system_store keytool_path

    # Common Java installation paths
    local java_paths=(
        "/opt/jfrog/artifactory/app/third-party/java"
        "/mnt/ebs/bitbucket/*/jre"
        "/mnt/ebs/jira/jre"
        "/usr/lib/jvm/java-*-openjdk"
        "/usr/lib/jvm/default-java"
        "/opt/java"
        "/usr/java/latest"
    )

    # Check JAVA_HOME first
    if [[ -n "${JAVA_HOME:-}" && -x "$JAVA_HOME/bin/keytool" \
        && -f "$JAVA_HOME/lib/security/cacerts" ]]; then
        java_bin_ref="$JAVA_HOME/bin"
        keystore_ref="$JAVA_HOME/lib/security/cacerts"
        debug_echo "Found Java via JAVA_HOME: $java_bin_ref"
        return 0
    fi

    # Search the common paths
    for path_pattern in "${java_paths[@]}"; do
        # Unquoted on purpose: the entry may be a glob that has to expand.
        for java_dir in $path_pattern; do
            if [[ -d "$java_dir" && -x "$java_dir/bin/keytool" \
                && -f "$java_dir/lib/security/cacerts" ]]; then
                java_bin_ref="$java_dir/bin"
                keystore_ref="$java_dir/lib/security/cacerts"
                debug_echo "Found Java at: $java_dir"
                return 0
            fi
        done
    done

    # Fallback: keytool from PATH plus a system keystore
    if keytool_path=$(command -v keytool 2>/dev/null); then
        local system_keystores=(
            "/etc/ssl/certs/java/cacerts"
            "/usr/lib/jvm/default-java/lib/security/cacerts"
            "/etc/pki/ca-trust/extracted/java/cacerts"
        )
        for system_store in "${system_keystores[@]}"; do
            if [[ -f "$system_store" ]]; then
                java_bin_ref="$(dirname "$keytool_path")"
                keystore_ref="$system_store"
                debug_echo "Found system Java at: $java_bin_ref"
                return 0
            fi
        done
    fi

    return 1
}
# Import the certificate ($CERT_FILE) into a Java keystore under the
# given alias and restart the owning service.
# The store password is passed to keytool through an environment
# variable (-storepass:env) rather than on the command line, where it
# would be visible in the process list. The service is restarted only
# after a successful import.
# Globals:   CERT_FILE, BACKUP, DRY_RUN (read)
# Arguments: $1 - keystore path
#            $2 - directory containing keytool
#            $3 - alias to (re)create
#            $4 - password URL handed to get_keystore_password (may be empty)
#            $5 - service to restart afterwards
# Returns:   0 on success, 1 when the backup, import or restart failed
deploy_java_keystore() {
    local keystore="$1"
    local java_bin="$2"
    local alias_name="$3"
    local vault_url="$4"
    local service_name="$5"
    local storepass
    storepass=$(get_keystore_password "$vault_url")

    if [[ "$BACKUP" == true ]]; then
        backup_file "$keystore" || return 1
    fi

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would delete alias '$alias_name' from keystore $keystore"
        info "[DRY RUN] Would import $CERT_FILE into keystore $keystore"
        info "[DRY RUN] Would restart $service_name"
        return 0
    fi

    # Best effort: the alias does not exist on a first deployment.
    KEYSTORE_STOREPASS="$storepass" "$java_bin/keytool" -delete \
        -alias "$alias_name" -keystore "$keystore" \
        -storepass:env KEYSTORE_STOREPASS 2>/dev/null || true

    if ! KEYSTORE_STOREPASS="$storepass" "$java_bin/keytool" -import -noprompt \
        -alias "$alias_name" -keystore "$keystore" -file "$CERT_FILE" \
        -storepass:env KEYSTORE_STOREPASS; then
        error "Failed to import $CERT_FILE into $keystore"
        return 1
    fi
    info "Imported certificate into $keystore"
    restart_service "$service_name"
}
# ---- Target handlers ----
# Install the certificate, key and optional CA bundle under
# /etc/nginx/ssl and reload nginx. The reload happens only when every
# file was put in place.
# Globals:   CERT_FILE, KEY_FILE, CA_FILE, DRY_RUN (read)
# Returns:   0 on success, non-zero when a step failed
deploy_nginx() {
    info "Deploying to nginx..."
    local ssl_dir="/etc/nginx/ssl"

    if [[ "$DRY_RUN" != true ]]; then
        if ! mkdir -p "$ssl_dir"; then
            error "Could not create $ssl_dir"
            return 1
        fi
    fi

    copy_file "$CERT_FILE" "$ssl_dir/server.crt" || return 1
    copy_file "$KEY_FILE" "$ssl_dir/server.key" || return 1
    if [[ -n "$CA_FILE" ]]; then
        copy_file "$CA_FILE" "$ssl_dir/ca-bundle.crt" || return 1
    fi

    reload_service nginx
}
# Install the certificate, key and optional CA bundle into Apache's ssl
# directory (/etc/httpd/ssl or /etc/apache2/ssl, whichever tree exists)
# and reload the matching service (httpd or apache2).
# Globals:   CERT_FILE, KEY_FILE, CA_FILE, DRY_RUN (read)
# Returns:   0 on success, non-zero when a step failed
deploy_apache() {
    info "Deploying to apache..."
    local ssl_dir=""

    if [[ -d "/etc/httpd" ]]; then
        ssl_dir="/etc/httpd/ssl"
    elif [[ -d "/etc/apache2" ]]; then
        ssl_dir="/etc/apache2/ssl"
    else
        error "Could not detect Apache configuration directory"
        return 1
    fi

    if [[ "$DRY_RUN" != true ]]; then
        if ! mkdir -p "$ssl_dir"; then
            error "Could not create $ssl_dir"
            return 1
        fi
    fi

    copy_file "$CERT_FILE" "$ssl_dir/server.crt" || return 1
    copy_file "$KEY_FILE" "$ssl_dir/server.key" || return 1
    if [[ -n "$CA_FILE" ]]; then
        copy_file "$CA_FILE" "$ssl_dir/ca-bundle.crt" || return 1
    fi

    # Detect and reload the correct service. The unit list is captured
    # first: piping it straight into `grep -q` under `set -o pipefail`
    # can fail spuriously, because grep exits on the first match and the
    # producer may then be killed by SIGPIPE.
    local units
    units=$(systemctl list-units --type=service --all 2>/dev/null) || true
    if grep -q "httpd.service" <<< "$units"; then
        reload_service httpd
    elif grep -q "apache2.service" <<< "$units"; then
        reload_service apache2
    else
        warn "Could not detect Apache service name"
    fi
}
# Point Postfix at the certificate and key by setting smtpd_tls_cert_file
# and smtpd_tls_key_file in main.cf, then reload postfix.
# Relative --cert/--key paths are made absolute first, because a relative
# path in main.cf would not be resolved against the directory this script
# was started from.
# Globals:   CERT_FILE, KEY_FILE, BACKUP, DRY_RUN (read)
#            POSTFIX_MAIN_CF (read, optional) - overrides the main.cf path
# Returns:   0 on success, non-zero when a step failed
deploy_postfix() {
    info "Deploying to postfix..."
    local main_cf="${POSTFIX_MAIN_CF:-/etc/postfix/main.cf}"

    if [[ ! -f "$main_cf" ]]; then
        error "Postfix main.cf not found at $main_cf"
        return 1
    fi

    local cert_path="$CERT_FILE"
    local key_path="$KEY_FILE"
    [[ "$cert_path" == /* ]] || cert_path="$PWD/$cert_path"
    [[ "$key_path" == /* ]] || key_path="$PWD/$key_path"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would update smtpd_tls_cert_file in $main_cf to $cert_path"
        info "[DRY RUN] Would update smtpd_tls_key_file in $main_cf to $key_path"
        info "[DRY RUN] Would reload postfix"
        return 0
    fi

    if [[ "$BACKUP" == true ]]; then
        backup_file "$main_cf" || return 1
    fi

    local param value escaped
    for param in smtpd_tls_cert_file smtpd_tls_key_file; do
        if [[ "$param" == smtpd_tls_cert_file ]]; then
            value="$cert_path"
        else
            value="$key_path"
        fi

        if grep -q "^${param}[[:space:]]*=" "$main_cf"; then
            # Escape what is special in a sed replacement using '|' as delimiter.
            escaped=$(printf '%s' "$value" | sed 's/[&|\\]/\\&/g')
            if ! sed -i "s|^${param}[[:space:]]*=.*|${param} = ${escaped}|" "$main_cf"; then
                error "Failed to update $param in $main_cf"
                return 1
            fi
        elif ! printf '%s\n' "$param = $value" >> "$main_cf"; then
            error "Failed to add $param to $main_cf"
            return 1
        fi
    done

    info "Updated $main_cf with certificate paths"
    reload_service postfix
}
# Point Dovecot at the certificate and key by setting ssl_cert and
# ssl_key, then reload dovecot.
# Only the exact settings "ssl_cert =" and "ssl_key =" are rewritten;
# settings that merely start with the same text (ssl_key_password,
# ssl_cert_username_field, ...) are left alone. Relative --cert/--key
# paths are made absolute before they are written.
# Globals:   CERT_FILE, KEY_FILE, BACKUP, DRY_RUN (read)
#            DOVECOT_SSL_CONF (read, optional) - config file tried before
#            the two default locations
# Returns:   0 on success, non-zero when a step failed
deploy_dovecot() {
    info "Deploying to dovecot..."
    local dovecot_conf=""
    local candidate

    for candidate in "${DOVECOT_SSL_CONF:-}" \
        "/etc/dovecot/conf.d/10-ssl.conf" "/etc/dovecot/dovecot.conf"; do
        if [[ -n "$candidate" && -f "$candidate" ]]; then
            dovecot_conf="$candidate"
            break
        fi
    done
    if [[ -z "$dovecot_conf" ]]; then
        error "Could not find dovecot configuration"
        return 1
    fi

    local cert_path="$CERT_FILE"
    local key_path="$KEY_FILE"
    [[ "$cert_path" == /* ]] || cert_path="$PWD/$cert_path"
    [[ "$key_path" == /* ]] || key_path="$PWD/$key_path"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would update ssl_cert in $dovecot_conf to <$cert_path"
        info "[DRY RUN] Would update ssl_key in $dovecot_conf to <$key_path"
        info "[DRY RUN] Would reload dovecot"
        return 0
    fi

    if [[ "$BACKUP" == true ]]; then
        backup_file "$dovecot_conf" || return 1
    fi

    local setting value escaped
    for setting in ssl_cert ssl_key; do
        if [[ "$setting" == ssl_cert ]]; then
            value="$cert_path"
        else
            value="$key_path"
        fi

        if grep -q "^${setting}[[:space:]]*=" "$dovecot_conf"; then
            # Escape what is special in a sed replacement using '|' as delimiter.
            escaped=$(printf '%s' "$value" | sed 's/[&|\\]/\\&/g')
            if ! sed -i "s|^${setting}[[:space:]]*=.*|${setting} = <${escaped}|" "$dovecot_conf"; then
                error "Failed to update $setting in $dovecot_conf"
                return 1
            fi
        elif ! printf '%s\n' "$setting = <$value" >> "$dovecot_conf"; then
            error "Failed to add $setting to $dovecot_conf"
            return 1
        fi
    done

    info "Updated $dovecot_conf with certificate paths"
    reload_service dovecot
}
# Import the certificate into the keystore of the Java runtime bundled
# with Artifactory, falling back to find_java_keystore when the bundled
# runtime is not at its default location, then restart "artifactory".
# Returns: 1 when no keytool/keystore pair could be found, otherwise the
#          status of deploy_java_keystore
deploy_artifactory() {
    info "Deploying to artifactory..."
    local java_bin="/opt/jfrog/artifactory/app/third-party/java/bin"
    local keystore="/opt/jfrog/artifactory/app/third-party/java/lib/security/cacerts"

    if ! [[ -x "$java_bin/keytool" && -f "$keystore" ]]; then
        debug_echo "Artifactory default paths not found, searching for Java"
        # find_java_keystore fills java_bin and keystore by name.
        find_java_keystore java_bin keystore || {
            error "Could not find Java keytool or keystore for Artifactory"
            return 1
        }
    fi

    deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "artifactory"
}
# Import the certificate into the keystore of the JRE shipped with
# Bitbucket (/mnt/ebs/bitbucket/<dir>/jre), falling back to
# find_java_keystore when no such JRE exists, then restart "atlbitbucket".
# Returns: 1 when no keytool/keystore pair could be found, otherwise the
#          status of deploy_java_keystore
deploy_bitbucket() {
    info "Deploying to bitbucket..."
    local java_bin=""
    local keystore=""

    # Take the first bundled JRE that has both keytool and cacerts.
    for bb_dir in /mnt/ebs/bitbucket/*/jre; do
        [[ -d "$bb_dir" && -x "$bb_dir/bin/keytool" && -f "$bb_dir/lib/security/cacerts" ]] || continue
        java_bin="$bb_dir/bin"
        keystore="$bb_dir/lib/security/cacerts"
        break
    done

    if ! [[ -n "$java_bin" && -n "$keystore" ]]; then
        debug_echo "Bitbucket default paths not found, searching for Java"
        # find_java_keystore fills java_bin and keystore by name.
        find_java_keystore java_bin keystore || {
            error "Could not find Java keytool or keystore for Bitbucket"
            return 1
        }
    fi

    deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "atlbitbucket"
}
# Import the certificate into the keystore of the JRE shipped with Jira
# (/mnt/ebs/jira/jre), falling back to find_java_keystore when that JRE
# is missing, then restart "jira".
# Returns: 1 when no keytool/keystore pair could be found, otherwise the
#          status of deploy_java_keystore
deploy_jira() {
    info "Deploying to jira..."
    local java_bin="/mnt/ebs/jira/jre/bin"
    local keystore="/mnt/ebs/jira/jre/lib/security/cacerts"

    if ! [[ -x "$java_bin/keytool" && -f "$keystore" ]]; then
        debug_echo "Jira default paths not found, searching for Java"
        # find_java_keystore fills java_bin and keystore by name.
        find_java_keystore java_bin keystore || {
            error "Could not find Java keytool or keystore for Jira"
            return 1
        }
    fi

    deploy_java_keystore "$keystore" "$java_bin" "ssl-cert" "" "jira"
}
# Build the combined PEM (certificate followed by key) that HAProxy
# expects at /etc/haproxy/certs/server.pem and reload haproxy.
# The PEM is assembled in a temporary file created by mktemp in the same
# directory and then renamed into place, so the private key is never
# written to a file created with the default umask and the final file is
# replaced in one step.
# Globals:   CERT_FILE, KEY_FILE, BACKUP, DRY_RUN (read)
# Returns:   0 on success, non-zero when a step failed
deploy_haproxy() {
    info "Deploying to haproxy..."
    local cert_dir="/etc/haproxy/certs"
    local pem_file="$cert_dir/server.pem"

    if [[ "$DRY_RUN" != true ]]; then
        if ! mkdir -p "$cert_dir"; then
            error "Could not create $cert_dir"
            return 1
        fi
    fi

    if [[ "$BACKUP" == true ]]; then
        backup_file "$pem_file" || return 1
    fi

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would concatenate $CERT_FILE + $KEY_FILE -> $pem_file"
        info "[DRY RUN] Would reload haproxy"
        return 0
    fi

    local tmp_pem
    if ! tmp_pem=$(mktemp "$cert_dir/.server.pem.XXXXXX"); then
        error "Could not create a temporary file in $cert_dir"
        return 1
    fi
    if ! cat "$CERT_FILE" "$KEY_FILE" > "$tmp_pem" \
        || ! chmod 600 "$tmp_pem" \
        || ! mv -f "$tmp_pem" "$pem_file"; then
        rm -f "$tmp_pem"
        error "Failed to create combined PEM at $pem_file"
        return 1
    fi

    info "Created combined PEM at $pem_file"
    reload_service haproxy
}
# Add the CA bundle (or, when none was given, the certificate itself) to
# the system trust store and refresh it.
# Uses update-ca-trust with /etc/pki/ca-trust/source/anchors when that
# command exists, otherwise update-ca-certificates with
# /usr/local/share/ca-certificates (where the file name must end in .crt).
# Globals:   CA_FILE, CERT_FILE, DRY_RUN (read)
# Returns:   0 on success, 1 when nothing can be installed, no refresh
#            command exists, or a step failed
deploy_system() {
    info "Deploying to system CA trust store..."

    if [[ -z "$CA_FILE" && -z "$CERT_FILE" ]]; then
        error "No certificate or CA bundle provided for system trust store"
        return 1
    fi

    local cert_to_install="${CA_FILE:-$CERT_FILE}"
    local cert_name trust_dir update_cmd family
    cert_name=$(basename "$cert_to_install")

    if command -v update-ca-trust >/dev/null 2>&1; then
        # RHEL/CentOS/Fedora/Rocky/Alma
        trust_dir="/etc/pki/ca-trust/source/anchors"
        update_cmd="update-ca-trust"
        family="RHEL-based"
    elif command -v update-ca-certificates >/dev/null 2>&1; then
        # Debian/Ubuntu
        trust_dir="/usr/local/share/ca-certificates"
        update_cmd="update-ca-certificates"
        family="Debian-based"
        # Debian requires .crt extension
        cert_name="${cert_name%.*}.crt"
    else
        error "Could not find update-ca-trust or update-ca-certificates"
        return 1
    fi

    local dest="$trust_dir/$cert_name"

    if [[ "$DRY_RUN" == true ]]; then
        info "[DRY RUN] Would copy $cert_to_install -> $dest"
        info "[DRY RUN] Would run $update_cmd"
        return 0
    fi

    copy_file "$cert_to_install" "$dest" || return 1
    # copy_file restricts its destination to 0600, which suits private
    # keys; a trust anchor is public and must stay readable by everyone.
    if ! chmod 644 "$dest"; then
        error "Failed to set permissions on $dest"
        return 1
    fi
    if ! "$update_cmd"; then
        error "$update_cmd failed"
        return 1
    fi
    info "Updated system CA trust store ($family)"
}
# Parse the command line into CERT_FILE, KEY_FILE, CA_FILE, TARGETS,
# DRY_RUN and BACKUP.
# An option that takes a value but is the last word on the command line
# is rejected with an error instead of looping forever on a `shift 2`
# that cannot succeed.
# Arguments: the script's command-line arguments
# Exits:     0 after --help/-h; 1 on an unknown option or a missing value
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --cert|--key|--ca|--targets)
                if [[ $# -lt 2 ]]; then
                    error "Option $1 requires an argument"
                    show_help >&2
                    exit 1
                fi
                case "$1" in
                    --cert) CERT_FILE="$2" ;;
                    --key) KEY_FILE="$2" ;;
                    --ca) CA_FILE="$2" ;;
                    --targets) TARGETS="$2" ;;
                esac
                shift 2
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --backup)
                BACKUP=true
                shift
                ;;
            --help|-h)
                show_help
                exit 0
                ;;
            *)
                error "Unknown option: $1"
                show_help >&2
                exit 1
                ;;
        esac
    done
}
# Validate the parsed options before anything is deployed: required
# options are present, the files exist, the certificate and key parse,
# and the key belongs to the certificate.
# The key is checked with `openssl pkey`, which accepts every private key
# type openssl supports rather than RSA keys only.
# Globals:   CERT_FILE, KEY_FILE, CA_FILE, TARGETS (read)
# Exits:     1 on the first failed check
validate_inputs() {
    if [[ -z "$CERT_FILE" ]]; then
        error "Certificate file is required (--cert)"
        exit 1
    fi
    if [[ -z "$KEY_FILE" ]]; then
        error "Key file is required (--key)"
        exit 1
    fi
    if [[ -z "$TARGETS" ]]; then
        error "At least one target is required (--targets)"
        exit 1
    fi

    if [[ ! -f "$CERT_FILE" ]]; then
        error "Certificate file not found: $CERT_FILE"
        exit 1
    fi
    if [[ ! -f "$KEY_FILE" ]]; then
        error "Key file not found: $KEY_FILE"
        exit 1
    fi
    if [[ -n "$CA_FILE" && ! -f "$CA_FILE" ]]; then
        error "CA bundle file not found: $CA_FILE"
        exit 1
    fi

    if ! openssl x509 -noout -text -in "$CERT_FILE" >/dev/null 2>&1; then
        error "Invalid certificate file: $CERT_FILE"
        exit 1
    fi
    if ! openssl pkey -noout -in "$KEY_FILE" >/dev/null 2>&1; then
        error "Invalid key file: $KEY_FILE"
        exit 1
    fi

    if ! validate_cert_key_match "$CERT_FILE" "$KEY_FILE"; then
        exit 1
    fi
}
# Run the handler for one target name.
# Every supported target NAME is served by a function called deploy_NAME.
# Arguments: $1 - target name
# Returns:   the handler's status, or 1 for an unknown target
deploy_target() {
    local target="$1"

    case "$target" in
        nginx|apache|postfix|dovecot|artifactory|bitbucket|jira|haproxy|system)
            "deploy_${target}"
            ;;
        *)
            error "Unknown target: $target"
            error "Valid targets: nginx, apache, postfix, dovecot, artifactory, bitbucket, jira, haproxy, system"
            return 1
            ;;
    esac
}
# Entry point: parse and validate the options, then deploy to every
# requested target in order. A failing target does not stop the run; the
# summary line reports both counts.
# Arguments: the script's command-line arguments
# Returns:   0 when every target succeeded, 1 when at least one failed
main() {
    parse_arguments "$@"
    validate_inputs

    if [[ "$DRY_RUN" == true ]]; then
        info "Running in DRY RUN mode — no changes will be made"
    fi

    local failed=0
    local succeeded=0
    local target
    local -a target_list=()

    IFS=',' read -ra target_list <<< "$TARGETS"
    for target in "${target_list[@]}"; do
        # Trim whitespace
        target="${target//[[:space:]]/}"
        info "--- Deploying to target: $target ---"
        if deploy_target "$target"; then
            # Plain assignment: `((n++))` returns status 1 while n is 0.
            succeeded=$((succeeded + 1))
            info "Target $target: OK"
        else
            failed=$((failed + 1))
            error "Target $target: FAILED"
        fi
        echo
    done

    info "Deployment complete: $succeeded succeeded, $failed failed"
    if [[ $failed -gt 0 ]]; then
        return 1
    fi
}
# Execute main function if script is run directly
# (BASH_SOURCE[0] equals $0 only when the file is executed; when it is
# sourced the functions are defined and nothing is run.)
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi
+347
View File
@@ -0,0 +1,347 @@
#!/bin/bash
################################################################################
# Script Name: systemd-service-exporter.sh
# Version: 1.0
# Description: Prometheus textfile collector exporter for systemd service status
# Monitors service state, uptime, restart count, and enabled status
#
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
# Date: 2026-03-03
#
# Prerequisites:
# - systemctl command available (systemd)
# - node_exporter with textfile collector enabled
# - /var/lib/node_exporter directory exists
#
# Usage:
# # Configure services via environment variable
# SERVICE_LIST="nginx,sshd,cron" ./systemd-service-exporter.sh
#
# # Configure services via config file
# echo -e "nginx\nsshd\ncron" > /etc/systemd-service-exporter.conf
# ./systemd-service-exporter.sh
#
# # Debug mode
# DEBUG=1 SERVICE_LIST="nginx" ./systemd-service-exporter.sh
#
# # Dry run (output to stdout)
# ./systemd-service-exporter.sh --dry-run
#
# Metrics Exported:
# - linux_systemd_service_state{service,state} - Service state (1=current, 0=other)
# - linux_systemd_service_uptime_seconds{service} - Seconds since service became active
# - linux_systemd_service_restarts_total{service} - Number of times the service restarted
# - linux_systemd_service_enabled{service} - Whether the service is enabled (1/0)
#
# Configuration:
# Environment: SERVICE_LIST (comma-separated)
# Config file: /etc/systemd-service-exporter.conf (one per line)
# Textfile directory: /var/lib/node_exporter
#
################################################################################
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail
# ============================================================================
# CONFIGURATION
# ============================================================================
readonly VERSION="1.0"
# File name of this script without its directory part.
readonly SCRIPT_NAME="${0##*/}"
# TEXTFILE_DIR and CONFIG_FILE keep a value already present in the
# environment and otherwise fall back to the defaults shown here.
readonly TEXTFILE_DIR="${TEXTFILE_DIR:-/var/lib/node_exporter}"
readonly OUTPUT_FILE="${TEXTFILE_DIR}/systemd_services.prom"
readonly CONFIG_FILE="${CONFIG_FILE:-/etc/systemd-service-exporter.conf}"
# Staging file next to OUTPUT_FILE, suffixed with this process's PID.
readonly TMP_FILE="${OUTPUT_FILE}.$$"
# Runtime flags
DRY_RUN=false
# Any non-empty value enables debug_echo output.
DEBUG=${DEBUG:-}
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print a [DEBUG] line on stderr, but only when DEBUG is non-empty.
debug_echo() {
    [[ -z "$DEBUG" ]] || echo "[DEBUG] $*" >&2
}
# Print an error message on stderr. Does not exit; callers decide.
log_error() {
    printf '%s\n' "[ERROR] $*" >&2
}
# Remove the staging file; registered below to run whenever the script exits.
cleanup() { rm -f "$TMP_FILE"; }
trap 'cleanup' EXIT
# Print the usage text on stdout and exit with status 0. The
# here-document is unquoted, so $SCRIPT_NAME is expanded in it.
show_help() {
cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS]
Prometheus textfile collector exporter for systemd service status.
Monitors service state, uptime, restart count, and enabled status.
OPTIONS:
--dry-run Output metrics to stdout instead of writing to file
--debug Enable debug output
--help Show this help message
--version Show version
CONFIGURATION:
Services can be configured in two ways:
1. Environment variable (comma-separated):
SERVICE_LIST="nginx,sshd,cron" $SCRIPT_NAME
2. Config file (one service per line):
/etc/systemd-service-exporter.conf
The environment variable takes precedence over the config file.
ENVIRONMENT VARIABLES:
SERVICE_LIST Comma-separated list of services to monitor
CONFIG_FILE Path to config file (default: /etc/systemd-service-exporter.conf)
TEXTFILE_DIR Textfile collector directory (default: /var/lib/node_exporter)
DEBUG Enable debug output when set to any value
EXAMPLES:
SERVICE_LIST="nginx,sshd,cron" $SCRIPT_NAME
SERVICE_LIST="docker" $SCRIPT_NAME --dry-run
DEBUG=1 $SCRIPT_NAME
EOF
exit 0
}
# Print "<script name> version <version>" on stdout and exit with status 0.
show_version() {
    printf '%s version %s\n' "$SCRIPT_NAME" "$VERSION"
    exit 0
}
# ============================================================================
# SERVICE DISCOVERY
# ============================================================================
# Print the configured service names on stdout, one per line.
# Source: SERVICE_LIST (comma-separated) when set, otherwise CONFIG_FILE
# (one name per line; text after '#' is a comment).
# In both sources all whitespace, including tabs and carriage returns, is
# removed from each entry and empty entries are skipped. A last config
# line without a trailing newline is still read.
# Globals:   SERVICE_LIST, CONFIG_FILE (read)
# Exits:     1 when neither source exists or no service name remains
load_services() {
    local services=()
    local raw=()
    local entry line

    if [[ -n "${SERVICE_LIST:-}" ]]; then
        debug_echo "Loading services from SERVICE_LIST environment variable"
        IFS=',' read -ra raw <<< "$SERVICE_LIST"
        for entry in "${raw[@]}"; do
            entry="${entry//[[:space:]]/}"
            if [[ -n "$entry" ]]; then
                services+=("$entry")
            fi
        done
    elif [[ -f "$CONFIG_FILE" ]]; then
        debug_echo "Loading services from config file: $CONFIG_FILE"
        # `|| [[ -n "$line" ]]` keeps a final line that has no newline.
        while IFS= read -r line || [[ -n "$line" ]]; do
            line="${line%%#*}"
            line="${line//[[:space:]]/}"
            if [[ -n "$line" ]]; then
                services+=("$line")
            fi
        done < "$CONFIG_FILE"
    else
        log_error "No services configured. Set SERVICE_LIST or create $CONFIG_FILE"
        exit 1
    fi

    if [[ ${#services[@]} -eq 0 ]]; then
        log_error "No services found in configuration"
        exit 1
    fi

    debug_echo "Monitoring ${#services[@]} services: ${services[*]}"
    printf '%s\n' "${services[@]}"
}
# ============================================================================
# METRICS COLLECTION
# ============================================================================
# Print what `systemctl is-active` reports for the service, or "unknown"
# when it prints nothing.
# Arguments: $1 - service name
get_service_state() {
    local unit="$1"
    local reported
    reported=$(systemctl is-active "$unit" 2>/dev/null) || true

    if [[ -n "$reported" ]]; then
        echo "$reported"
    else
        echo "unknown"
    fi
}
# Print how many seconds the service has been up, or 0 when it is not up.
# systemd keeps ActiveEnterTimestamp after a unit has stopped or failed,
# so the timestamp is only used while ActiveState is "active" or
# "reloading"; otherwise a stopped service would report a growing uptime.
# Arguments: $1 - service name
# Outputs:   a non-negative integer on stdout
get_service_uptime() {
    local service="$1"
    local props key value
    local state=""
    local timestamp=""

    props=$(systemctl show "$service" \
        --property=ActiveState --property=ActiveEnterTimestamp 2>/dev/null) || true

    # Output is one "Name=Value" pair per line.
    while IFS='=' read -r key value; do
        case "$key" in
            ActiveState) state="$value" ;;
            ActiveEnterTimestamp) timestamp="$value" ;;
        esac
    done <<< "$props"

    if [[ "$state" != "active" && "$state" != "reloading" ]] || [[ -z "$timestamp" ]]; then
        echo "0"
        return
    fi

    local active_epoch
    active_epoch=$(date -d "$timestamp" +%s 2>/dev/null) || true
    if [[ -z "$active_epoch" ]]; then
        echo "0"
        return
    fi

    local now
    now=$(date +%s)
    local uptime=$((now - active_epoch))
    if [[ $uptime -lt 0 ]]; then
        echo "0"
    else
        echo "$uptime"
    fi
}
# Print the NRestarts property of the service, or 0 when systemctl
# reports nothing for it.
# Arguments: $1 - service name
get_restart_count() {
    local unit="$1"
    local restarts
    restarts=$(systemctl show "$unit" --property=NRestarts --value 2>/dev/null) || true

    if [[ -z "$restarts" ]]; then
        echo "0"
    else
        echo "$restarts"
    fi
}
# Print 1 when `systemctl is-enabled` reports exactly "enabled", else 0.
# Arguments: $1 - service name
get_enabled_status() {
    local unit="$1"

    case "$(systemctl is-enabled "$unit" 2>/dev/null)" in
        enabled) echo "1" ;;
        *) echo "0" ;;
    esac
}
# Print 1 when the two state names are identical, else 0.
# Arguments: $1 - state the service is in, $2 - state being reported
state_to_value() {
    local actual="$1"
    local wanted="$2"
    local flag=0

    [[ "$actual" != "$wanted" ]] || flag=1
    echo "$flag"
}
# Print all metrics for the configured services in Prometheus text format.
# Every line is written with a fixed printf format, so backslash
# sequences inside a unit name (systemd escapes such as \x2d) are never
# interpreted. Label values are escaped for the exposition format
# (backslash and double quote).
# Returns: 1, without printing any metric, when the service list could
#          not be loaded
collect_metrics() {
    local services=()
    local service_list
    # load_services terminates with `exit` on error; running it in a
    # command substitution makes that failure visible here.
    service_list=$(load_services) || return 1
    mapfile -t services <<< "$service_list"

    local service label state s uptime restarts enabled

    printf '%s\n' \
        "# HELP linux_systemd_service_state Current state of the systemd service" \
        "# TYPE linux_systemd_service_state gauge"
    for service in "${services[@]}"; do
        label=${service//\\/\\\\}
        label=${label//\"/\\\"}
        state=$(get_service_state "$service")
        debug_echo "Service $service: state=$state"
        for s in active inactive failed; do
            printf 'linux_systemd_service_state{service="%s",state="%s"} %s\n' \
                "$label" "$s" "$(state_to_value "$state" "$s")"
        done
    done

    printf '%s\n' \
        "# HELP linux_systemd_service_uptime_seconds Time in seconds since the service became active" \
        "# TYPE linux_systemd_service_uptime_seconds gauge"
    for service in "${services[@]}"; do
        label=${service//\\/\\\\}
        label=${label//\"/\\\"}
        uptime=$(get_service_uptime "$service")
        debug_echo "Service $service: uptime=${uptime}s"
        printf 'linux_systemd_service_uptime_seconds{service="%s"} %s\n' "$label" "$uptime"
    done

    printf '%s\n' \
        "# HELP linux_systemd_service_restarts_total Total number of service restarts" \
        "# TYPE linux_systemd_service_restarts_total counter"
    for service in "${services[@]}"; do
        label=${service//\\/\\\\}
        label=${label//\"/\\\"}
        restarts=$(get_restart_count "$service")
        debug_echo "Service $service: restarts=$restarts"
        printf 'linux_systemd_service_restarts_total{service="%s"} %s\n' "$label" "$restarts"
    done

    printf '%s\n' \
        "# HELP linux_systemd_service_enabled Whether the service is enabled to start at boot" \
        "# TYPE linux_systemd_service_enabled gauge"
    for service in "${services[@]}"; do
        label=${service//\\/\\\\}
        label=${label//\"/\\\"}
        enabled=$(get_enabled_status "$service")
        debug_echo "Service $service: enabled=$enabled"
        printf 'linux_systemd_service_enabled{service="%s"} %s\n' "$label" "$enabled"
    done
}
# ============================================================================
# OUTPUT
# ============================================================================
# Collect the metrics and publish them: to stdout in dry-run mode,
# otherwise to OUTPUT_FILE through TMP_FILE plus a rename, so a reader
# never sees a half-written file.
# When collecting or writing fails, the existing OUTPUT_FILE is left
# untouched and the script exits with status 1.
# Globals:   DRY_RUN, TEXTFILE_DIR, TMP_FILE, OUTPUT_FILE (read)
write_metrics() {
    local metrics
    if ! metrics=$(collect_metrics); then
        log_error "Metric collection failed; not updating $OUTPUT_FILE"
        exit 1
    fi

    if [[ "$DRY_RUN" == "true" ]]; then
        printf '%s\n' "$metrics"
        return
    fi

    if [[ ! -d "$TEXTFILE_DIR" ]]; then
        log_error "Textfile collector directory does not exist: $TEXTFILE_DIR"
        exit 1
    fi

    if ! printf '%s\n' "$metrics" > "$TMP_FILE"; then
        log_error "Could not write $TMP_FILE"
        exit 1
    fi
    if ! mv "$TMP_FILE" "$OUTPUT_FILE"; then
        log_error "Could not move $TMP_FILE to $OUTPUT_FILE"
        exit 1
    fi
    debug_echo "Metrics written to $OUTPUT_FILE"
}
# ============================================================================
# MAIN
# ============================================================================
# Entry point: apply the command-line flags, make sure systemctl is
# available, then collect and write the metrics.
# Arguments: the script's command-line arguments
# Exits:     1 on an unknown option or when systemctl is missing;
#            show_help and show_version end the script themselves
main() {
    local opt

    # No option takes a value, so the arguments can be walked one by one.
    for opt in "$@"; do
        case "$opt" in
            --dry-run)
                DRY_RUN=true
                ;;
            --debug)
                DEBUG=1
                ;;
            --help|-h)
                show_help
                ;;
            --version|-v)
                show_version
                ;;
            *)
                log_error "Unknown option: $opt"
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done

    command -v systemctl &>/dev/null || {
        log_error "systemctl not found — this script requires systemd"
        exit 1
    }

    write_metrics
}
# Run the exporter, passing the command-line arguments through unchanged.
main "$@"
+542
View File
@@ -0,0 +1,542 @@
#!/bin/bash
################################################################################
# Script Name: ufw-blocklist-metrics.sh
# Version: 2.3
# Description: Production Prometheus exporter for UFW Blocklists (OPTIMIZED)
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
#
# Optimizations in v2.1:
# - Single journalctl call with cached output
# - Cached feed config parsing
# - Eliminated redundant file operations
# - 4.5 minutes → ~30 seconds typical runtime
#
# Fixes in v2.2:
# - Fixed typo in script name header (bocklist → blocklist)
# - Fixed ipset member counting to use Members: section
# - Fixed empty journal data producing false grep counts
# - Fixed HTTP response headers missing trailing \r\n
# - Fixed SC2155/SC2126/SC2295 shellcheck warnings
# - Added scrape timestamp metric
# - Used SCRIPT_VERSION variable for version strings
#
# Fixes in v2.3:
# - Fixed get_ipset_size using grep -c (exit 1 on 0 matches) causing
# duplicate "0" output lines and arithmetic errors; switched to wc -l
# - Fixed same grep -c || echo 0 bug in ufw_blocklist_enabled and
# ufw_blocklist_total_rules heredoc substitutions
# - Fixed misplaced 2>/dev/null on [ ] test for conntrack and effectiveness
# - Fixed hardcoded v2.1 in usage text; now uses SCRIPT_VERSION
################################################################################
# Locations of the feed configuration and of the parsed feed cache.
CONFIG_DIR="/etc/ufw-threats"
CACHE_DIR="$CONFIG_DIR/cache"
FEEDS_CONFIG="$CONFIG_DIR/feeds.conf"
# ipset naming: per-feed sets are "<IPSET_PREFIX>-<feed>" (IPv6 sets add "-v6").
IPSET_PREFIX="ufw-feed"
WHITELIST_IPSET="ufw-whitelist"
WHITELIST_IPSET_V6="ufw-whitelist-v6"
# Reported in the usage text, the info metric and the HTTP index page.
SCRIPT_VERSION="2.3"
# Output settings; OUTPUT_FILE stays empty unless --textfile or -o is given,
# in which case main prints the metrics to stdout.
TEXTFILE_DIR="/var/lib/node_exporter"
OUTPUT_FILE=""
HTTP_MODE=false
HTTP_PORT=9418
# PID file used by acquire_lock to prevent concurrent runs.
LOCK_FILE="/var/run/ufw-blocklist-metrics.lock"
# Global cache variables
# JOURNAL_1H / JOURNAL_24H are filled by cache_journal_data,
# FEEDS_ARRAY by cache_feeds_config.
JOURNAL_1H=""
JOURNAL_24H=""
FEEDS_ARRAY=()
# Print the usage text (including the current HTTP port and script version)
# to stdout and terminate the script with exit status 0.
show_usage() {
cat <<EOF
Usage: $0 [OPTIONS]
Export per-feed UFW threat statistics as Prometheus metrics (v${SCRIPT_VERSION}).
MODES:
--textfile Write to node_exporter textfile collector
--http Run HTTP server on port $HTTP_PORT
OPTIONS:
-p, --port HTTP port
-o, --output Output file
-h, --help Show help
EOF
exit 0
}
# Parse command-line options into the global settings.
# Globals written: OUTPUT_FILE, HTTP_MODE, HTTP_PORT
# Arguments: the script's command-line arguments
# Exits 1 on an unknown option, on an option that is missing its value, or
# on a port that is not an integer in 1..65535.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            --textfile)
                OUTPUT_FILE="$TEXTFILE_DIR/ufw_blocklist_metrics.prom"
                shift
                ;;
            --http)
                HTTP_MODE=true
                shift
                ;;
            -p|--port)
                # Without this check "shift 2" fails when the value is
                # missing, nothing is shifted and the loop never terminates.
                if [[ $# -lt 2 ]]; then
                    echo "ERROR: $1 requires a port number" >&2
                    exit 1
                fi
                if ! [[ "$2" =~ ^[0-9]{1,5}$ ]] || (( 10#$2 < 1 || 10#$2 > 65535 )); then
                    echo "ERROR: Invalid port: $2" >&2
                    exit 1
                fi
                HTTP_PORT="$2"
                shift 2
                ;;
            -o|--output)
                if [[ $# -lt 2 ]]; then
                    echo "ERROR: $1 requires a file path" >&2
                    exit 1
                fi
                OUTPUT_FILE="$2"
                shift 2
                ;;
            *)
                echo "Unknown: $1" >&2
                exit 1
                ;;
        esac
    done
}
# Load all journal data ONCE at startup
# Query the journal once per time window and keep only the lines containing
# "[THREAT" in JOURNAL_1H / JOURNAL_24H. Each query is bounded by a timeout;
# when nothing matches (or the query fails) the variable ends up empty.
cache_journal_data() {
    local threat_marker='\[THREAT'

    JOURNAL_1H=$(
        timeout 10 journalctl --since "1 hour ago" 2>/dev/null \
            | grep "$threat_marker" || echo ""
    )
    JOURNAL_24H=$(
        timeout 30 journalctl --since "24 hours ago" 2>/dev/null \
            | grep "$threat_marker" || echo ""
    )
}
# Parse feeds config ONCE into array
# Load every feed definition from $FEEDS_CONFIG into FEEDS_ARRAY, one
# "enabled|name|url|type|description" string per feed. Comment lines
# (first field starting with '#') and blank lines are skipped.
# Globals written: FEEDS_ARRAY (reset to empty when the file is missing)
cache_feeds_config() {
    FEEDS_ARRAY=()
    [ -f "$FEEDS_CONFIG" ] || return 0

    # Keep the field variables local so they do not leak into the caller.
    local enabled name url type description

    # "|| [ -n ... ]" keeps the final line when the file has no trailing
    # newline: read returns non-zero there but still fills the variables.
    while IFS='|' read -r enabled name url type description || [ -n "$enabled" ]; do
        [[ "$enabled" =~ ^#.*$ ]] && continue
        [[ -z "$enabled" ]] && continue
        FEEDS_ARRAY+=("$enabled|$name|$url|$type|$description")
    done < "$FEEDS_CONFIG"
    return 0
}
# Print the number of lines that follow the "Members:" header in the output
# of `ipset list <set>`; prints 0 when the set cannot be listed.
# Arguments: $1 - ipset name
get_ipset_size() {
    local set_name="$1" member_count

    member_count=$(
        ipset list "$set_name" 2>/dev/null \
            | sed -n '/^Members:$/,$p' \
            | tail -n +2 \
            | wc -l
    )
    printf '%s\n' "${member_count:-0}"
}
# Optimized: Use cached journal data
# Print how many cached journal lines carry the "[THREAT:<feed>]" tag.
# Arguments: $1 - feed name
#            $2 - window, either "1 hour ago" or "24 hours ago"
# Prints 0 for any other window or when the cached journal data is empty.
get_feed_blocks() {
    local feed_name="$1" window="$2"
    local journal hits

    if [[ "$window" == "1 hour ago" ]]; then
        journal="$JOURNAL_1H"
    elif [[ "$window" == "24 hours ago" ]]; then
        journal="$JOURNAL_24H"
    else
        echo 0
        return
    fi

    [ -n "$journal" ] || { echo 0; return; }

    hits=$(printf '%s' "$journal" | grep -c "\[THREAT:${feed_name}\]" 2>/dev/null)
    echo "${hits:-0}"
}
# Print how many cached journal lines carry the "[THREAT-v6:<feed>]" tag.
# Arguments: $1 - feed name
#            $2 - window, either "1 hour ago" or "24 hours ago"
# Prints 0 for any other window or when the cached journal data is empty.
get_feed_blocks_v6() {
    local feed_name="$1" window="$2"
    local journal hits

    if [[ "$window" == "1 hour ago" ]]; then
        journal="$JOURNAL_1H"
    elif [[ "$window" == "24 hours ago" ]]; then
        journal="$JOURNAL_24H"
    else
        echo 0
        return
    fi

    [ -n "$journal" ] || { echo 0; return; }

    hits=$(printf '%s' "$journal" | grep -c "\[THREAT-v6:${feed_name}\]" 2>/dev/null)
    echo "${hits:-0}"
}
# Print the modification time of a regular file as a Unix timestamp,
# or 0 when the path is not a regular file or stat fails.
# Arguments: $1 - file path
get_file_timestamp() {
    local path="$1"

    if [ -f "$path" ] && stat -c %Y "$path" 2>/dev/null; then
        return 0
    fi
    echo "0"
}
# Print the size of a regular file in bytes, or 0 when the path is not a
# regular file or stat fails.
# Arguments: $1 - file path
get_file_size() {
    local path="$1"

    if [ -f "$path" ] && stat -c %s "$path" 2>/dev/null; then
        return 0
    fi
    echo "0"
}
# Print the number of seconds since a regular file was last modified,
# or 0 when the path is not a regular file.
# Arguments: $1 - file path
get_cache_age() {
    local path="$1"
    local now mtime

    [ -f "$path" ] || { echo "0"; return; }

    now=$(date +%s)
    # A failing stat counts as mtime 0, which yields the current epoch time.
    mtime=$(stat -c %Y "$path" 2>/dev/null || echo 0)
    echo $((now - mtime))
}
# Print the contents of /proc/sys/net/netfilter/nf_conntrack_count,
# or 0 when that file does not exist.
get_conntrack_count() {
    local proc_file=/proc/sys/net/netfilter/nf_conntrack_count

    if [ ! -f "$proc_file" ]; then
        echo "0"
        return
    fi
    cat "$proc_file"
}
# Print the contents of /proc/sys/net/netfilter/nf_conntrack_max,
# or 0 when that file does not exist.
get_conntrack_max() {
    local proc_file=/proc/sys/net/netfilter/nf_conntrack_max

    if [ ! -f "$proc_file" ]; then
        echo "0"
        return
    fi
    cat "$proc_file"
}
# Print the fourth field of the "Size in memory:" line reported by
# `ipset list <set> -t`; prints 0 when no such line is produced.
# Arguments: $1 - ipset name
get_ipset_memory() {
    local set_name="$1" bytes

    bytes=$(ipset list "$set_name" -t 2>/dev/null | awk '/Size in memory:/ {print $4}')
    echo "${bytes:-0}"
}
# Print "used|available|use%" for the filesystem holding $CACHE_DIR, taken
# from columns 3-5 of the last line of `df -B1`. Prints "0|0|0%" when the
# cache directory does not exist.
get_cache_disk_usage() {
    if [ ! -d "$CACHE_DIR" ]; then
        echo "0|0|0%"
        return
    fi

    df -B1 "$CACHE_DIR" 2>/dev/null \
        | tail -1 \
        | awk '{print $3"|"$4"|"$5}'
}
# Print the total size of $CACHE_DIR in bytes as reported by `du -sb`,
# or 0 when the directory does not exist.
get_total_cache_size() {
    if [ ! -d "$CACHE_DIR" ]; then
        echo "0"
        return
    fi

    du -sb "$CACHE_DIR" 2>/dev/null | awk '{ print $1 }'
}
# Take the single-instance lock by writing this shell's PID to $LOCK_FILE.
# - If the lock file names a PID that is still alive, report it and exit 1.
# - If the lock file is stale (empty or dead PID), remove it and continue.
# Installs traps so that `cleanup` runs when the script exits.
acquire_lock() {
    if [ -f "$LOCK_FILE" ]; then
        local pid
        pid=$(cat "$LOCK_FILE" 2>/dev/null)
        if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
            echo "ERROR: Another instance is already running (PID: $pid)" >&2
            exit 1
        else
            echo "Removing stale lock file" >&2
            rm -f "$LOCK_FILE"
        fi
    fi
    echo $$ > "$LOCK_FILE"

    # A signal handler that only runs cleanup would remove the lock and then
    # let the script carry on unprotected. Make INT/TERM terminate the script
    # instead; the EXIT trap then performs the cleanup exactly once.
    trap cleanup EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM
}
# Release the single-instance lock by deleting the lock file; a missing
# file is not an error.
cleanup() {
    rm -f "${LOCK_FILE}"
}
# Print the complete metrics exposition (HELP/TYPE lines plus samples) to
# stdout.
# Reads FEEDS_ARRAY directly and JOURNAL_1H / JOURNAL_24H through
# get_feed_blocks / get_feed_blocks_v6, so cache_feeds_config and
# cache_journal_data must have been called beforehand.
# NOTE(review): the per-feed loop variables (enabled, name, url, type,
# description, age_v4, ts_v4, ...) are not declared local and therefore
# leak into the global scope.
# NOTE(review): disk_used, disk_avail and cache_size have no fallback, so a
# failing df/du would emit a sample line without a value — confirm whether
# that can happen in practice.
generate_metrics() {
local start_time
# Remember the start so the exporter duration can be reported at the end.
start_time=$(date +%s)
cat <<EOF
# HELP ufw_blocklist_info Per-feed UFW threat blocking info
# TYPE ufw_blocklist_info gauge
ufw_blocklist_info{mode="per-feed",version="${SCRIPT_VERSION}"} 1
# HELP ufw_blocklist_enabled Total enabled feeds
# TYPE ufw_blocklist_enabled gauge
ufw_blocklist_enabled $(printf '%s\n' "${FEEDS_ARRAY[@]}" | grep -c '^1|')
# HELP ufw_blocklist_ipset_size Number of IPs per feed ipset
# TYPE ufw_blocklist_ipset_size gauge
EOF
# Only export metrics for ipsets that actually exist and are enabled
for ipset_name in $(ipset list -n 2>/dev/null | grep "^${IPSET_PREFIX}-"); do
# Extract feed name and IP version
local feed_name="${ipset_name#"${IPSET_PREFIX}"-}"
local ip_version="4"
if [[ "$feed_name" =~ -v6$ ]]; then
feed_name="${feed_name%-v6}"
ip_version="6"
fi
# Only show enabled feeds
if ! printf '%s\n' "${FEEDS_ARRAY[@]}" | grep -q "^1|${feed_name}|" 2>/dev/null; then
continue
fi
local size
size=$(get_ipset_size "$ipset_name")
echo "ufw_blocklist_ipset_size{feed=\"$feed_name\",ip_version=\"$ip_version\",status=\"enabled\"} $size"
done
cat <<EOF
# HELP ufw_blocklist_cache_age_seconds Seconds since feed update
# TYPE ufw_blocklist_cache_age_seconds gauge
EOF
# Age of the parsed cache files of every enabled feed.
for feed_line in "${FEEDS_ARRAY[@]}"; do
IFS='|' read -r enabled name url type description <<< "$feed_line"
[ "$enabled" != "1" ] && continue
age_v4=$(get_cache_age "$CACHE_DIR/${name}-v4.parsed")
age_v6=$(get_cache_age "$CACHE_DIR/${name}-v6.parsed")
echo "ufw_blocklist_cache_age_seconds{feed=\"$name\",ip_version=\"4\"} $age_v4"
echo "ufw_blocklist_cache_age_seconds{feed=\"$name\",ip_version=\"6\"} $age_v6"
done
cat <<EOF
# HELP ufw_blocklist_last_update_timestamp Unix timestamp of last update
# TYPE ufw_blocklist_last_update_timestamp gauge
EOF
# Modification time of the parsed cache files of every enabled feed.
for feed_line in "${FEEDS_ARRAY[@]}"; do
IFS='|' read -r enabled name url type description <<< "$feed_line"
[ "$enabled" != "1" ] && continue
ts_v4=$(get_file_timestamp "$CACHE_DIR/${name}-v4.parsed")
ts_v6=$(get_file_timestamp "$CACHE_DIR/${name}-v6.parsed")
echo "ufw_blocklist_last_update_timestamp{feed=\"$name\",ip_version=\"4\"} $ts_v4"
echo "ufw_blocklist_last_update_timestamp{feed=\"$name\",ip_version=\"6\"} $ts_v6"
done
cat <<EOF
# HELP ufw_blocklist_file_size_bytes Feed file sizes
# TYPE ufw_blocklist_file_size_bytes gauge
EOF
# Size in bytes of the parsed cache files of every enabled feed.
for feed_line in "${FEEDS_ARRAY[@]}"; do
IFS='|' read -r enabled name url type description <<< "$feed_line"
[ "$enabled" != "1" ] && continue
parsed_v4=$(get_file_size "$CACHE_DIR/${name}-v4.parsed")
parsed_v6=$(get_file_size "$CACHE_DIR/${name}-v6.parsed")
echo "ufw_blocklist_file_size_bytes{feed=\"$name\",ip_version=\"4\",type=\"parsed\"} $parsed_v4"
echo "ufw_blocklist_file_size_bytes{feed=\"$name\",ip_version=\"6\",type=\"parsed\"} $parsed_v6"
done
cat <<EOF
# HELP ufw_blocklist_ip_version_ratio IPv4 vs IPv6 ratio
# TYPE ufw_blocklist_ip_version_ratio gauge
EOF
# Share of IPv4 and IPv6 entries per feed; both ratios are 0.0000 when the
# feed's ipsets are empty, which avoids a division by zero.
for feed_line in "${FEEDS_ARRAY[@]}"; do
IFS='|' read -r enabled name url type description <<< "$feed_line"
[ "$enabled" != "1" ] && continue
v4_size=$(get_ipset_size "${IPSET_PREFIX}-${name}")
v6_size=$(get_ipset_size "${IPSET_PREFIX}-${name}-v6")
total=$((v4_size + v6_size))
if [ "$total" -gt 0 ]; then
ratio_v4=$(awk "BEGIN {printf \"%.4f\", $v4_size / $total}")
ratio_v6=$(awk "BEGIN {printf \"%.4f\", $v6_size / $total}")
else
ratio_v4="0.0000"
ratio_v6="0.0000"
fi
echo "ufw_blocklist_ip_version_ratio{feed=\"$name\",version=\"4\"} $ratio_v4"
echo "ufw_blocklist_ip_version_ratio{feed=\"$name\",version=\"6\"} $ratio_v6"
done
cat <<EOF
# HELP ufw_blocklist_blocked_total Blocked attempts per feed (rolling window)
# TYPE ufw_blocklist_blocked_total gauge
EOF
# Block counts per feed, IP version and window, taken from the cached journal.
for feed_line in "${FEEDS_ARRAY[@]}"; do
IFS='|' read -r enabled name url type description <<< "$feed_line"
[ "$enabled" != "1" ] && continue
blocks_1h=$(get_feed_blocks "$name" "1 hour ago")
blocks_24h=$(get_feed_blocks "$name" "24 hours ago")
blocks_1h_v6=$(get_feed_blocks_v6 "$name" "1 hour ago")
blocks_24h_v6=$(get_feed_blocks_v6 "$name" "24 hours ago")
echo "ufw_blocklist_blocked_total{feed=\"$name\",ip_version=\"4\",period=\"1h\"} $blocks_1h"
echo "ufw_blocklist_blocked_total{feed=\"$name\",ip_version=\"4\",period=\"24h\"} $blocks_24h"
echo "ufw_blocklist_blocked_total{feed=\"$name\",ip_version=\"6\",period=\"1h\"} $blocks_1h_v6"
echo "ufw_blocklist_blocked_total{feed=\"$name\",ip_version=\"6\",period=\"24h\"} $blocks_24h_v6"
done
# Calculate total blocks once for hit rate
# (only the IPv4 counts from get_feed_blocks are summed here)
local total_blocks_24h=0
for feed_line in "${FEEDS_ARRAY[@]}"; do
IFS='|' read -r enabled name url type description <<< "$feed_line"
[ "$enabled" != "1" ] && continue
local b
b=$(get_feed_blocks "$name" "24 hours ago")
total_blocks_24h=$((total_blocks_24h + ${b:-0}))
done
cat <<EOF
# HELP ufw_blocklist_effectiveness Feed hit rate - percentage of total blocks from this feed (24h)
# TYPE ufw_blocklist_effectiveness gauge
EOF
for feed_line in "${FEEDS_ARRAY[@]}"; do
IFS='|' read -r enabled name url type description <<< "$feed_line"
[ "$enabled" != "1" ] && continue
blocks=$(get_feed_blocks "$name" "24 hours ago")
# Strip stray newlines/spaces so the value is safe to splice into awk.
blocks=$(echo "$blocks" | tr -d '\n' | tr -d ' ')
blocks=${blocks:-0}
if [ "${total_blocks_24h:-0}" -gt 0 ]; then
effectiveness=$(awk "BEGIN {printf \"%.2f\", ($blocks / $total_blocks_24h) * 100}" 2>/dev/null || echo "0")
else
effectiveness="0"
fi
echo "ufw_blocklist_effectiveness{feed=\"$name\"} $effectiveness"
done
cat <<EOF
# HELP ufw_blocklist_ipset_memory_bytes Memory used by each ipset
# TYPE ufw_blocklist_ipset_memory_bytes gauge
EOF
for feed_line in "${FEEDS_ARRAY[@]}"; do
IFS='|' read -r enabled name url type description <<< "$feed_line"
[ "$enabled" != "1" ] && continue
mem_v4=$(get_ipset_memory "${IPSET_PREFIX}-${name}")
mem_v6=$(get_ipset_memory "${IPSET_PREFIX}-${name}-v6")
echo "ufw_blocklist_ipset_memory_bytes{feed=\"$name\",ip_version=\"4\"} $mem_v4"
echo "ufw_blocklist_ipset_memory_bytes{feed=\"$name\",ip_version=\"6\"} $mem_v6"
done
# Conntrack metrics (system-wide)
local conntrack_count conntrack_max conntrack_usage
conntrack_count=$(get_conntrack_count)
conntrack_max=$(get_conntrack_max)
if [ "${conntrack_max:-0}" -gt 0 ]; then
conntrack_usage=$(awk "BEGIN {printf \"%.2f\", ($conntrack_count / $conntrack_max) * 100}" 2>/dev/null || echo "0")
else
conntrack_usage="0"
fi
# Cache disk metrics
# get_cache_disk_usage returns "used|available|use%"; split it into fields.
local disk_info cache_size disk_used disk_avail disk_pct
disk_info=$(get_cache_disk_usage)
cache_size=$(get_total_cache_size)
disk_used=$(echo "$disk_info" | cut -d'|' -f1)
disk_avail=$(echo "$disk_info" | cut -d'|' -f2)
disk_pct=$(echo "$disk_info" | cut -d'|' -f3 | tr -d '%')
cat <<EOF
# HELP ufw_blocklist_conntrack_entries Current conntrack entries
# TYPE ufw_blocklist_conntrack_entries gauge
ufw_blocklist_conntrack_entries $conntrack_count
# HELP ufw_blocklist_conntrack_max Maximum conntrack entries
# TYPE ufw_blocklist_conntrack_max gauge
ufw_blocklist_conntrack_max $conntrack_max
# HELP ufw_blocklist_conntrack_usage_percent Conntrack table usage percentage
# TYPE ufw_blocklist_conntrack_usage_percent gauge
ufw_blocklist_conntrack_usage_percent $conntrack_usage
# HELP ufw_blocklist_cache_disk_used_bytes Disk space used by cache partition
# TYPE ufw_blocklist_cache_disk_used_bytes gauge
ufw_blocklist_cache_disk_used_bytes $disk_used
# HELP ufw_blocklist_cache_disk_available_bytes Disk space available on cache partition
# TYPE ufw_blocklist_cache_disk_available_bytes gauge
ufw_blocklist_cache_disk_available_bytes $disk_avail
# HELP ufw_blocklist_cache_disk_usage_percent Cache partition disk usage percentage
# TYPE ufw_blocklist_cache_disk_usage_percent gauge
ufw_blocklist_cache_disk_usage_percent ${disk_pct:-0}
# HELP ufw_blocklist_cache_total_size_bytes Total size of cache directory
# TYPE ufw_blocklist_cache_total_size_bytes gauge
ufw_blocklist_cache_total_size_bytes $cache_size
# HELP ufw_blocklist_whitelist_size Whitelist ipset size
# TYPE ufw_blocklist_whitelist_size gauge
ufw_blocklist_whitelist_size{ip_version="4"} $(get_ipset_size "$WHITELIST_IPSET")
ufw_blocklist_whitelist_size{ip_version="6"} $(get_ipset_size "$WHITELIST_IPSET_V6")
# HELP ufw_blocklist_total_unique_ips Total deduplicated IPs across all feeds
# TYPE ufw_blocklist_total_unique_ips gauge
ufw_blocklist_total_unique_ips{ip_version="4"} $(cat "$CACHE_DIR"/*-v4.parsed 2>/dev/null | sort -u | wc -l)
ufw_blocklist_total_unique_ips{ip_version="6"} $(cat "$CACHE_DIR"/*-v6.parsed 2>/dev/null | sort -u | wc -l)
# HELP ufw_blocklist_total_rules Total UFW firewall rules
# TYPE ufw_blocklist_total_rules gauge
ufw_blocklist_total_rules $(ufw status numbered 2>/dev/null | grep -c '^\[')
# HELP ufw_blocklist_scrape_timestamp_seconds Unix timestamp of metric generation
# TYPE ufw_blocklist_scrape_timestamp_seconds gauge
ufw_blocklist_scrape_timestamp_seconds $(date +%s)
# HELP ufw_blocklist_exporter_duration_seconds Time to generate all metrics
# TYPE ufw_blocklist_exporter_duration_seconds gauge
ufw_blocklist_exporter_duration_seconds $(($(date +%s) - start_time))
EOF
echo ""
}
# Serve metrics over HTTP on $HTTP_PORT, one connection at a time, using nc.
# "GET /metrics..." returns freshly generated metrics; any other request
# returns a small HTML index page. Runs until the script is terminated.
#
# nc's stdout (the client's request) is piped into the handler and the
# handler's stdout is fed back to nc's stdin through a FIFO. Without that
# return path the handler would read the request line from the script's own
# stdin rather than from the connection, so "/metrics" would never match.
run_http_server() {
    echo "Starting exporter on port $HTTP_PORT..." >&2

    local fifo_dir fifo request
    fifo_dir=$(mktemp -d /tmp/ufw_metrics_http.XXXXXX) || {
        echo "ERROR: Unable to create temporary directory" >&2
        exit 1
    }
    fifo="$fifo_dir/response"
    if ! mkfifo "$fifo"; then
        rm -rf "$fifo_dir"
        echo "ERROR: Unable to create FIFO $fifo" >&2
        exit 1
    fi

    # Expand the path now: the local variable is out of scope when the trap
    # fires. INT/TERM exit explicitly so the EXIT trap removes the FIFO.
    trap "rm -rf '$fifo_dir'" EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM

    while true; do
        nc -l -p "$HTTP_PORT" -q 1 < "$fifo" 2>/dev/null | {
            request=""
            # Blocks until the connected client sends its request line.
            read -r request
            if [[ "$request" =~ ^GET\ /metrics ]]; then
                printf "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4; charset=utf-8\r\n\r\n"
                cache_journal_data
                cache_feeds_config
                generate_metrics
            else
                printf "HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"
                echo "<h1>UFW Blocklist Exporter v${SCRIPT_VERSION}</h1><a href='/metrics'>Metrics</a>"
            fi
        } > "$fifo"
    done
}
# Entry point: parse arguments, then run in one of three modes.
#   --http            serve metrics over HTTP (runs continuously, no lock)
#   --textfile / -o   write metrics to $OUTPUT_FILE
#   (neither)         print metrics to stdout
main() {
    parse_args "$@"

    # Prevent multiple instances (skip for HTTP mode as it should run continuously)
    [ "$HTTP_MODE" != true ] && acquire_lock

    if [ "$HTTP_MODE" = true ]; then
        run_http_server
    elif [ -n "$OUTPUT_FILE" ]; then
        # Cache data before generating metrics
        cache_journal_data
        cache_feeds_config

        # Ensure output directory exists
        local out_dir temp_file
        out_dir=$(dirname "$OUTPUT_FILE")
        mkdir -p "$out_dir"

        # Stage the file next to its destination so the final mv is a rename
        # on the same filesystem: readers see either the old or the new
        # complete file, never a missing or half-written one. The staging
        # name is a dot-file without a .prom suffix, so a collector that only
        # picks up *.prom files ignores it.
        temp_file=$(mktemp "$out_dir/.ufw_blocklist_metrics.XXXXXX") || {
            echo "ERROR: Unable to create temporary file in $out_dir" >&2
            exit 1
        }

        # Generate metrics to temp file
        generate_metrics > "$temp_file"

        # Ensure node_exporter user can read it (set before it becomes visible)
        chmod 644 "$temp_file"

        # Rename into place; this also gives the output a new inode.
        if ! mv -f "$temp_file" "$OUTPUT_FILE"; then
            rm -f "$temp_file"
            echo "ERROR: Unable to write $OUTPUT_FILE" >&2
            exit 1
        fi

        # Force filesystem sync
        sync
    else
        cache_journal_data
        cache_feeds_config
        generate_metrics
    fi
}
# Script entry point: forward all command-line arguments to main.
main "$@"
+996
View File
@@ -0,0 +1,996 @@
#!/bin/bash
################################################################################
# Script Name: ufw-blocklists.sh
# Version: 1.0
# Description: Per-feed UFW threat intelligence blocking with ipset
# Author: Phil Connor
# Contact: contact@mylinux.work
# Website: https://mylinux.work
# License: MIT
################################################################################
# Don't use 'set -e' - it causes silent failures when log file has permission issues
# Feed configuration, download/parse cache and log file locations.
CONFIG_DIR="/etc/ufw-threats"
FEEDS_CONFIG="$CONFIG_DIR/feeds.conf"
CACHE_DIR="$CONFIG_DIR/cache"
LOG_FILE="/var/log/ufw-threats.log"
# Defaults that parse_args can override (-s, --no-auto-update,
# --update-interval, --no-ipv6).
SSH_PORT="22"
ENABLE_AUTO_UPDATE=true
UPDATE_INTERVAL="daily"
ENABLE_IPV6=true
# UFW rule files; cleanup_old_backups prunes their ".backup-*" copies.
UFW_RULES_FILE="/etc/ufw/before.rules"
UFW_RULES_V6_FILE="/etc/ufw/before6.rules"
# ipset naming: per-feed sets are "<IPSET_PREFIX>-<feed>" (IPv6 sets add "-v6").
IPSET_PREFIX="ufw-feed"
WHITELIST_IPSET="ufw-whitelist"
WHITELIST_IPSET_V6="ufw-whitelist-v6"
# Number of rule-file backups kept by cleanup_old_backups.
MAX_BACKUPS=10
# Print the command and option reference to stdout and terminate the script
# with exit status 0.
show_usage() {
cat <<EOF
Usage: $0 [OPTIONS] [COMMAND]
PER-FEED VERSION: Each threat feed gets its own ipset and iptables rule.
Provides detailed per-feed blocking statistics and metrics.
COMMANDS:
install Install and configure threat feed blocking
update Update all enabled feeds now (ipsets only, no UFW reload)
apply-rules Regenerate and apply UFW rules (use with caution!)
test-rules Test rule generation without applying
add-feed NAME URL Add a custom feed
remove-feed NAME Remove a feed
enable-feed NAME Enable a disabled feed
disable-feed NAME Disable a feed
list-feeds List all configured feeds
show-stats Show blocking statistics per feed
whitelist-add IP Add IP/CIDR to whitelist
whitelist-init Initialize whitelist with RFC1918/Docker networks
whitelist-list Show all whitelisted IPs
clean-cache Remove cache files for disabled feeds
OPTIONS:
-h, --help Show this help message
-s, --ssh-port PORT SSH port (default: 22)
--no-auto-update Disable automatic daily updates
--no-ipv6 Disable IPv6 support
--update-interval TIME Update interval: hourly, daily, weekly (default: daily)
EXAMPLES:
sudo $0 install
sudo $0 update # Safe - only updates ipsets
sudo $0 test-rules # Safe - validates without applying
sudo $0 apply-rules # DANGER - regenerates UFW config
sudo $0 show-stats
EOF
exit 0
}
# Print a timestamped message to stdout and append the same line to
# $LOG_FILE. A failure to write the log file never fails the caller.
# Arguments: $1 - message text
log_message() {
    local stamp line
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    line="[${stamp}] $1"

    printf '%s\n' "$line"
    printf '%s\n' "$line" >> "$LOG_FILE" 2>/dev/null || true
}
# Iterate over enabled feeds in $FEEDS_CONFIG, calling the provided callback
# function with arguments: name url type description
# Usage: for_each_enabled_feed my_callback_function
# Invoke a callback once per enabled feed in $FEEDS_CONFIG.
# Arguments: $1 - name of the function to call
# The callback receives: name url type description
# Only lines whose first field is exactly "1" are passed on, which also
# skips comment lines and blank lines. Does nothing when the config file
# is missing.
for_each_enabled_feed() {
    local handler="$1"
    local enabled name url type description

    if [ ! -f "$FEEDS_CONFIG" ]; then
        return 0
    fi

    while IFS='|' read -r enabled name url type description; do
        [[ "$enabled" == "1" ]] || continue
        "$handler" "$name" "$url" "$type" "$description"
    done < "$FEEDS_CONFIG"
}
# Iterate over ALL feeds (enabled + disabled), calling the provided callback
# function with arguments: enabled name url type description
# Invoke a callback once per feed in $FEEDS_CONFIG, enabled or not.
# Arguments: $1 - name of the function to call
# The callback receives: enabled name url type description
# Comment lines (first field starting with '#') and blank lines are skipped.
# Does nothing when the config file is missing.
for_each_feed() {
    local handler="$1"
    local enabled name url type description

    if [ ! -f "$FEEDS_CONFIG" ]; then
        return 0
    fi

    while IFS='|' read -r enabled name url type description; do
        case "$enabled" in
            ''|'#'*) continue ;;
        esac
        "$handler" "$enabled" "$name" "$url" "$type" "$description"
    done < "$FEEDS_CONFIG"
}
# Abort with a usage error unless enough values follow an option/command.
# Arguments: $1 - option or command name
#            $2 - number of values it requires
#            $3 - number of arguments remaining after it
_parse_args_require() {
    local what="$1" needed="$2" available="$3"
    if [ "$available" -lt "$needed" ]; then
        echo "ERROR: '$what' requires $needed argument(s)" >&2
        echo "Use --help for usage information" >&2
        exit 1
    fi
}

# Parse options and the command from the command line.
# Globals written: COMMAND (defaults to "install"), SSH_PORT,
#   ENABLE_AUTO_UPDATE, ENABLE_IPV6, UPDATE_INTERVAL, FEED_NAME, FEED_URL,
#   WHITELIST_IP
# Exits 1 on an unknown option or when an option/command lacks its operands.
# The operand checks matter: "shift N" fails without shifting anything when
# fewer than N arguments remain, which would leave this loop spinning forever.
parse_args() {
    COMMAND=""
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                show_usage
                ;;
            -s|--ssh-port)
                _parse_args_require "$1" 1 $(( $# - 1 ))
                SSH_PORT="$2"
                shift 2
                ;;
            --no-auto-update)
                ENABLE_AUTO_UPDATE=false
                shift
                ;;
            --no-ipv6)
                ENABLE_IPV6=false
                shift
                ;;
            --update-interval)
                _parse_args_require "$1" 1 $(( $# - 1 ))
                UPDATE_INTERVAL="$2"
                shift 2
                ;;
            install|update|apply-rules|test-rules|list-feeds|show-stats|whitelist-init|whitelist-list|clean-cache)
                COMMAND="$1"
                shift
                ;;
            add-feed)
                _parse_args_require "$1" 2 $(( $# - 1 ))
                COMMAND="add-feed"
                FEED_NAME="$2"
                FEED_URL="$3"
                shift 3
                ;;
            remove-feed|enable-feed|disable-feed)
                _parse_args_require "$1" 1 $(( $# - 1 ))
                COMMAND="$1"
                FEED_NAME="$2"
                shift 2
                ;;
            whitelist-add)
                _parse_args_require "$1" 1 $(( $# - 1 ))
                COMMAND="whitelist-add"
                WHITELIST_IP="$2"
                shift 2
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
    [ -z "$COMMAND" ] && COMMAND="install"
    return 0
}
# Prune rule-file backups: for $UFW_RULES_FILE (and $UFW_RULES_V6_FILE when
# IPv6 is enabled) keep only the newest MAX_BACKUPS "<file>.backup-*" files,
# judged by modification time. Finally delete every "*.backup-*.clean" file
# of both rule files. Errors are ignored; the function always succeeds.
cleanup_old_backups() {
    local keep_count=${MAX_BACKUPS:-10}
    local first_to_delete=$((keep_count + 1))
    local -a rule_files=("$UFW_RULES_FILE")
    local rules_path

    if [ "$ENABLE_IPV6" = true ]; then
        rule_files+=("$UFW_RULES_V6_FILE")
    fi

    for rules_path in "${rule_files[@]}"; do
        # Newest first; everything from position keep_count+1 on is removed.
        find "$(dirname "$rules_path")" -maxdepth 1 -name "$(basename "$rules_path").backup-*" -printf '%T@ %p\n' 2>/dev/null \
            | sort -rn \
            | tail -n "+${first_to_delete}" \
            | cut -d' ' -f2- \
            | xargs -r rm -f 2>/dev/null || true
    done

    rm -f "${UFW_RULES_FILE}.backup-"*.clean "${UFW_RULES_V6_FILE}.backup-"*.clean 2>/dev/null || true
}
# Verify and prepare the runtime environment.
# Arguments: $1 - "true" (default) to run `ufw --force enable` at the end
# Steps: require root, install ufw/ipset/curl when ufw is missing, make sure
# ipset and curl are available, create any missing ipsets, optionally enable
# UFW, then prune old rule-file backups.
# Exits 1 when not run as root or when curl is unavailable.
check_requirements() {
local enable_ufw="${1:-true}"
[ "$EUID" -ne 0 ] && { echo "Please run as root"; exit 1; }
# Try apt-get first, then fall back to dnf and finally yum.
if ! command -v ufw >/dev/null 2>&1; then
apt-get update && apt-get install -y ufw ipset curl 2>/dev/null || \
dnf install -y ufw ipset curl 2>/dev/null || \
yum install -y ufw ipset curl 2>/dev/null
fi
# NOTE(review): this ipset fallback only tries apt-get, unlike the block above.
command -v ipset >/dev/null 2>&1 || apt-get install -y ipset
command -v curl >/dev/null 2>&1 || { echo "ERROR: curl required"; exit 1; }
# CRITICAL: Ensure all ipsets referenced by before.rules exist BEFORE enabling UFW.
# If ipsets are missing (e.g., after reboot, failed persistence), UFW enable will fail
# with "Set ufw-feed-XXX doesn't exist" and block ALL traffic including DNS.
ensure_ipsets_exist
if [ "$enable_ufw" = true ]; then
ufw --force enable
fi
cleanup_old_backups
}
# Make sure the ipset(s) of one feed exist: "<prefix>-<name>" and, when IPv6
# is enabled, "<prefix>-<name>-v6". Creation errors are ignored, so the
# function always succeeds.
# Arguments: $1 - feed name
_ensure_feed_ipset() {
    local feed="$1"
    local v4_set="${IPSET_PREFIX}-${feed}"
    local v6_set="${IPSET_PREFIX}-${feed}-v6"

    if ! ipset list "$v4_set" >/dev/null 2>&1; then
        ipset create "$v4_set" hash:net family inet hashsize 4096 maxelem 200000 2>/dev/null || true
    fi

    if [ "$ENABLE_IPV6" = true ]; then
        if ! ipset list "$v6_set" >/dev/null 2>&1; then
            ipset create "$v6_set" hash:net family inet6 hashsize 4096 maxelem 200000 2>/dev/null || true
        fi
    fi
}
# Make sure every ipset the firewall rules may reference exists:
# restore saved sets from /etc/ipset.conf when present, create the whitelist
# set(s) if missing, then create missing sets for each enabled feed.
# Restore and creation errors are ignored.
ensure_ipsets_exist() {
    local saved_sets=/etc/ipset.conf

    [ -f "$saved_sets" ] && { ipset restore -f "$saved_sets" 2>/dev/null || true; }

    if ! ipset list "$WHITELIST_IPSET" >/dev/null 2>&1; then
        ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000 2>/dev/null || true
    fi

    if [ "$ENABLE_IPV6" = true ]; then
        if ! ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1; then
            ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000 2>/dev/null || true
        fi
    fi

    for_each_enabled_feed _ensure_feed_ipset
}
# Validate a feed name before it is used to build ipset names.
# Arguments: $1 - feed name
# Returns 0 when valid; otherwise prints an error to stdout and returns 1.
#
# The length limit is derived from ipset's maximum set-name length of 31
# characters. The longest name built from a feed is the temporary IPv6 set
# "<IPSET_PREFIX>-<name>-v6-tmp" used while loading entries, so the feed name
# must leave room for the prefix, one dash and that suffix (15 characters
# with the default "ufw-feed" prefix).
validate_feed_name() {
    local name="$1"
    local ipset_name_max=31
    local longest_suffix="-v6-tmp"
    local max_len=$(( ipset_name_max - ${#IPSET_PREFIX} - 1 - ${#longest_suffix} ))

    if [ -z "$name" ]; then
        echo "ERROR: Feed name cannot be empty"; return 1
    fi
    if [[ ! "$name" =~ ^[a-zA-Z0-9_-]+$ ]]; then
        echo "ERROR: Feed name '$name' contains invalid characters (only a-z, 0-9, _, - allowed)"; return 1
    fi
    if [ "${#name}" -gt "$max_len" ]; then
        echo "ERROR: Feed name '$name' too long (max $max_len chars, ipset name limit)"; return 1
    fi
    return 0
}
# Create the configuration and cache directories and the log file, then
# restrict the configuration directory to mode 700 and the log file to 600.
create_directory_structure() {
    local dir

    for dir in "$CONFIG_DIR" "$CACHE_DIR"; do
        mkdir -p "$dir"
    done
    touch "$LOG_FILE"

    chmod 700 "$CONFIG_DIR"
    chmod 600 "$LOG_FILE"
}
# Create $FEEDS_CONFIG with the default feed list unless it already contains
# at least one feed line (a line starting with "0|" or "1|").
# An existing file without feeds is kept as "<file>.old-<timestamp>".
# The new file is restricted to mode 600.
initialize_feeds_config() {
    local has_feeds=0
    if [ -f "$FEEDS_CONFIG" ]; then
        # grep -c prints "0" itself (and exits 1) when nothing matches, so no
        # "|| echo 0" fallback: that would produce "0<newline>0" and break the
        # numeric comparison below.
        has_feeds=$(grep -c '^[01]|' "$FEEDS_CONFIG" 2>/dev/null)
        has_feeds=${has_feeds:-0}
    fi
    if [ "$has_feeds" -gt 0 ]; then
        log_message "Feeds configuration already exists with $has_feeds feeds"
        return
    fi
    log_message "Creating feeds configuration..."
    [ -f "$FEEDS_CONFIG" ] && mv "$FEEDS_CONFIG" "${FEEDS_CONFIG}.old-$(date +%Y%m%d-%H%M%S)"
    cat > "$FEEDS_CONFIG" <<'EOF'
# Threat Intelligence Feeds Configuration
# Format: ENABLED|NAME|URL|TYPE|DESCRIPTION
#
# ENABLED: 1 (enabled) or 0 (disabled)
# NAME: Unique feed identifier
# URL: Feed URL
# TYPE: Format type (plain, cidr, commented, custom)
# DESCRIPTION: Feed description
1|cinsarmy|http://cinsscore.com/list/ci-badguys.txt|plain|CINS Army Malicious IPs
1|firehol-level1|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level1.netset|cidr|FireHOL Level 1 - Most aggressive attackers
1|firehol-level2|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level2.netset|cidr|FireHOL Level 2 - Attacks in last 48h
0|firehol-level3|https://raw.githubusercontent.com/ktsaou/blocklist-ipsets/master/firehol_level3.netset|cidr|FireHOL Level 3 - Attacks in last 30d
1|ipsum-1|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/1.txt|plain|IPsum Level 1 - Most dangerous
0|ipsum-2|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/2.txt|plain|IPsum Level 2 - Dangerous
0|ipsum-3|https://raw.githubusercontent.com/stamparm/ipsum/master/levels/3.txt|plain|IPsum Level 3 - Suspicious
0|spamhaus-drop|https://www.spamhaus.org/drop/drop.txt|commented|Spamhaus DROP List
0|spamhaus-edrop|https://www.spamhaus.org/drop/edrop.txt|commented|Spamhaus EDROP List
1|spamhaus-dropv6|https://www.spamhaus.org/drop/dropv6.txt|commented|Spamhaus DROP V6 List
0|feodo-tracker|https://feodotracker.abuse.ch/downloads/ipblocklist.txt|commented|Feodo Tracker C2 IPs
0|sslbl-aggressive|https://sslbl.abuse.ch/blacklist/sslipblacklist_aggressive.txt|commented|SSL Blacklist Aggressive
0|sslbl-all|https://sslbl.abuse.ch/blacklist/sslipblacklist.txt|commented|SSL Blacklist All
1|blocklist-de|https://lists.blocklist.de/lists/all.txt|plain|Blocklist.de All Attacks
0|greensnow|https://blocklist.greensnow.co/greensnow.txt|plain|GreenSnow Blacklist
0|emergingthreats|https://rules.emergingthreats.net/fwrules/emerging-Block-IPs.txt|plain|Emerging Threats IPs
0|bruteforce-ssh|https://lists.blocklist.de/lists/ssh.txt|plain|SSH Bruteforce Attempts
1|binarydefense|https://www.binarydefense.com/banlist.txt|plain|Binary Defense Blacklist
1|bruteforce-bl|https://danger.rulez.sk/projects/bruteforceblocker/blist.php|commented|BruteForce Blocker
0|dshield-top|https://www.dshield.org/block.txt|commented|DShield Top Attackers
1|dshield-fhol|https://iplists.firehol.org/files/dshield.netset|commented|Dshield FireHol top 20
0|tor-exit|https://check.torproject.org/torbulkexitlist|plain|TOR Exit Nodes (optional)
0|abuseipdb-1d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-1d.ipv4|commented|AbuseIPDB with confidence score 100 1 day
0|abuseipdb-3d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-3d.ipv4|commented|AbuseIPDB with confidence score 100 3 day
0|abuseipdb-7d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-7d.ipv4|commented|AbuseIPDB with confidence score 100 7 day
1|abuseipdb-14d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-14d.ipv4|commented|AbuseIPDB with confidence score 100 14 day
0|abuseipdb-30d|https://raw.githubusercontent.com/borestad/blocklist-abuseipdb/main/abuseipdb-s100-30d.ipv4|commented|AbuseIPDB with confidence score 100 30 day
# Add custom feeds below this line
EOF
    chmod 600 "$FEEDS_CONFIG"
}
# Create the ipset(s) of one feed when they do not exist yet and log each
# creation: "<prefix>-<name>" always, "<prefix>-<name>-v6" only when IPv6
# is enabled.
# Arguments: $1 - feed name
_setup_feed_ipset() {
    local feed="$1"
    local v4_set="${IPSET_PREFIX}-${feed}"
    local v6_set="${IPSET_PREFIX}-${feed}-v6"

    if ! ipset list "$v4_set" >/dev/null 2>&1; then
        ipset create "$v4_set" hash:net family inet hashsize 4096 maxelem 200000
        log_message " Created ipset: $v4_set"
    fi

    [ "$ENABLE_IPV6" = true ] || return 0
    ipset list "$v6_set" >/dev/null 2>&1 && return 0

    ipset create "$v6_set" hash:net family inet6 hashsize 4096 maxelem 200000
    log_message " Created ipset: $v6_set"
}
# Create all ipsets needed for per-feed blocking:
# the IPv4 whitelist (seeded with 127.0.0.1), the IPv6 whitelist (seeded
# with ::1, only when IPv6 is enabled), one set per enabled feed, and
# finally the persistence service so the sets survive a reboot.
setup_ipsets() {
    log_message "Setting up ipsets (per-feed mode)..."

    ipset list "$WHITELIST_IPSET" >/dev/null 2>&1 || {
        ipset create "$WHITELIST_IPSET" hash:net family inet hashsize 1024 maxelem 10000
        ipset add "$WHITELIST_IPSET" 127.0.0.1
    }

    if [ "$ENABLE_IPV6" = true ]; then
        ipset list "$WHITELIST_IPSET_V6" >/dev/null 2>&1 || {
            ipset create "$WHITELIST_IPSET_V6" hash:net family inet6 hashsize 1024 maxelem 10000
            ipset add "$WHITELIST_IPSET_V6" ::1
        }
    fi

    for_each_enabled_feed _setup_feed_ipset
    setup_ipset_persistence
}
# Install a systemd unit that restores the ipsets from /etc/ipset.conf at
# start and saves them back on stop, write the current sets to
# /etc/ipset.conf, and enable the unit (enable errors are ignored).
# NOTE(review): the unit hard-codes /sbin/ipset — confirm this path exists
# on every supported distribution.
setup_ipset_persistence() {
cat > /etc/systemd/system/ipset-persistent.service <<'EOF'
[Unit]
Description=ipset persistent configuration
Before=network-pre.target ufw.service
Wants=network-pre.target
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=-/sbin/ipset restore -f /etc/ipset.conf
ExecStop=/sbin/ipset save -f /etc/ipset.conf
StandardOutput=null
StandardError=null
[Install]
WantedBy=multi-user.target
EOF
# Snapshot the sets that exist right now so the first boot has data to restore.
ipset save > /etc/ipset.conf
systemctl enable ipset-persistent.service 2>/dev/null || true
}
# Download a feed to a file.
# Arguments: $1 - feed URL
#            $2 - destination file
# Returns 0 on success, 1 on failure (the failure is logged).
#
# The download goes to a temporary file next to the destination and only
# replaces it after curl succeeded with a non-empty body. That way a failed
# or truncated transfer is reported as a failure instead of silently
# reusing a stale file or publishing a partial one.
download_feed() {
    local url="$1" output="$2"
    local tmp http_code rc=0

    tmp=$(mktemp "${output}.XXXXXX") || {
        log_message " Download failed for $url (cannot create temporary file)"
        return 1
    }

    http_code=$(curl -f -s -m 60 --connect-timeout 10 -L \
        -A "ufw-threat-feeds-per-feed/1.0" \
        -w "%{http_code}" -o "$tmp" "$url" 2>/dev/null) || rc=$?

    if [ "$rc" -ne 0 ] || [ ! -s "$tmp" ]; then
        rm -f "$tmp"
        log_message " Download failed for $url (HTTP ${http_code:-000}, curl exit $rc)"
        return 1
    fi

    if ! mv -f "$tmp" "$output"; then
        rm -f "$tmp"
        log_message " Download failed for $url (cannot write $output)"
        return 1
    fi
    return 0
}
# Extract addresses from a downloaded feed into separate IPv4 and IPv6 files.
# Arguments: $1 - raw feed file
#            $2 - feed type: plain | cidr | commented
#            $3 - output file for IPv4 entries
#            $4 - output file for IPv6 entries
# Both output files are truncated first; IPv6 entries are only extracted
# when ENABLE_IPV6 is true. An unknown type leaves both files empty.
#   plain      whole line must be an address or CIDR
#   cidr       line must start with an address; text after a space or '#'
#              is dropped
#   commented  lines starting with '#' or ';' are ignored; addresses are
#              picked out of the remaining lines
parse_feed() {
    local raw_file="$1" feed_type="$2" v4_out="$3" v6_out="$4"
    local ipv4_re='[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?'
    local ipv6_re='[0-9a-fA-F:]+(/[0-9]+)?'

    : > "$v4_out"
    : > "$v6_out"

    if [ "$feed_type" = "plain" ]; then
        grep -E "^${ipv4_re}\$" "$raw_file" >> "$v4_out" 2>/dev/null || true
        if [ "$ENABLE_IPV6" = true ]; then
            grep -E "^${ipv6_re}\$" "$raw_file" | grep ':' >> "$v6_out" 2>/dev/null || true
        fi
    elif [ "$feed_type" = "cidr" ]; then
        grep -E "^${ipv4_re}" "$raw_file" \
            | cut -d' ' -f1 | cut -d'#' -f1 | grep -v '^$' >> "$v4_out" 2>/dev/null || true
        if [ "$ENABLE_IPV6" = true ]; then
            grep -E "^${ipv6_re}" "$raw_file" \
                | grep ':' | cut -d' ' -f1 | cut -d'#' -f1 | grep -v '^$' >> "$v6_out" 2>/dev/null || true
        fi
    elif [ "$feed_type" = "commented" ]; then
        grep -v -E '^[#;]|^$' "$raw_file" \
            | grep -oE "${ipv4_re}" >> "$v4_out" 2>/dev/null || true
        if [ "$ENABLE_IPV6" = true ]; then
            grep -v -E '^[#;]|^$' "$raw_file" \
                | grep -oE "${ipv6_re}" \
                | grep -E '^[0-9a-fA-F]{1,4}:[0-9a-fA-F:]+' >> "$v6_out" 2>/dev/null || true
        fi
    fi
}
# Delete cache files (*.raw, *-v4.parsed, *-v6.parsed in $CACHE_DIR) whose
# feed name is not in the list of enabled feeds, and log how many were
# removed.
# Arguments: $1 - enabled feed names, one per line
_clean_stale_cache() {
    local enabled_feeds="$1"
    local cleaned=0
    for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed; do
        # An unmatched glob stays literal; skip anything that is not a file.
        [ -f "$cache_file" ] || continue

        # Derive the feed name by dropping the directory and the suffix.
        local bn feed_name
        bn=${cache_file##*/}
        feed_name="${bn%.raw}"
        feed_name="${feed_name%-v4.parsed}"
        feed_name="${feed_name%-v6.parsed}"

        if printf '%s\n' "$enabled_feeds" | grep -q "^${feed_name}$"; then
            continue
        fi
        if rm -f "$cache_file"; then
            cleaned=$((cleaned + 1))
        fi
    done
    [ "$cleaned" -gt 0 ] && log_message " Cleaned $cleaned stale cache files"
}
# Replace the contents of a feed's IPv4 ipset with the entries of a file.
# Arguments: $1 - feed name
#            $2 - file with one address/CIDR per line (blank lines skipped)
# The entries are loaded into a temporary "<set>-tmp" set which is then
# swapped with the live set and destroyed, all through one `ipset restore`.
_load_ipset_v4() {
    local feed="$1" source_file="$2"
    local live_set="${IPSET_PREFIX}-${feed}"
    local staging_set="${live_set}-tmp"
    local entry

    {
        printf 'create %s hash:net family inet hashsize 4096 maxelem 200000\n' "$staging_set"
        while IFS= read -r entry; do
            [ -n "$entry" ] || continue
            printf 'add %s %s\n' "$staging_set" "$entry"
        done < "$source_file"
        printf 'swap %s %s\n' "$live_set" "$staging_set"
        printf 'destroy %s\n' "$staging_set"
    } | ipset restore 2>/dev/null
}
# Replace the contents of a feed's IPv6 ipset with the entries of a file.
# Arguments: $1 - feed name
#            $2 - file with one address/CIDR per line (blank lines skipped)
# The entries are loaded into a temporary "<set>-v6-tmp" set which is then
# swapped with the live "-v6" set and destroyed, all through one
# `ipset restore`.
_load_ipset_v6() {
    local feed="$1" source_file="$2"
    local live_set="${IPSET_PREFIX}-${feed}-v6"
    local staging_set="${live_set}-tmp"
    local entry

    {
        printf 'create %s hash:net family inet6 hashsize 4096 maxelem 200000\n' "$staging_set"
        while IFS= read -r entry; do
            [ -n "$entry" ] || continue
            printf 'add %s %s\n' "$staging_set" "$entry"
        done < "$source_file"
        printf 'swap %s %s\n' "$live_set" "$staging_set"
        printf 'destroy %s\n' "$staging_set"
    } | ipset restore 2>/dev/null
}
# Download, parse and load every enabled feed into its ipset(s), then save
# the resulting sets to /etc/ipset.conf. UFW itself is not reloaded.
# Exits 1 when the feeds config is missing or contains no enabled feed.
# A feed that fails to download is logged and counted, and its existing
# ipset is left unchanged.
update_feeds() {
    log_message "Starting per-feed update..."
    if [ ! -f "$FEEDS_CONFIG" ]; then
        echo "ERROR: Feeds config not found: $FEEDS_CONFIG"
        echo "Run 'install' command first"
        exit 1
    fi

    # grep -c already prints "0" (and exits 1) when nothing matches. Adding
    # "|| echo 0" would turn that into "0<newline>0", break the numeric test
    # below and let an update with zero enabled feeds slip through.
    local enabled_count
    enabled_count=$(grep -c '^1|' "$FEEDS_CONFIG" 2>/dev/null)
    enabled_count=${enabled_count:-0}
    if [ "$enabled_count" -eq 0 ]; then
        echo "ERROR: No enabled feeds found in $FEEDS_CONFIG"
        echo "Check the config file format"
        exit 1
    fi
    log_message "Found $enabled_count enabled feeds"

    local enabled_feeds
    enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2)

    # NOTE: Do NOT destroy ipsets for disabled feeds here. The before.rules may still
    # reference them (if apply-rules hasn't been re-run). Destroying in-use ipsets causes
    # "Set doesn't exist" on next UFW reload, which blocks all traffic.
    # Ipset cleanup happens safely in cmd_disable_feed/cmd_remove_feed after rules are regenerated.
    _clean_stale_cache "$enabled_feeds"

    local total_feeds=0
    local failed_feeds=0
    local enabled name url type description
    while IFS='|' read -r enabled name url type description; do
        [[ "$enabled" =~ ^#.*$ ]] && continue
        [[ -z "$enabled" ]] && continue
        [ "$enabled" != "1" ] && continue

        total_feeds=$((total_feeds + 1))
        log_message "Processing feed: $name"

        local raw="$CACHE_DIR/${name}.raw"
        local v4_file="$CACHE_DIR/${name}-v4.parsed"
        local v6_file="$CACHE_DIR/${name}-v6.parsed"

        if download_feed "$url" "$raw" && parse_feed "$raw" "$type" "$v4_file" "$v6_file"; then
            local count_v4 count_v6
            count_v4=$(wc -l < "$v4_file" 2>/dev/null || echo 0)
            count_v6=0
            [ "$ENABLE_IPV6" = true ] && count_v6=$(wc -l < "$v6_file" 2>/dev/null || echo 0)
            # Only swap in a new set when the feed actually produced entries.
            [ "$count_v4" -gt 0 ] && _load_ipset_v4 "$name" "$v4_file"
            [ "$ENABLE_IPV6" = true ] && [ "$count_v6" -gt 0 ] && _load_ipset_v6 "$name" "$v6_file"
            log_message " $name: $count_v4 IPv4, $count_v6 IPv6"
        else
            log_message " FAILED: $name"
            failed_feeds=$((failed_feeds + 1))
        fi
    done < "$FEEDS_CONFIG"

    ipset save > /etc/ipset.conf
    log_message "Updated $total_feeds feeds ($failed_feeds failed)"
}
# Write the iptables rule fragment for one address family to a file.
# Args: v4|v6 output_file
# The fragment starts with a whitelist ACCEPT rule, then adds a rate-limited LOG
# rule and a DROP rule for every enabled feed listed in $FEEDS_CONFIG.
_build_rules_block() {
    local family="$1" output="$2"
    local chain_prefix whitelist_set set_suffix log_tag

    # Anything other than "v4" is treated as IPv6
    case "$family" in
        v4)
            chain_prefix="ufw-before-input"
            whitelist_set="$WHITELIST_IPSET"
            set_suffix=""
            log_tag="THREAT"
            ;;
        *)
            chain_prefix="ufw6-before-input"
            whitelist_set="$WHITELIST_IPSET_V6"
            set_suffix="-v6"
            log_tag="THREAT-v6"
            ;;
    esac

    printf '%s\n' \
        "# UFW THREAT FEEDS - PER-FEED MODE - START" \
        "# Whitelist bypass" \
        "-A ${chain_prefix} -m set --match-set ${whitelist_set} src -j ACCEPT" \
        > "$output"

    local enabled name url type description
    local feed_set
    while IFS='|' read -r enabled name url type description; do
        # Comment lines, blank lines and disabled feeds never have "1" in field one
        [ "$enabled" = "1" ] || continue
        feed_set="${IPSET_PREFIX}-${name}${set_suffix}"
        {
            printf '# %s\n' "$description"
            printf '%s\n' "-A ${chain_prefix} -m set --match-set ${feed_set} src -m limit --limit 5/min -j LOG --log-prefix \"[${log_tag}:${name}] \""
            printf '%s\n' "-A ${chain_prefix} -m set --match-set ${feed_set} src -j DROP"
        } >> "$output"
    done < "$FEEDS_CONFIG"

    echo "# UFW THREAT FEEDS - PER-FEED MODE - END" >> "$output"
}
# Insert rules into a UFW template file and validate
# Args: template_file rules_file output_file
# Returns 0 on success, 1 on validation failure
#
# The rules are spliced in directly after the template's "# End required lines"
# marker. The result is accepted only when it contains exactly one *filter table.
_insert_and_validate_rules() {
    local template="$1" rules_file="$2" output="$3"

    # Use the first marker only: a repeated marker would otherwise give a
    # multi-line value that head/tail cannot use as a line number.
    local insert_line
    insert_line=$(grep -n "^# End required lines" "$template" | head -n 1 | cut -d: -f1) || true
    if [ -z "$insert_line" ]; then
        log_message "ERROR: Could not find '# End required lines' in $template"
        return 1
    fi

    head -n "$insert_line" "$template" > "$output"
    cat "$rules_file" >> "$output"
    tail -n +"$((insert_line + 1))" "$template" >> "$output"

    # grep -c prints "0" itself when nothing matches (and exits 1). Adding
    # `|| echo 0` inside the substitution would turn that into "0<newline>0",
    # the numeric test would error out, and a file without any *filter table
    # would wrongly pass validation.
    local filter_count
    filter_count=$(grep -c '^\*filter' "$output" 2>/dev/null) || true
    filter_count=${filter_count:-0}
    if [ "$filter_count" -ne 1 ]; then
        log_message "ERROR: Generated rules file has $filter_count *filter blocks (expected 1)"
        return 1
    fi
    return 0
}
# Per-feed callback: count the ipsets a feed needs that do not exist.
# Args: feed_name
# Adds one to the global _MISSING_SETS for every missing set and logs it.
# The "-v6" set is only required when ENABLE_IPV6 is true.
_verify_ipsets_callback() {
    local name="$1"
    local -a required=("${IPSET_PREFIX}-${name}")
    if [ "$ENABLE_IPV6" = true ]; then
        required+=("${IPSET_PREFIX}-${name}-v6")
    fi

    local set_name
    for set_name in "${required[@]}"; do
        if ipset list "$set_name" >/dev/null 2>&1; then
            continue
        fi
        log_message "ERROR: Required ipset ${set_name} is missing"
        _MISSING_SETS=$((_MISSING_SETS + 1))
    done
}
# Regenerate UFW's before-rules files from the stock templates with the per-feed
# threat rules inserted, then reload (or enable) UFW.
# Globals read: UFW_RULES_FILE, UFW_RULES_V6_FILE, ENABLE_IPV6, SSH_PORT
# Globals written: _MISSING_SETS
# Returns 1 when the IPv4 template is missing, when the IPv4 rules fail
# validation, or when a required ipset is missing; in those cases UFW is NOT
# reloaded.
apply_ufw_rules() {
log_message "Applying UFW rules (per-feed)..."
if [ ! -f /usr/share/ufw/before.rules ]; then
log_message "ERROR: UFW default template /usr/share/ufw/before.rules not found"
return 1
fi
# Scratch directory for the generated fragments; removed when the function returns
local tmpdir
tmpdir=$(mktemp -d)
trap 'rm -rf "$tmpdir"' RETURN
# Keep timestamped copies of the current rules files before they are overwritten
[ -f "$UFW_RULES_FILE" ] && cp "$UFW_RULES_FILE" "${UFW_RULES_FILE}.backup-$(date +%Y%m%d-%H%M%S)"
[ "$ENABLE_IPV6" = true ] && [ -f "$UFW_RULES_V6_FILE" ] && \
cp "$UFW_RULES_V6_FILE" "${UFW_RULES_V6_FILE}.backup-$(date +%Y%m%d-%H%M%S)"
# Reset the live files to the pristine templates so old threat rules never accumulate.
# Note: from here on the live files no longer contain the previous threat rules,
# even if a later validation step fails.
cp /usr/share/ufw/before.rules "$UFW_RULES_FILE"
[ "$ENABLE_IPV6" = true ] && cp /usr/share/ufw/before6.rules "$UFW_RULES_V6_FILE"
log_message " Starting from clean UFW templates"
# Build and insert IPv4 rules
local v4_rules="$tmpdir/v4_rules"
local v4_output="$tmpdir/v4_output"
_build_rules_block "v4" "$v4_rules"
if ! _insert_and_validate_rules "$UFW_RULES_FILE" "$v4_rules" "$v4_output"; then
log_message " Aborting to prevent corruption."
return 1
fi
mv "$v4_output" "$UFW_RULES_FILE"
log_message " IPv4 rules generated and validated"
# Build and insert IPv6 rules
# An IPv6 validation failure is not fatal: the clean template copied above stays in place
if [ "$ENABLE_IPV6" = true ]; then
local v6_rules="$tmpdir/v6_rules"
local v6_output="$tmpdir/v6_output"
_build_rules_block "v6" "$v6_rules"
if _insert_and_validate_rules "$UFW_RULES_V6_FILE" "$v6_rules" "$v6_output"; then
mv "$v6_output" "$UFW_RULES_V6_FILE"
log_message " IPv6 rules generated and validated"
else
log_message " Aborting IPv6 rules. Keeping IPv4 only."
fi
fi
# Make sure SSH stays reachable: prefer a rate-limited rule, fall back to a plain allow
ufw limit "$SSH_PORT/tcp" 2>/dev/null || ufw allow "$SSH_PORT/tcp"
# CRITICAL: Ensure all ipsets exist BEFORE reloading UFW
log_message " Verifying ipsets exist..."
# ensure_ipsets_exist / setup_ipsets are defined elsewhere in this file;
# presumably they create any set that is missing (TODO confirm).
ensure_ipsets_exist
setup_ipsets
# _verify_ipsets_callback increments _MISSING_SETS once per missing set
_MISSING_SETS=0
for_each_enabled_feed _verify_ipsets_callback
if [ "$_MISSING_SETS" -gt 0 ]; then
log_message "ERROR: $_MISSING_SETS required ipsets missing. Aborting UFW reload to prevent lockout."
return 1
fi
ipset save > /etc/ipset.conf
log_message " Reloading UFW..."
# Reload when UFW is already active, otherwise enable it without prompting
if ufw status | grep -q "Status: active"; then
ufw reload
else
ufw --force enable
fi
cleanup_old_backups
log_message "UFW rules applied and validated successfully"
}
# Install and enable a systemd service + timer that periodically runs
# "<this script> update".
# Globals read: ENABLE_AUTO_UPDATE, UPDATE_INTERVAL
# Does nothing when ENABLE_AUTO_UPDATE is the string "false".
setup_auto_update() {
[ "$ENABLE_AUTO_UPDATE" = false ] && return
# Resolve the absolute path of this script so the unit keeps working from any cwd
local script_path
script_path=$(readlink -f "$0")
# One-shot service: refresh the feeds, then persist the ipsets.
# The heredoc delimiter is unquoted, so $script_path is expanded now, at install time.
cat > /etc/systemd/system/ufw-threat-feeds-update.service <<EOF
[Unit]
Description=Update UFW threat feeds (per-feed)
After=network-online.target
[Service]
Type=oneshot
ExecStart=$script_path update
ExecStartPost=/bin/sh -c 'ipset save > /etc/ipset.conf'
EOF
# Timer: $UPDATE_INTERVAL is written verbatim as the OnCalendar expression
cat > /etc/systemd/system/ufw-threat-feeds-update.timer <<EOF
[Unit]
Description=Update UFW threat feeds $UPDATE_INTERVAL
[Timer]
Unit=ufw-threat-feeds-update.service
OnCalendar=$UPDATE_INTERVAL
Persistent=true
RandomizedDelaySec=1800
[Install]
WantedBy=timers.target
EOF
# Pick up the new unit files, then enable and start the timer immediately
systemctl daemon-reload
systemctl enable --now ufw-threat-feeds-update.timer
}
# Install two helper commands into /usr/local/bin:
#   ufw-whitelist <IP|CIDR>  - add an address to the whitelist ipset and save
#   ufw-threat-reload        - run "<this script> apply-rules"
create_management_commands() {
# Quoted delimiter: the helper is written literally, nothing is expanded here.
# NOTE(review): the helper hard-codes the set names ufw-whitelist / ufw-whitelist-v6
# rather than using WHITELIST_IPSET / WHITELIST_IPSET_V6 - confirm they match.
cat > /usr/local/bin/ufw-whitelist <<'EOF'
#!/bin/bash
[ -z "$1" ] && { echo "Usage: ufw-whitelist <IP|CIDR>"; exit 1; }
if [[ "$1" == *:* ]]; then
ipset add ufw-whitelist-v6 "$1" && echo "Whitelisted IPv6: $1"
else
ipset add ufw-whitelist "$1" && echo "Whitelisted IPv4: $1"
fi
ipset save > /etc/ipset.conf
EOF
# Unquoted delimiter below: the absolute path of this script is baked into the wrapper
local script_path
script_path=$(readlink -f "$0")
cat > /usr/local/bin/ufw-threat-reload <<EOF
#!/bin/bash
$script_path apply-rules
EOF
# Brace expansion covers both ufw-whitelist and ufw-threat-reload
chmod +x /usr/local/bin/ufw-{whitelist,threat-reload}
}
# Print one table row per enabled feed: IPv4 entries, IPv6 entries, and the
# number of journal lines from the last hour tagged "[THREAT:<feed>]".
# Returns 1 (after printing the table header) when the feeds config is missing.
cmd_show_stats() {
    local bar="=========================================="
    printf '%s\n' "$bar" "Per-Feed Blocking Statistics" "$bar"
    printf "%-25s %10s %10s %12s\n" "FEED" "IPv4 IPs" "IPv6 IPs" "BLOCKS (1h)"
    printf '%s\n' "-------------------------------------------------------------------"

    if [ ! -f "$FEEDS_CONFIG" ]; then
        echo "ERROR: Config not found. Run 'install' first."
        return 1
    fi

    local enabled name url type description
    local v4_count v6_count blocks
    while IFS='|' read -r enabled name url type description; do
        # Only rows whose first field is exactly "1" are enabled feeds
        [ "$enabled" = "1" ] || continue

        # Member lines of an IPv4 set start with a digit
        v4_count=$(ipset list "${IPSET_PREFIX}-${name}" 2>/dev/null | grep -c '^[0-9]' 2>/dev/null)
        : "${v4_count:=0}"

        v6_count=0
        if [ "$ENABLE_IPV6" = true ]; then
            v6_count=$(ipset list "${IPSET_PREFIX}-${name}-v6" 2>/dev/null | grep -c '^[0-9a-fA-F:]' 2>/dev/null)
            : "${v6_count:=0}"
        fi

        blocks=$(journalctl --since "1 hour ago" 2>/dev/null | grep -c "\[THREAT:${name}\]" 2>/dev/null)
        : "${blocks:=0}"

        printf "%-25s %10d %10d %12d\n" "$name" "$v4_count" "$v6_count" "$blocks"
    done < "$FEEDS_CONFIG"
}
# Per-feed callback: print one "STATUS NAME DESCRIPTION" table row.
# Args: enabled_flag name url type description (url and type are not printed)
_list_feed_entry() {
    local feed_enabled="$1" name="$2" url="$3" type="$4" description="$5"
    local status
    if [ "$feed_enabled" = "1" ]; then
        status="ENABLED"
    else
        status="DISABLED"
    fi
    printf "%-10s %-25s %s\n" "$status" "$name" "$description"
}
# Print a table of every configured feed with its enabled/disabled status.
# The rows are produced by _list_feed_entry via for_each_feed.
cmd_list_feeds() {
    printf "%-10s %-25s %s\n" "STATUS" "NAME" "DESCRIPTION"
    printf '%s\n' "-------------------------------------------------------------------"
    for_each_feed _list_feed_entry
}
# Append a new, enabled, plain-format feed to the feeds config.
# Globals read: FEED_NAME, FEED_URL, FEEDS_CONFIG
# Exits 1 when the name is invalid, the URL is unusable, or the feed already exists.
cmd_add_feed() {
    validate_feed_name "$FEED_NAME" || exit 1

    # The config is a '|'-delimited, line-oriented file: an empty URL would
    # create a feed that can never be downloaded, and a '|' or newline inside
    # the URL would shift or split the fields of the stored record.
    if [ -z "${FEED_URL:-}" ]; then
        echo "ERROR: Feed URL is required"
        exit 1
    fi
    case "$FEED_URL" in
        *'|'* | *$'\n'*)
            echo "ERROR: Feed URL must not contain '|' or line breaks"
            exit 1
            ;;
    esac

    grep -q "^[01]|${FEED_NAME}|" "$FEEDS_CONFIG" 2>/dev/null && { echo "Feed exists"; exit 1; }

    printf '%s\n' "1|${FEED_NAME}|${FEED_URL}|plain|Custom: ${FEED_NAME}" >> "$FEEDS_CONFIG"
    log_message "Added feed: $FEED_NAME"
}
# Delete a feed from the config, regenerate the UFW rules, then drop its ipsets.
# Globals read: FEED_NAME, FEEDS_CONFIG, IPSET_PREFIX
# The sets are destroyed only after the rules were regenerated successfully, so
# the live rules never reference a set that no longer exists.
cmd_remove_feed() {
    if ! validate_feed_name "$FEED_NAME"; then
        exit 1
    fi

    sed -i "/^[01]|${FEED_NAME}|/d" "$FEEDS_CONFIG"
    log_message "Removed feed: $FEED_NAME"

    log_message "Regenerating UFW rules..."
    if ! apply_ufw_rules; then
        return 1
    fi

    # IPv4 set first, then the "-v6" companion; a missing set is not an error
    local suffix
    for suffix in "" "-v6"; do
        ipset destroy "${IPSET_PREFIX}-${FEED_NAME}${suffix}" 2>/dev/null || true
    done
}
# Flip a feed's flag from 0 to 1 in the config and regenerate the UFW rules.
# Globals read: FEED_NAME, FEEDS_CONFIG
# Returns the status of apply_ufw_rules.
cmd_enable_feed() {
    if ! validate_feed_name "$FEED_NAME"; then
        exit 1
    fi

    local disabled_prefix="0|${FEED_NAME}|"
    local enabled_prefix="1|${FEED_NAME}|"
    sed -i "s/^${disabled_prefix}/${enabled_prefix}/" "$FEEDS_CONFIG"
    log_message "Enabled: $FEED_NAME"

    log_message "Regenerating UFW rules..."
    apply_ufw_rules
}
# Flip a feed's flag from 1 to 0, regenerate the UFW rules, then drop its ipsets.
# Globals read: FEED_NAME, FEEDS_CONFIG, IPSET_PREFIX
# The sets are destroyed only after the rules no longer reference them.
cmd_disable_feed() {
    if ! validate_feed_name "$FEED_NAME"; then
        exit 1
    fi

    local enabled_prefix="1|${FEED_NAME}|"
    local disabled_prefix="0|${FEED_NAME}|"
    sed -i "s/^${enabled_prefix}/${disabled_prefix}/" "$FEEDS_CONFIG"
    log_message "Disabled: $FEED_NAME"

    log_message "Regenerating UFW rules..."
    if ! apply_ufw_rules; then
        return 1
    fi

    # IPv4 set first, then the "-v6" companion; a missing set is not an error
    local suffix
    for suffix in "" "-v6"; do
        ipset destroy "${IPSET_PREFIX}-${FEED_NAME}${suffix}" 2>/dev/null || true
    done
}
# Add $WHITELIST_IP to the IPv4 or IPv6 whitelist ipset and persist the sets.
# An address containing ':' is treated as IPv6, anything else as IPv4.
# Exits 1 when no address was given or when ipset rejects it.
cmd_whitelist_add() {
    if [ -z "$WHITELIST_IP" ]; then
        echo "Usage: $0 whitelist-add <IP|CIDR>"
        exit 1
    fi

    local target_set family
    case "$WHITELIST_IP" in
        *:*)
            target_set="$WHITELIST_IPSET_V6"
            family="IPv6"
            ;;
        *)
            target_set="$WHITELIST_IPSET"
            family="IPv4"
            ;;
    esac

    if ! ipset add "$target_set" "$WHITELIST_IP" 2>/dev/null; then
        echo "Failed to add $WHITELIST_IP"; exit 1
    fi
    log_message "Added to ${family} whitelist: $WHITELIST_IP"

    ipset save > /etc/ipset.conf
}
# Seed the whitelist ipsets with private, link-local and loopback ranges so that
# internal traffic is never matched by a threat feed, then persist the sets.
# Globals read: WHITELIST_IPSET, WHITELIST_IPSET_V6, ENABLE_IPV6
cmd_whitelist_init() {
    log_message "Initializing whitelist with private networks..."

    local private_networks=(
        "10.0.0.0/8"
        "172.16.0.0/12"
        "192.168.0.0/16"
        "169.254.0.0/16"
        "127.0.0.0/8"
    )
    local private_networks_v6=(
        "fc00::/7"
        "fe80::/10"
        "::1"
    )

    # Loop variable is local so it cannot clobber a caller's $net
    local net

    echo "Adding IPv4 private networks to whitelist..."
    for net in "${private_networks[@]}"; do
        # ipset fails both for duplicates and for real errors; the two cases
        # cannot be told apart here, hence the combined message
        if ipset add "$WHITELIST_IPSET" "$net" 2>/dev/null; then
            echo " + $net"
        else
            echo " - $net (already exists or error)"
        fi
    done

    if [ "$ENABLE_IPV6" = true ]; then
        echo "Adding IPv6 private networks to whitelist..."
        for net in "${private_networks_v6[@]}"; do
            if ipset add "$WHITELIST_IPSET_V6" "$net" 2>/dev/null; then
                echo " + $net"
            else
                echo " - $net (already exists or error)"
            fi
        done
    fi

    ipset save > /etc/ipset.conf
    log_message "Whitelist initialized with RFC1918/private networks"
}
# Print the members of the IPv4 whitelist set and, when IPv6 is enabled, of the
# IPv6 whitelist set. "No entries" is printed for an empty or missing set.
cmd_whitelist_list() {
    local bar="=========================================="

    printf '%s\n' "$bar" "IPv4 Whitelist ($WHITELIST_IPSET)" "$bar"
    # Member lines of an IPv4 set start with a digit
    ipset list "$WHITELIST_IPSET" 2>/dev/null | grep -E '^[0-9]' || echo "No entries"

    if [ "$ENABLE_IPV6" != true ]; then
        return 0
    fi

    printf '%s\n' "" "$bar" "IPv6 Whitelist ($WHITELIST_IPSET_V6)" "$bar"
    ipset list "$WHITELIST_IPSET_V6" 2>/dev/null | grep -E '^[0-9a-fA-F:]' || echo "No entries"
}
# Delete cached download/parse files that belong to no enabled feed.
# Globals read: FEEDS_CONFIG, CACHE_DIR
# Looks at <feed>.raw, <feed>-v4.parsed and <feed>-v6.parsed in $CACHE_DIR.
cmd_clean_cache() {
    log_message "Cleaning cache for disabled feeds..."

    local removed=0
    local kept=0
    local enabled_feeds
    enabled_feeds=$(grep '^1|' "$FEEDS_CONFIG" 2>/dev/null | cut -d'|' -f2)

    local cache_file bn feed_name
    for cache_file in "$CACHE_DIR"/*.raw "$CACHE_DIR"/*-v4.parsed "$CACHE_DIR"/*-v6.parsed; do
        # An unmatched glob stays literal; skip it
        [ -f "$cache_file" ] || continue

        # Strip the directory and whichever cache suffix this file carries
        bn=${cache_file##*/}
        feed_name="${bn%.raw}"
        feed_name="${feed_name%-v4.parsed}"
        feed_name="${feed_name%-v6.parsed}"

        # -x -F: whole-line, literal comparison. The name comes from a file name
        # and must not be interpreted as a regular expression (a "." in a feed
        # name would otherwise match any character).
        if grep -qxF -- "$feed_name" <<< "$enabled_feeds"; then
            kept=$((kept + 1))
        else
            rm -f -- "$cache_file"
            removed=$((removed + 1))
        fi
    done

    log_message "Removed $removed cache files, kept $kept active feeds"
}
# Dry run of the rule generation: build the IPv4 rules into a scratch copy of the
# UFW template, validate the result and print a summary. No live file is touched.
# Returns 1 when the template is missing or the generated file fails validation.
cmd_test_rules() {
    log_message "Testing UFW rule generation (dry-run mode)..."

    if [ ! -f /usr/share/ufw/before.rules ]; then
        echo "ERROR: UFW default template /usr/share/ufw/before.rules not found"
        return 1
    fi

    # Scratch directory, removed again when the function returns
    local test_dir
    test_dir=$(mktemp -d)
    trap 'rm -rf "$test_dir"' RETURN

    local test_v4="$test_dir/before.rules.test"
    cp /usr/share/ufw/before.rules "$test_v4"
    [ "$ENABLE_IPV6" = true ] && cp /usr/share/ufw/before6.rules "$test_dir/before6.rules.test"

    local v4_rules="$test_dir/v4_rules"
    local v4_output="$test_dir/v4_output"
    _build_rules_block "v4" "$v4_rules"

    # grep -c prints "0" on its own when nothing matches; `|| echo 0` inside the
    # substitution would make the value "0<newline>0" and garble the output.
    local feed_count
    feed_count=$(grep -c '^1|' "$FEEDS_CONFIG" 2>/dev/null) || true
    feed_count=${feed_count:-0}
    echo "Generated rules for $feed_count enabled feeds"

    if ! _insert_and_validate_rules "$test_v4" "$v4_rules" "$v4_output"; then
        echo "VALIDATION FAILED"
        return 1
    fi
    echo "Validation passed: exactly 1 *filter block found"

    local total_lines rule_lines
    total_lines=$(wc -l < "$v4_output")
    rule_lines=$(grep -c "^-A " "$v4_output" 2>/dev/null) || true
    rule_lines=${rule_lines:-0}
    echo "Generated $rule_lines iptables rules in $total_lines total lines"

    echo ""
    echo "=========================================="
    echo "Sample of generated rules:"
    echo "=========================================="
    grep "# UFW THREAT FEEDS" -A 10 "$v4_output" | head -15
    echo "..."
    echo ""
    echo "=========================================="
    echo "Test passed - rules would be generated safely"
    echo " To apply these rules, run: $0 apply-rules"
    echo "=========================================="
}
# Full installation: requirements, directories, feed config, ipsets, first feed
# download, UFW rules, auto-update timer and helper commands, followed by a
# summary.
# Returns 1 without printing the success summary when the UFW rules could not
# be applied.
cmd_install() {
    log_message "Installing per-feed threat blocking..."

    check_requirements
    create_directory_structure
    initialize_feeds_config
    setup_ipsets
    update_feeds

    # apply_ufw_rules signals validation problems and missing ipsets through its
    # return status. Carrying on would report a completed installation although
    # no blocking rules are active.
    if ! apply_ufw_rules; then
        echo "ERROR: Failed to apply UFW rules; installation aborted." >&2
        echo "Fix the problem reported above and re-run: $0 install" >&2
        return 1
    fi

    setup_auto_update
    create_management_commands

    echo ""
    echo "=========================================="
    echo "Per-Feed Installation Complete"
    echo "=========================================="
    echo "Mode: Per-feed ipsets (detailed tracking)"
    echo "Feeds: $(grep -c '^1|' "$FEEDS_CONFIG")"
    echo "IPv6: $ENABLE_IPV6"
    echo "Auto-update: $ENABLE_AUTO_UPDATE ($UPDATE_INTERVAL)"
    echo ""
    echo "Commands:"
    echo " $0 show-stats # View per-feed statistics"
    echo " $0 update # Update all feeds"
    echo " ufw-whitelist IP # Whitelist an IP"
    echo ""
    echo "Logs: grep 'THREAT:' /var/log/syslog"
    echo "=========================================="
}
# Entry point: parse the command line, then dispatch on $COMMAND.
# "update" and "apply-rules" run a short preparation sequence. Every other known
# command maps to the handler cmd_<command>, with dashes turned into underscores.
# Anything else prints the usage text.
main() {
    parse_args "$@"

    case "$COMMAND" in
        update)
            check_requirements false
            create_directory_structure
            update_feeds
            ;;
        apply-rules)
            check_requirements
            apply_ufw_rules
            ;;
        install | test-rules | list-feeds | show-stats \
            | add-feed | remove-feed | enable-feed | disable-feed \
            | whitelist-add | whitelist-init | whitelist-list | clean-cache)
            "cmd_${COMMAND//-/_}"
            ;;
        *)
            show_usage
            ;;
    esac
}
main "$@"
+96
View File
@@ -0,0 +1,96 @@
#!/bin/bash
####################################################################
#### Code-Server Update Script ####
#### For RHEL/Rocky/Alma, Oracle Linux, Debian & Ubuntu ####
#### ####
#### Author: Phil Connor ####
#### Contact: contact@mylinux.work ####
#### License: MIT ####
#### Version: 1.2 ####
#### ####
#### Usage: sudo ./update-code-server.sh ####
####################################################################
#############################
#### User Configurations ####
#############################
SERVDIR=/usr/local/code-server # where you want the code-server installed
########################
#### System Configs ####
########################
# OS: first word of PRETTY_NAME from /etc/os-release, lower-cased
# (e.g. "Ubuntu 22.04.3 LTS" gives "ubuntu"; "Red Hat ..." gives "red").
OS=$(grep PRETTY_NAME /etc/os-release | sed 's/PRETTY_NAME=//g' | tr -d '="' | awk '{print $1}' | tr '[:upper:]' '[:lower:]')
# OSVER: major part of VERSION_ID (everything before the first dot)
OSVER=$(grep VERSION_ID /etc/os-release | sed 's/VERSION_ID=//g' | tr -d '="' | awk -F. '{print $1}')
# CSVER: first field of `code-server --version`, i.e. the installed version;
# empty when code-server is not on PATH.
CSVER=$(code-server --version | awk '{print $1}')
############################################################
#### Detect Package Manager from OS and OSVer Variables ####
############################################################
# Select the package-manager command for the detected distribution.
# Globals read:    OS (lower-cased first word of PRETTY_NAME), OSVER (major version)
# Globals written: PAKMGR ("apt-get -y", "yum -y" or "dnf -y"); left untouched
#                  when the distribution is not recognised.
detect_package_manager() {
    case "${OS}" in
        ubuntu | debian)
            PAKMGR="apt-get -y"
            ;;
        # "red" is Red Hat Enterprise Linux. AlmaLinux writes its name as one
        # word, so the detected value is "almalinux"; "alma" is kept for
        # backward compatibility.
        centos | red | oracle | rocky | alma | almalinux)
            if [ "${OSVER}" = 7 ]; then
                PAKMGR="yum -y"
            elif [ "${OSVER:-0}" -ge 8 ] 2>/dev/null; then
                # dnf is the package manager on release 8 and every later major release
                PAKMGR="dnf -y"
            fi
            ;;
    esac
}
detect_package_manager
###################
#### Update OS ####
###################
# Refresh the system packages with the detected package manager.
# On Ubuntu this is "update" followed by "upgrade"; elsewhere a single "update".
# ${PAKMGR} is deliberately unquoted: it holds a command plus its "-y" flag.
update_os() {
    if [ "${OS}" != ubuntu ]; then
        ${PAKMGR} update
        return
    fi
    ${PAKMGR} update
    ${PAKMGR} upgrade
}
###############################################
#### Get the latest version of Code Server ####
###############################################
# Find the newest released code-server version and decide whether to upgrade.
# The version is read from the URL that GitHub's "releases/latest" redirects to.
# Globals read:    CSVER (installed version)
# Globals written: version (latest release, without the leading "v"),
#                  compare (1 = latest is newer than CSVER, otherwise 0)
# Prints the latest version on stdout.
get_latest_version() {
    local tag_url_prefix="https://github.com/coder/code-server/releases/tag/"

    version="$(curl -fsSLI -o /dev/null -w "%{url_effective}" https://github.com/coder/code-server/releases/latest)"
    version="${version#"$tag_url_prefix"}"
    version="${version#v}"
    echo "$version"

    #### Compare Code-Server versions ####
    # "Newer" means: different from the installed version AND sorted last by
    # a version-aware sort of the two.
    local newest
    newest="$(printf '%s\n' "$CSVER" "$version" | sort -V | tail -1)"
    if [[ "$version" == "$CSVER" || "$newest" != "$version" ]]; then
        compare=0
    else
        compare=1
    fi
}
#########################################
#### Download and Update Codeserver ####
#########################################
# Download the release selected by get_latest_version and install it into SERVDIR.
# Globals read: compare (1 = upgrade wanted), version, SERVDIR, PAKMGR
# Returns 0 when nothing had to be done or the upgrade succeeded, 1 otherwise.
#
# The archive is downloaded and unpacked into a private temporary directory
# BEFORE the service is stopped: a failed download leaves the running
# installation untouched, and the downtime is limited to the file copy.
install_codeserver() {
    [ "${compare:-0}" = 1 ] || return 0

    # check if command wget exists
    if ! command -v wget >/dev/null 2>&1; then
        ${PAKMGR} install wget || return 1
    fi

    local release="code-server-${version}-linux-amd64"
    local workdir
    workdir=$(mktemp -d) || return 1
    local archive="${workdir}/${release}.tar.gz"

    if ! wget -O "$archive" "https://github.com/coder/code-server/releases/download/v${version}/${release}.tar.gz" \
        || ! tar -xvf "$archive" -C "$workdir"; then
        echo "ERROR: could not download or unpack code-server ${version}; the installed version was left untouched" >&2
        rm -rf -- "$workdir"
        return 1
    fi

    local rc=0
    systemctl stop code-server
    cp -r "${workdir}/${release}"/* "${SERVDIR}" || rc=1
    # Always try to bring the service back, even when the copy failed
    systemctl start code-server || rc=1
    rm -rf -- "$workdir"
    return "$rc"
}
# Script body: the OS package update is left commented out, so a run only looks
# up the newest release (which sets $version and $compare) and then installs it
# when $compare is 1.
#update_os
get_latest_version
install_codeserver
+570
View File
@@ -0,0 +1,570 @@
#!/bin/bash
set -euo pipefail
##########################################################################
## Prometheus Stack Updater ##
## ##
## Updates installed Prometheus ecosystem binaries to latest release ##
## from GitHub. Only touches components that are already installed. ##
## ##
## Supported components: ##
## prometheus, node_exporter, blackbox_exporter, ##
## alertmanager, mysqld_exporter, promtool, amtool, ##
## loki, promtail, alloy, grafana ##
## ##
## Usage: ##
## ./update-prometheus-stack.sh [OPTIONS] ##
## ##
## Options: ##
## --check Show what would be updated (no changes) ##
## --all Update all installed components ##
## --prometheus Update only Prometheus ##
## --node-exporter Update only node_exporter ##
## --blackbox Update only blackbox_exporter ##
## --alertmanager Update only AlertManager ##
## --mysql-exporter Update only mysqld_exporter ##
## --loki Update only Loki ##
## --promtail Update only Promtail ##
## --alloy Update only Alloy ##
## --grafana Update only Grafana (via package manager) ##
## --force Update even if already at latest version ##
## --arch <arch> Override architecture (default: auto-detect) ##
## --backup-only Backup configs only (no updates) ##
## --help Show this help message ##
## ##
## Author: Phil Connor ##
## Contact: pconnor@ara.com ##
##########################################################################
# Installation layout
BINDIR="/usr/local/bin"
PROMDIR="/etc/prometheus"
BACKUPDIR="${PROMDIR}/backups"
LOGFILE="/var/log/prometheus-update.log"
# Private scratch directory. The script runs as root, so the path must not be
# predictable: mktemp -d creates a randomly named directory atomically, which a
# "/tmp/<name>-$$" path pre-created by another local user cannot defeat.
TMPDIR_BASE="$(mktemp -d "${TMPDIR:-/tmp}/prometheus-update.XXXXXXXXXX")"
# Run-mode flags, set by the command-line parser
CHECK_ONLY=false
BACKUP_ONLY=false
FORCE=false
ARCH=""
# Result counters for the final summary
UPDATED=0
SKIPPED=0
FAILED=0
# Components named on the command line; empty means "all installed components"
COMPONENTS_REQUESTED=()
# ANSI colour escapes used by the log helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
# Print a timestamped message on stdout and append it to $LOGFILE.
# Args: message (may contain backslash colour escapes, rendered by echo -e)
# A log file that cannot be written (for example when not running as root) is
# ignored. The message is printed exactly once either way. Piping through
# `tee ... || echo` instead would print it twice under `set -o pipefail`: tee
# copies its input to stdout even when opening the file fails, and its non-zero
# status then triggers the fallback echo as well.
log() {
    local msg
    msg="[$(date '+%Y-%m-%d %H:%M:%S')] $1"
    echo -e "$msg"
    { echo -e "$msg" >> "$LOGFILE"; } 2>/dev/null || true
}
# Level-specific wrappers around log(). Each message is prefixed with a coloured
# textual tag so the level stays recognisable in the log file and on terminals
# without colour support. (Wrapping the colour escapes around an empty string
# leaves all four levels looking identical.)
# log_err additionally redirects log()'s output to stderr.
log_ok()   { log "${GREEN}[OK]${NC} $1"; }
log_warn() { log "${YELLOW}[WARN]${NC} $1"; }
log_err()  { log "${RED}[ERROR]${NC} $1" >&2; }
log_info() { log "${CYAN}[INFO]${NC} $1"; }
# EXIT handler: remove the scratch directory if it exists.
# Returns 1 when there is nothing to remove. The EXIT trap does not use that
# status, so it has no effect on the script's exit code.
# shellcheck disable=SC2329
cleanup() {
    # shellcheck disable=SC2317
    if [[ ! -d "$TMPDIR_BASE" ]]; then
        return 1
    fi
    rm -rf "$TMPDIR_BASE"
}
trap cleanup EXIT
# Print the usage section of this script's own header comment, then exit 0.
# The section runs from the "## Usage:" line to the next line starting with
# "####". That closing line is dropped, a leading "## " is stripped, and a bare
# "##" line becomes an empty line.
show_help() {
    sed -n \
        -e '/^## Usage:/,/^####/{' \
        -e '/^####/d' \
        -e 's/^## //' \
        -e 's/^##$//' \
        -e 'p' \
        -e '}' \
        "$0"
    exit 0
}
# Print the release-architecture name for this machine.
# An explicit --arch override ($ARCH) always wins. Otherwise `uname -m` is
# mapped to the name used in release file names. Machines without a mapping
# still fall back to "amd64" for backward compatibility, but a warning is now
# written to stderr instead of silently selecting a binary that cannot run.
detect_arch() {
    if [[ -n "$ARCH" ]]; then
        echo "$ARCH"
        return
    fi

    local machine
    machine=$(uname -m)
    case "$machine" in
        x86_64 | amd64)  echo "amd64" ;;
        aarch64 | arm64) echo "arm64" ;;
        armv7l)          echo "armv7" ;;
        armv6l)          echo "armv6" ;;
        i386 | i686)     echo "386" ;;
        ppc64le)         echo "ppc64le" ;;
        s390x)           echo "s390x" ;;
        riscv64)         echo "riscv64" ;;
        *)
            echo "WARNING: unrecognised machine type '${machine}', assuming amd64 (override with --arch)" >&2
            echo "amd64"
            ;;
    esac
}
# Print the installed version of a binary in $BINDIR.
# Args: binary name
# Prints "not_installed" when $BINDIR/<binary> is not executable, "unknown" when
# the binary is not one of the supported tools or no version number is found in
# the first line of its --version output, otherwise the x.y.z version.
get_installed_version() {
    local binary="$1"
    local path="${BINDIR}/${binary}"

    if [[ ! -x "$path" ]]; then
        echo "not_installed"
        return
    fi

    # Pick the pattern that extracts the version from the first --version line
    local version_regex
    case "$binary" in
        prometheus | promtool \
            | node_exporter | blackbox_exporter | mysqld_exporter \
            | alertmanager | amtool \
            | loki | promtail)
            version_regex='version \K[0-9]+\.[0-9]+\.[0-9]+'
            ;;
        alloy)
            version_regex='[0-9]+\.[0-9]+\.[0-9]+'
            ;;
        *)
            echo "unknown"
            return
            ;;
    esac

    "$path" --version 2>&1 | head -1 | grep -oP "$version_regex" || echo "unknown"
}
# Print the latest released version (x.y.z) of a component.
# Args: repo in "owner/name" form
#   prometheus/<component>: first "<component>-x.y.z" found on prometheus.io/download/
#   grafana/<name>:         first "releases/tag/vX.Y.Z" found on the GitHub
#                           "releases/latest" page of the repository
# Returns 1 (after logging an error) when no version could be determined.
get_latest_version() {
    local repo="$1"
    local version=""
    local semver='[0-9]+\.[0-9]+\.[0-9]+'

    if [[ "$repo" == prometheus/* ]]; then
        local component="${repo#prometheus/}"
        version=$(curl -sf "https://prometheus.io/download/" | \
            grep -oP "${component}-\K${semver}" | head -1 || echo "")
    elif [[ "$repo" == grafana/* ]]; then
        version=$(curl -sfL "https://github.com/${repo}/releases/latest" | \
            grep -oP "releases/tag/v\K${semver}" | head -1 || echo "")
    fi

    if [[ -z "$version" ]]; then
        log_err "Failed to query latest version for ${repo}"
        return 1
    fi
    echo "$version"
}
# Print the GitHub release download URL for a component.
# Args: repo ("owner/name"), version (without "v"), file pattern
#   prometheus/*: .../download/v<version>/<name>-<version>.<pattern>
#   grafana/*:    .../download/v<version>/<pattern>
# Prints nothing for any other repository owner.
get_download_url() {
    local repo="$1" version="$2" pattern="$3"
    local release_base="https://github.com/${repo}/releases/download/v${version}"

    if [[ "$repo" == prometheus/* ]]; then
        printf '%s\n' "${release_base}/${repo#*/}-${version}.${pattern}"
    elif [[ "$repo" == grafana/* ]]; then
        printf '%s\n' "${release_base}/${pattern}"
    fi
}
# Download a release file into a work directory and unpack it there.
# Args: url, workdir (created if missing)
# *.tar.gz / *.tgz are untarred, *.zip is unzipped, anything else is treated as
# a bare binary and made executable.
# Returns 1 when the download fails, otherwise the status of the unpack step.
#
# Everything is addressed by path; the function no longer changes directory,
# so the caller's working directory is not moved into a scratch directory
# that is deleted at exit.
download_and_extract() {
    local url="$1"
    local workdir="$2"

    mkdir -p "$workdir"

    local filename
    filename=$(basename "$url")
    local archive="${workdir}/${filename}"

    log_info "Downloading ${filename}"
    if ! curl -sfL -o "$archive" "$url"; then
        log_err "Download failed: ${url}"
        return 1
    fi

    case "$filename" in
        *.tar.gz | *.tgz)
            tar -xzf "$archive" -C "$workdir"
            ;;
        *.zip)
            unzip -q "$archive" -d "$workdir"
            ;;
        *)
            chmod +x "$archive"
            ;;
    esac
}
# Stop a systemd service if it is currently active.
# Args: service name
# Returns 0 when the service was active (a stop was issued), 1 when it was not
# running; callers use this to decide whether to start it again afterwards.
stop_service() {
    local service="$1"

    systemctl is-active --quiet "$service" 2>/dev/null || return 1

    log_info "Stopping ${service}"
    systemctl stop "$service"
    return 0
}
# Start a systemd service again after its binary was replaced.
# Args: service name
# Returns 1 (after logging an error) when systemd could not start the unit.
#
# The service is started whether or not the unit is *enabled*: the updater
# calls this only for services it stopped itself, and a unit that was started
# manually (active but not enabled) must not be left down after an update.
start_service() {
    local service="$1"

    log_info "Starting ${service}"
    # Reload unit files first in case the update changed them
    systemctl daemon-reload
    if ! systemctl start "$service"; then
        log_err "Failed to start ${service}"
        return 1
    fi
}
# Keep a timestamped copy of an installed binary next to the original.
# Args: binary name (looked up in $BINDIR)
# Does nothing when the binary does not exist.
backup_binary() {
    local binary="$1"
    local path="${BINDIR}/${binary}"

    if [[ -f "$path" ]]; then
        local backup
        backup="${path}.backup.$(date +%Y%m%d_%H%M%S)"
        cp "$path" "$backup"
        # Separate source and destination so the message is readable
        log_info "Backed up ${path} -> ${backup}"
    fi
}
# Copy a component's configuration files into $BACKUPDIR with a timestamp suffix.
# Args: component name (informational only), space-separated list of config paths
# Paths that do not exist are skipped. An empty list is a no-op.
backup_configs() {
    local name="$1"
    local config_files="$2"

    if [[ -z "$config_files" ]]; then
        return 0
    fi

    mkdir -p "$BACKUPDIR"
    local timestamp
    timestamp=$(date +%Y%m%d_%H%M%S)

    # Split the list on whitespace without exposing it to pathname expansion,
    # and keep the loop variables local to this function.
    local -a cfg_list
    read -r -a cfg_list <<< "$config_files"

    local cfg filename
    for cfg in "${cfg_list[@]}"; do
        if [[ -f "$cfg" ]]; then
            filename=$(basename "$cfg")
            cp "$cfg" "${BACKUPDIR}/${filename}.${timestamp}"
            log_info "Config backed up: ${cfg} -> ${BACKUPDIR}/${filename}.${timestamp}"
        fi
    done
}
# Locate one extracted binary inside a work directory.
# Args: directory, binary name
# An exact file-name match wins. Otherwise a "<name>-*" file is accepted (some
# releases ship the binary as "<name>-linux-<arch>"), but never the downloaded
# archive itself, which matches the same pattern.
# Prints the path, or nothing when no candidate exists.
_find_extracted_binary() {
    local dir="$1" bin="$2" match=""

    match=$(find "$dir" -type f -name "$bin" -print -quit 2>/dev/null) || true
    if [[ -z "$match" ]]; then
        match=$(find "$dir" -type f -name "${bin}-*" \
            ! -name '*.tar.gz' ! -name '*.tgz' ! -name '*.zip' \
            -print -quit 2>/dev/null) || true
    fi
    printf '%s' "$match"
}

# Update one binary-distributed component to the latest release.
# Args:
#   $1 display name        $2 repo ("owner/name")   $3 systemd service
#   $4 space-separated binaries (the first one is used for version detection)
#   $5 file pattern, "ARCH" is replaced by the detected architecture
#   $6 owner of the installed binaries (default: prometheus)
#   $7 space-separated config files to back up (default: none)
# Globals read:    BINDIR, TMPDIR_BASE, FORCE, CHECK_ONLY
# Globals written: UPDATED, SKIPPED, FAILED
# Returns 0 when the component is not installed, already current, only checked,
# or updated; 1 on failure (FAILED is incremented).
update_component() {
    local name="$1"
    local repo="$2"
    local service_name="$3"
    local binaries="$4"
    local file_pattern="$5"
    local owner="${6:-prometheus}"
    local config_files="${7:-}"

    local hw
    hw=$(detect_arch)

    # Components that are not installed are silently skipped
    local installed
    installed=$(get_installed_version "${binaries%% *}")
    if [[ "$installed" == "not_installed" ]]; then
        return 0
    fi

    local latest
    latest=$(get_latest_version "$repo") || { ((FAILED++)) || true; return 1; }

    echo ""
    log " ${CYAN}${name}${NC}: installed=${installed} latest=${latest}"

    if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then
        log_ok "Already at latest version"
        ((SKIPPED++)) || true
        return 0
    fi

    if [[ "$CHECK_ONLY" == "true" ]]; then
        if [[ "$installed" != "$latest" ]]; then
            log_warn "Update available: ${installed} -> ${latest}"
        fi
        return 0
    fi

    local pattern="${file_pattern//ARCH/${hw}}"
    local url
    url=$(get_download_url "$repo" "$latest" "$pattern")
    if [[ -z "$url" ]]; then
        log_err "Could not find download URL for ${name} (pattern: ${pattern})"
        ((FAILED++)) || true
        return 1
    fi

    # Download first: the service is only stopped once the new files are on disk
    local workdir="${TMPDIR_BASE}/${name}"
    download_and_extract "$url" "$workdir" || { ((FAILED++)) || true; return 1; }

    backup_configs "$name" "$config_files"

    local was_running=false
    if stop_service "$service_name"; then
        was_running=true
    fi

    local bin found
    local replaced=0
    for bin in $binaries; do
        found=$(_find_extracted_binary "$workdir" "$bin")
        if [[ -z "$found" ]]; then
            log_warn "Binary ${bin} not found in download"
            continue
        fi
        backup_binary "$bin"
        mv "$found" "${BINDIR}/${bin}"
        # "owner:owner" is the current chown syntax, "owner." the legacy one
        chown "${owner}:${owner}" "${BINDIR}/${bin}" 2>/dev/null || \
            chown "${owner}." "${BINDIR}/${bin}" 2>/dev/null || true
        chmod 755 "${BINDIR}/${bin}"
        log_ok "Updated ${bin}"
        replaced=$((replaced + 1))
    done

    # Bring the service back even when nothing could be replaced
    if [[ "$was_running" == "true" ]]; then
        start_service "$service_name"
    fi

    # Do not report success when the download contained none of the binaries
    if [[ "$replaced" -eq 0 ]]; then
        log_err "No binaries for ${name} were found in the download; nothing was replaced"
        ((FAILED++)) || true
        return 1
    fi

    local new_ver
    new_ver=$(get_installed_version "${binaries%% *}")
    log_ok "${name} updated: ${installed} -> ${new_ver}"
    ((UPDATED++)) || true
}
# Tell whether a package is installed according to the system package database.
# Args: package name
# rpm is consulted when available, otherwise dpkg. Returns 1 when neither exists.
is_pkg_installed() {
    local pkg="$1"

    if command -v rpm >/dev/null 2>&1; then
        rpm -q "$pkg" >/dev/null 2>&1
        return
    fi

    if command -v dpkg >/dev/null 2>&1; then
        # "ii" marks a package that is installed and configured
        dpkg -l "$pkg" 2>/dev/null | grep -q "^ii"
        return
    fi

    return 1
}
# Update Grafana Alloy, choosing the method that matches how it was installed.
# Nothing is done when alloy is neither on PATH nor executable in $BINDIR.
# A package-manager installation is upgraded through the package manager, a
# standalone binary is replaced from the GitHub release.
update_alloy() {
    local have_alloy=false
    if command -v alloy >/dev/null 2>&1 || [[ -x "${BINDIR}/alloy" ]]; then
        have_alloy=true
    fi
    [[ "$have_alloy" == true ]] || return 0

    if ! is_pkg_installed "alloy"; then
        log_info "Alloy installed as standalone binary — updating from GitHub"
        update_component "Alloy" "grafana/alloy" "alloy" "alloy" "alloy-linux-ARCH.zip" "root" "/etc/alloy/config.alloy"
        return
    fi

    log_info "Alloy installed via package manager — updating with dnf/apt"
    update_alloy_pkg
}
# Upgrade a package-manager installation of Grafana Alloy.
# Globals read:    BINDIR, FORCE, CHECK_ONLY
# Globals written: UPDATED, SKIPPED, FAILED
# Returns 1 when the latest version cannot be determined or no supported
# package manager is available; 0 otherwise.
update_alloy_pkg() {
    # Prefer alloy from PATH, fall back to the copy in $BINDIR
    local alloy_bin="alloy"
    command -v alloy >/dev/null 2>&1 || alloy_bin="${BINDIR}/alloy"

    local installed
    installed=$("$alloy_bin" --version 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "unknown")

    local latest
    latest=$(curl -sfL "https://github.com/grafana/alloy/releases/latest" | \
        grep -oP 'releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "")
    if [[ -z "$latest" ]]; then
        log_err "Failed to query latest version for Alloy"
        ((FAILED++)) || true
        return 1
    fi

    echo ""
    log " ${CYAN}Alloy${NC}: installed=${installed} latest=${latest}"

    if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then
        log_ok "Already at latest version"
        ((SKIPPED++)) || true
        return 0
    fi

    if [[ "$CHECK_ONLY" == "true" ]]; then
        if [[ "$installed" != "$latest" ]]; then
            log_warn "Update available: ${installed} -> ${latest}"
        fi
        return 0
    fi

    backup_configs "Alloy" "/etc/alloy/config.alloy"

    if command -v apt-get >/dev/null 2>&1; then
        apt-get -y update && apt-get -y install --only-upgrade alloy
    elif command -v dnf >/dev/null 2>&1; then
        dnf -y upgrade alloy
    elif command -v yum >/dev/null 2>&1; then
        yum -y update alloy
    else
        # Same handling as the Grafana updater: without a package manager
        # nothing was upgraded, so do not restart or report success.
        log_err "No supported package manager found for Alloy update"
        ((FAILED++)) || true
        return 1
    fi

    systemctl daemon-reload
    systemctl restart alloy

    local new_ver
    new_ver=$("$alloy_bin" --version 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "unknown")
    log_ok "Alloy updated: ${installed} -> ${new_ver}"
    ((UPDATED++)) || true
}
# Upgrade Grafana through the system package manager.
# Nothing is done when grafana-server is not on PATH.
# Globals read:    FORCE, CHECK_ONLY
# Globals written: UPDATED, SKIPPED, FAILED
# Returns 1 when the latest version cannot be determined or no supported
# package manager is available; 0 otherwise.
update_grafana() {
    if ! command -v grafana-server >/dev/null 2>&1; then
        return 0
    fi

    local installed
    installed=$(grafana-server -v 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "unknown")

    local latest
    latest=$(curl -sfL "https://github.com/grafana/grafana/releases/latest" | \
        grep -oP 'releases/tag/v\K[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "")
    if [[ -z "$latest" ]]; then
        log_err "Failed to query latest version for Grafana"
        ((FAILED++)) || true
        return 1
    fi

    echo ""
    log " ${CYAN}Grafana${NC}: installed=${installed} latest=${latest}"

    if [[ "$installed" == "$latest" ]] && [[ "$FORCE" == "false" ]]; then
        log_ok "Already at latest version"
        ((SKIPPED++)) || true
        return 0
    fi

    if [[ "$CHECK_ONLY" == "true" ]]; then
        if [[ "$installed" != "$latest" ]]; then
            # Keep the two versions apart so the message is readable
            log_warn "Update available: ${installed} -> ${latest}"
        fi
        return 0
    fi

    backup_configs "Grafana" "/etc/grafana/grafana.ini /etc/grafana/ldap.toml"

    log_info "Updating Grafana via package manager"
    if command -v apt-get >/dev/null 2>&1; then
        apt-get -y update && apt-get -y install --only-upgrade grafana
    elif command -v dnf >/dev/null 2>&1; then
        dnf -y upgrade grafana
    elif command -v yum >/dev/null 2>&1; then
        yum -y update grafana
    else
        log_err "No supported package manager found for Grafana update"
        ((FAILED++)) || true
        return 1
    fi

    systemctl daemon-reload
    systemctl restart grafana-server

    local new_ver
    new_ver=$(grafana-server -v 2>&1 | grep -oP '[0-9]+\.[0-9]+\.[0-9]+' | head -1 || echo "unknown")
    log_ok "Grafana updated: ${installed} -> ${new_ver}"
    ((UPDATED++)) || true
}
# Decide whether a component was selected on the command line.
# Args: component key (e.g. "prometheus", "node_exporter", "blackbox")
# Returns 0 when no component was requested explicitly (meaning "all") or when
# the key is in COMPONENTS_REQUESTED; 1 otherwise.
should_update() {
    local component="$1"

    if [[ ${#COMPONENTS_REQUESTED[@]} -eq 0 ]]; then
        return 0
    fi

    # Loop variable is local so it cannot overwrite a caller's $c
    local c
    for c in "${COMPONENTS_REQUESTED[@]}"; do
        [[ "$c" == "$component" ]] && return 0
    done
    return 1
}
# Parse the command line into the global option variables.
# Globals written: CHECK_ONLY, BACKUP_ONLY, FORCE, ARCH, COMPONENTS_REQUESTED
# --help prints the usage text and exits 0. An unknown option or a missing
# --arch value is an error: it is reported and the script exits 1.
parse_arguments() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --check)          CHECK_ONLY=true; shift ;;
            --backup-only)    BACKUP_ONLY=true; shift ;;
            --force)          FORCE=true; shift ;;
            --all)            COMPONENTS_REQUESTED=(); shift ;;
            --prometheus)     COMPONENTS_REQUESTED+=("prometheus"); shift ;;
            --node-exporter)  COMPONENTS_REQUESTED+=("node_exporter"); shift ;;
            --blackbox)       COMPONENTS_REQUESTED+=("blackbox"); shift ;;
            --alertmanager)   COMPONENTS_REQUESTED+=("alertmanager"); shift ;;
            --mysql-exporter) COMPONENTS_REQUESTED+=("mysql_exporter"); shift ;;
            --loki)           COMPONENTS_REQUESTED+=("loki"); shift ;;
            --promtail)       COMPONENTS_REQUESTED+=("promtail"); shift ;;
            --alloy)          COMPONENTS_REQUESTED+=("alloy"); shift ;;
            --grafana)        COMPONENTS_REQUESTED+=("grafana"); shift ;;
            --arch)
                # Without this check a trailing "--arch" dies with an
                # "unbound variable" error under `set -u`
                if [[ -z "${2:-}" ]]; then
                    log_err "Option --arch requires a value"
                    exit 1
                fi
                ARCH="$2"
                shift 2
                ;;
            --help) show_help ;;
            *)
                log_err "Unknown option: $1"
                # show_help ends with `exit 0`; run it in a subshell so a usage
                # error still ends the script with a failure status
                ( show_help ) || true
                exit 1
                ;;
        esac
    done
}
# Run one updater if its component was selected, and never let its failure end
# the whole run.
# Args: component key, command to run, arguments for that command
# The updaters record failures in FAILED and return 1. Under `set -e` a bare
# "should_update X && updater" would terminate the script at the first failed
# component, before the remaining components and the summary. Note that
# `|| true` also relaxes errexit inside the updater.
_run_if_requested() {
    local key="$1"
    shift
    should_update "$key" || return 0
    "$@" || true
}

# Entry point of the Prometheus stack updater.
# Parses the options, requires root, then either backs up the known config
# files (--backup-only) or checks/updates every selected component and prints a
# summary. Exits 1 when at least one component failed, 0 otherwise.
main() {
    parse_arguments "$@"

    if [[ $EUID -ne 0 ]]; then
        log_err "This script must be run as root"
        exit 1
    fi

    mkdir -p "$TMPDIR_BASE" "$(dirname "$LOGFILE")"
    touch "$LOGFILE"

    local mode="UPDATE"
    [[ "$CHECK_ONLY" == "true" ]] && mode="CHECK"
    [[ "$BACKUP_ONLY" == "true" ]] && mode="BACKUP"

    echo ""
    echo "=============================================="
    echo " Prometheus Stack Updater [${mode}]"
    echo " $(date '+%Y-%m-%d %H:%M:%S')"
    echo " Architecture: $(detect_arch)"
    echo "=============================================="

    if [[ "$BACKUP_ONLY" == "true" ]]; then
        local configs=(
            "$PROMDIR/prometheus.yml"
            "$PROMDIR/blackbox.yml"
            "$PROMDIR/alertmanager.yml"
            "/etc/.mysqld_exporter.cnf"
            "/etc/loki/loki-config.yml"
            "/etc/promtail/promtail-config.yml"
            "/etc/alloy/config.alloy"
            "/etc/grafana/grafana.ini"
            "/etc/grafana/ldap.toml"
        )
        local backed_up=0
        mkdir -p "$BACKUPDIR"
        local timestamp
        timestamp=$(date +%Y%m%d_%H%M%S)
        local cfg filename
        for cfg in "${configs[@]}"; do
            if [[ -f "$cfg" ]]; then
                filename=$(basename "$cfg")
                cp "$cfg" "${BACKUPDIR}/${filename}.${timestamp}"
                log_ok "Backed up ${cfg} -> ${BACKUPDIR}/${filename}.${timestamp}"
                # Plain assignment: ((backed_up++)) evaluates to 0 on the first
                # file, which counts as a failure and aborts under `set -e`
                backed_up=$((backed_up + 1))
            fi
        done
        echo ""
        log "Backed up ${backed_up} config file(s) to ${BACKUPDIR}"
        exit 0
    fi

    #                 Key              Updater          Name                Repo                           Service             Binaries               File Pattern              Owner          Config Files
    _run_if_requested "prometheus"     update_component "Prometheus"        "prometheus/prometheus"        "prometheus"        "prometheus promtool"  "linux-ARCH.tar.gz"       "prometheus"   "$PROMDIR/prometheus.yml"
    _run_if_requested "node_exporter"  update_component "Node Exporter"     "prometheus/node_exporter"     "node_exporter"     "node_exporter"        "linux-ARCH.tar.gz"       "root"         ""
    _run_if_requested "blackbox"       update_component "Blackbox Exporter" "prometheus/blackbox_exporter" "blackbox_exporter" "blackbox_exporter"    "linux-ARCH.tar.gz"       "prometheus"   "$PROMDIR/blackbox.yml"
    _run_if_requested "alertmanager"   update_component "AlertManager"      "prometheus/alertmanager"      "alertmanager"      "alertmanager amtool"  "linux-ARCH.tar.gz"       "alertmanager" "$PROMDIR/alertmanager.yml"
    _run_if_requested "mysql_exporter" update_component "MySQL Exporter"    "prometheus/mysqld_exporter"   "mysqld_exporter"   "mysqld_exporter"      "linux-ARCH.tar.gz"       "prometheus"   "/etc/.mysqld_exporter.cnf"
    _run_if_requested "loki"           update_component "Loki"              "grafana/loki"                 "loki"              "loki"                 "loki-linux-ARCH.zip"     "loki"         "/etc/loki/loki-config.yml"
    _run_if_requested "promtail"       update_component "Promtail"          "grafana/loki"                 "promtail"          "promtail"             "promtail-linux-ARCH.zip" "promtail"     "/etc/promtail/promtail-config.yml"
    _run_if_requested "alloy"          update_alloy
    _run_if_requested "grafana"        update_grafana

    echo ""
    echo "=============================================="
    echo -e " Results: ${GREEN}${UPDATED} updated${NC} ${YELLOW}${SKIPPED} current${NC} ${RED}${FAILED} failed${NC}"
    echo "=============================================="
    echo ""

    if [[ "$CHECK_ONLY" == "false" ]]; then
        log "Log saved to ${LOGFILE}"
    fi

    [[ $FAILED -gt 0 ]] && exit 1
    exit 0
}
main "$@"
+692
View File
@@ -0,0 +1,692 @@
<#
.SYNOPSIS
Monitors RDP user sessions and exports metrics for Prometheus windows_exporter.
.DESCRIPTION
This script monitors the number of active RDP user sessions and creates Prometheus-formatted metrics.
The metrics are written to a text file that can be consumed by the windows_exporter.
It can also run periodically.
.PARAMETER MetricsPath
The path where the Prometheus metrics file will be written.
.PARAMETER IntervalSeconds
The interval in seconds for the scheduled task. Default is 60 seconds.
.PARAMETER RunOnce
Switch to run the script once and exit instead of creating a scheduled task.
.PARAMETER Debug
Switch to run the script in debug mode: prints the raw qwinsta and quser output plus the parsed session-to-IP mapping, without collecting metrics.
.PARAMETER DryRun
Switch to output metrics to console instead of writing to file.
.PARAMETER Verbose
Switch to enable verbose debug output.
.PARAMETER Quiet
Switch to suppress non-error output.
.PARAMETER NoSchedule
Switch to skip scheduled task creation.
.PARAMETER Version
Switch to display script version and exit.
.NOTES
Version: 1.1.2-20251002
Author: Phil Connor contact@mylinux.work
Features:
- Monitors active RDP user sessions using quser command
- Captures username, session name, session ID, state (Active/Disconnected), idle time, and logon time
- Attempts to correlate session IDs with client IP addresses using qwinsta
- Writes metrics to a text file for consumption by windows_exporter.
- Reads last 10 PowerShell commands from each user's PSReadline history file.
#>
param(
# Target .prom file. When a non-empty value is passed, its parent directory must already exist,
# otherwise the validation script throws during parameter binding.
[ValidateScript({
if ($_ -and -not (Test-Path (Split-Path $_ -Parent))) {
throw "Directory for metrics path does not exist: $(Split-Path $_ -Parent)"
}
return $true
})]
[string]$MetricsPath = "C:\Program Files\windows_exporter\textfile_inputs\users_logged_in.prom",
# Repetition interval (seconds) passed to the scheduled task that is created in scheduled mode
[int]$IntervalSeconds = 60,
# Collect metrics a single time; no scheduled task is created
[switch]$RunOnce,
# Run the qwinsta/quser parsing diagnostics instead of collecting metrics
[switch]$Debug,
# Collect once and print the metrics to the console instead of writing the file
[switch]$DryRun,
# Enables Write-VerboseLog output and sets the verbose/information preferences to Continue
[switch]$Verbose,
# Suppresses Write-InfoLog output and silences the verbose/information/warning streams
[switch]$Quiet,
# In scheduled mode, skip creating the scheduled task but still collect once
[switch]$NoSchedule,
# Print version information and exit
[switch]$Version
)
# Handle version display: print the banner and exit before any other work is done.
# The version string must match the "Version:" field in the .NOTES help section
# (1.1.2-20251002); the previous literal here was a stale 1.1.0-20250915.
if ($Version) {
    Write-Host "Windows RDP User Monitor PowerShell Script"
    Write-Host "Version: 1.1.2-20251002"
    Write-Host "Author: Phil Connor contact@mylinux.work"
    exit 0
}
# Map the -Verbose / -Quiet switches onto PowerShell's stream preferences.
# -Quiet is applied last, so it wins when both switches are supplied.
if ($Verbose) {
    $VerbosePreference = $InformationPreference = 'Continue'
}
if ($Quiet) {
    $VerbosePreference = $InformationPreference = $WarningPreference = 'SilentlyContinue'
}
# Logging helpers
# Write-InfoLog: prints a timestamped, green "[INFO]" line to the host unless -Quiet was given.
function Write-InfoLog {
    param([string]$Message)
    if ($Quiet) { return }
    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    Write-Host "[INFO] $stamp $Message" -ForegroundColor Green
}
# Write-VerboseLog: prints a timestamped, cyan "[VERBOSE]" line to the host, only when -Verbose was given.
function Write-VerboseLog {
    param([string]$Message)
    if (-not $Verbose) { return }
    $stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss'
    Write-Host "[VERBOSE] $stamp $Message" -ForegroundColor Cyan
}
# Script-scoped configuration table shared by the parsing and metric functions below
$script:Config = @{
RDP_SESSION_PATTERN = "rdp-tcp#\d+|console" # Regex for session names that are kept: RDP listeners ("rdp-tcp#N") and the local console
METRIC_NAME = "windows_rdp_users_logged_in" # Primary Prometheus metric name
QWINSTA_IP_REGEX = '^\s*(\S+)\s+(\S+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+\.\d+\.\d+\.\d+)' # Pattern for IP extraction; NOTE(review): not referenced anywhere in this script - Get-SessionIPAddresses uses its own inline patterns
QUSER_HEADER_REGEX = "USERNAME.*SESSIONNAME.*ID.*STATE" # Expected quser output header format
COLUMNS = @{ # Zero-based field positions after a quser line is split on whitespace
USERNAME = 0; SESSION = 1; ID = 2; STATE = 3; IDLE = 4; LOGON_START = 5
}
}
# Make an arbitrary string safe to embed as a Prometheus label value.
# Double quotes, backslashes, newlines, carriage returns, tabs and ">" are each replaced by "_".
# Values longer than 200 characters are cut to 200 and suffixed with "...".
# Null/empty input yields an empty string.
function ConvertTo-MetricLabel {
    param([AllowEmptyString()][string]$Value)

    if ([string]::IsNullOrEmpty($Value)) { return "" }

    $clean = [regex]::Replace($Value, '["\\\n\r\t>]', '_')

    if ($clean.Length -le 200) {
        return $clean
    }
    return $clean.Substring(0, 200) + "..."
}
# Render one metric family in Prometheus text exposition format.
#   Name    - metric family name
#   Help    - text for the "# HELP" line
#   Type    - text for the "# TYPE" line (e.g. gauge, counter)
#   Metrics - samples; each must expose non-null Labels (pre-formatted "{...}" string) and Value
# Emits the HELP line, the TYPE line, then one "<Name><Labels> <Value>" line per sample.
# If any sample is invalid nothing is emitted and a non-terminating error is written instead.
function Write-PrometheusMetric {
    param(
        [ValidateNotNullOrEmpty()][string]$Name,
        [ValidateNotNullOrEmpty()][string]$Help,
        [ValidateNotNullOrEmpty()][string]$Type,
        [ValidateNotNull()][array]$Metrics
    )
    try {
        # Build the complete family first so a bad sample suppresses the whole family
        $lines = @("# HELP $Name $Help", "# TYPE $Name $Type")
        foreach ($sample in $Metrics) {
            $incomplete = ($null -eq $sample) -or ($null -eq $sample.Labels) -or ($null -eq $sample.Value)
            if ($incomplete) {
                throw "Invalid metric data"
            }
            $lines += "$Name$($sample.Labels) $($sample.Value)"
        }
        $lines
    }
    catch {
        Write-Error "Failed to write metric: $($_.Exception.Message)"
    }
}
# Run quser and return its raw output lines after basic sanity checks.
# Throws when quser yields no output, when fewer than two lines are returned, or when the
# first line does not look like the expected header. A missing quser executable is
# re-thrown with a more descriptive message.
function Get-QUserData {
    try {
        # stderr is discarded so quser's own diagnostics do not clutter the output
        $sessionLines = quser 2>$null

        $nothingReturned = (-not $sessionLines) -or ($sessionLines.Count -eq 0)
        if ($nothingReturned) {
            throw "No user sessions found or quser command failed"
        }

        # Need a header plus at least one data row, and the header must match the known layout
        $looksValid = ($sessionLines.Count -ge 2) -and ($sessionLines[0] -match $script:Config.QUSER_HEADER_REGEX)
        if (-not $looksValid) {
            throw "Unexpected quser output format"
        }

        return $sessionLines
    }
    catch [System.Management.Automation.CommandNotFoundException] {
        throw "quser command not found. This script requires Windows with Terminal Services."
    }
}
# Build a hashtable mapping session ID (string) -> IPv4 address by scanning qwinsta output.
# Only lines that contain something shaped like a dotted-quad are considered; for those,
# three increasingly loose patterns are tried to find the session ID on the same line.
# Returns an empty hashtable when qwinsta yields nothing or when anything throws.
# NOTE(review): qwinsta's usual columns are SESSIONNAME/USERNAME/ID/STATE/TYPE/DEVICE - confirm
# that an IP address actually appears in its output on the target systems, otherwise this
# mapping will always be empty.
function Get-SessionIPAddresses {
try {
$sessionIPs = @{}
# Run qwinsta to get session information; stderr is discarded
$qwinstaOutput = qwinsta 2>$null
if ($qwinstaOutput) {
Write-Verbose "Raw qwinsta output:"
$qwinstaOutput | ForEach-Object { Write-Verbose " $_" }
foreach ($line in $qwinstaOutput) {
# Skip header lines and empty lines
if ([string]::IsNullOrWhiteSpace($line) -or $line -match '^\s*SESSIONNAME') {
continue
}
Write-Verbose "Processing qwinsta line: '$line'"
# Look for any IP address in the line and try to correlate with session ID
if ($line -match '(\d+\.\d+\.\d+\.\d+)') {
# Capture the IP immediately: the -match tests below overwrite $matches
$ipAddress = $matches[1]
# Try different patterns to find session ID that corresponds to this IP
$sessionId = $null
# Pattern 1: Standard format with session ID as 3rd column (2nd column optional)
if ($line -match '^\s*(\S+)\s+(\S+)?\s+(\d+)\s+') {
$sessionId = $matches[3]
}
# Pattern 2: RDP session format - first whitespace-delimited number after "rdp-tcp#N"
elseif ($line -match 'rdp-tcp#\d+.*?\s(\d+)\s+') {
$sessionId = $matches[1]
}
# Pattern 3: Any number that looks like a session ID (between spaces)
elseif ($line -match '\s(\d+)\s+\w+') {
$sessionId = $matches[1]
}
# Store the mapping if we found a valid session ID (a later line with the same ID overwrites an earlier one)
if ($sessionId) {
$sessionIPs[$sessionId] = $ipAddress
Write-Verbose "Mapped session ID $sessionId to IP $ipAddress"
}
else {
Write-Verbose "Found IP $ipAddress but could not determine session ID"
}
}
}
}
Write-Verbose "Final session IP mapping: $($sessionIPs | ConvertTo-Json -Compress)"
return $sessionIPs
}
catch {
# Don't fail the entire script if IP detection fails
Write-Warning "Failed to get session IP addresses: $($_.Exception.Message)"
return @{}
}
}
# Parse one data line of quser output into a structured object.
#   Line       - a single quser row (not the header)
#   SessionIPs - optional hashtable of session ID -> IP address used to fill IPAddress
# Returns $null when the row has too few fields, otherwise a PSCustomObject with
# UserName, SessionName, ID, State, IdleTime, LogonTime, ClientLocation and IPAddress.
#
# quser rows have the columns USERNAME, SESSIONNAME, ID, STATE, IDLE TIME, LOGON TIME; there is
# no client-location column. Two consequences are handled here:
#   * Everything from the LOGON_START field to the end of the row is the logon time. The last
#     token (e.g. "AM"/"PM" or the clock time) is no longer cut off and mis-reported as location.
#   * Disconnected sessions have a blank SESSIONNAME, which shifts every later column one to
#     the left after whitespace splitting. That case is detected (numeric field where the session
#     name should be) and an empty session name is re-inserted.
# ClientLocation is derived from the session name: "local" for the console session,
# "remote" for everything else.
function ConvertFrom-QUserLine {
    param(
        [ValidateNotNullOrEmpty()][string]$Line,
        [hashtable]$SessionIPs = @{}
    )
    $cols = $script:Config.COLUMNS

    # Split on runs of whitespace
    $fields = @($Line.Trim() -split '\s+')

    # Blank SESSIONNAME: field 1 is the numeric ID and field 2 is the (non-numeric) state
    if ($fields.Count -gt 2 -and $fields[1] -match '^\d+$' -and $fields[2] -notmatch '^\d+$') {
        $fields = @($fields[0], '') + $fields[1..($fields.Count - 1)]
    }

    # Validate minimum expected field count
    if ($fields.Count -lt 6) { return $null }

    $sessionName = $fields[$cols.SESSION]
    $sessionId = $fields[$cols.ID]

    # Look up IP address for this session if available
    $ipAddress = if ($SessionIPs.ContainsKey($sessionId)) { $SessionIPs[$sessionId] } else { "unknown" }

    # Logon time spans every remaining field (date, time and optional AM/PM marker)
    $logonTime = $fields[$cols.LOGON_START..($fields.Count - 1)] -join ' '

    # A leading ">" marks the session that ran quser; it is not part of the user name
    $cleanUserName = $fields[$cols.USERNAME] -replace '^>', ''

    $clientLocation = if ($sessionName -eq 'console') { "local" } else { "remote" }

    return [PSCustomObject]@{
        UserName       = $cleanUserName
        SessionName    = $sessionName
        ID             = $sessionId
        State          = $fields[$cols.STATE]
        IdleTime       = $fields[$cols.IDLE]
        LogonTime      = $logonTime
        ClientLocation = $clientLocation
        IPAddress      = $ipAddress
    }
}
# Return up to MaxCommands recent command lines for a user.
#   UserName    - account name; used to build the profile path under C:\Users
#   SessionId   - session ID, only used for the "query session" fallback probe
#   MaxCommands - number of trailing history lines to read (default 10)
# Source is the user's PSReadline ConsoleHost_history.txt. When that yields nothing, a
# placeholder string is returned instead of real history (see the fallback below).
# On any exception the single string "Command history unavailable" is returned.
# NOTE(review): the returned lines end up as Prometheus label values further down the pipeline;
# shell history can contain secrets - confirm that exporting it is intended.
# NOTE(review): a single-entry result is unrolled to a scalar string by the pipeline, so
# callers should not assume an array.
function Get-UserCommandHistory {
param(
[string]$UserName,
[string]$SessionId,
[int]$MaxCommands = 10
)
try {
# Replace characters that are invalid in file names (path separators are not touched)
$sanitizedUserName = $UserName -replace '[<>:"|?*]', '_'
# Assumes the profile directory is C:\Users\<name> - TODO confirm for domain/renamed profiles
$historyPath = "C:\Users\$sanitizedUserName\AppData\Roaming\Microsoft\Windows\PowerShell\PSReadline\ConsoleHost_history.txt"
$commands = @()
# Check if PowerShell history file exists
if (Test-Path $historyPath) {
# Read the last N commands from the history file; read errors are silently ignored
$historyContent = Get-Content $historyPath -Tail $MaxCommands -ErrorAction SilentlyContinue
if ($historyContent) {
# Clean up the commands by trimming whitespace and removing empty lines
$commands = $historyContent | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne "" }
}
}
# Fallback when no PowerShell history was found. No CMD history is actually read here:
# the session is only probed, and a placeholder message is substituted.
if ($commands.Count -eq 0) {
try {
# Probe the session; any output means the session exists
$sessionInfo = query session $SessionId 2>$null
if ($sessionInfo) {
# This is a simplified approach - in practice, CMD history is harder to access remotely
$commands = @("No recent command history available")
}
}
catch {
$commands = @("Unable to retrieve command history")
}
}
# Return at most MaxCommands entries (may be nothing at all if the probe above produced no output)
return $commands | Select-Object -First $MaxCommands
}
catch {
Write-Verbose "Failed to get command history for user $UserName (Session $SessionId): $($_.Exception.Message)"
return @("Command history unavailable")
}
}
# Collect the sessions reported by quser, enrich them, and return only RDP/console sessions.
# Steps: read quser lines, build the session-ID -> IP map, parse every data row, attach a
# CommandHistory property to each parsed row, then keep rows whose session name matches
# RDP_SESSION_PATTERN and whose user name, session name and state are all non-empty.
# Any failure is re-thrown as "Failed to collect user data: ...".
# NOTE(review): when no row survives the filter nothing is returned, which the caller sees as $null.
function Get-RDPUsers {
try {
# Get raw user session data and IP address mappings
$qUserOutput = Get-QUserData
$sessionIPs = Get-SessionIPAddresses
Write-Verbose "Found $($qUserOutput.Count) total user sessions"
Write-Verbose "Found $($sessionIPs.Count) session IP addresses"
# Process each user session line (skip header line)
$allUsers = $qUserOutput | Select-Object -Skip 1 | ForEach-Object {
# Parse the quser output line into a structured object
$user = ConvertFrom-QUserLine $_ $sessionIPs
if ($null -eq $user) {
Write-Warning "Skipping malformed quser output: $_"
# Inside ForEach-Object, return only ends the current item (acts like "continue")
return
}
# Add command history to user object (done for every parsed row, before the RDP filter below)
$commandHistory = Get-UserCommandHistory -UserName $user.UserName -SessionId $user.ID
$user | Add-Member -NotePropertyName "CommandHistory" -NotePropertyValue $commandHistory
$user
} | Where-Object { $_ }
# Filter to only RDP sessions (excluding services and other non-user sessions)
$rdpUsers = $allUsers | Where-Object {
$_.SessionName -match $script:Config.RDP_SESSION_PATTERN -and
![string]::IsNullOrEmpty($_.UserName) -and
![string]::IsNullOrEmpty($_.SessionName) -and
![string]::IsNullOrEmpty($_.State)
}
Write-Verbose "Processed $($allUsers.Count) valid user sessions"
Write-Verbose "Filtered to $($rdpUsers.Count) RDP sessions"
return $rdpUsers
}
catch {
throw "Failed to collect user data: $($_.Exception.Message)"
}
}
# Turn user session objects into metric samples (hashtables with Labels and Value).
#   Users - session objects exposing UserName, SessionName, State, ClientLocation, IPAddress
#           and optionally CommandHistory; may be $null or empty
# Returns, in order: four summary samples (total, active, disconnected, users_list), one sample
# per session, and one sample per command-history entry.
#
# Fixes relative to the previous version:
#   * An empty/null user list now yields the summary samples with zero counts instead of
#     nothing, so the gauges report 0 rather than disappearing.
#   * CommandHistory is wrapped in @() before indexing. A single-entry history arrives as a
#     scalar string, and indexing a string returns one character rather than the command.
#   * The state label is sanitized like every other label, and sessions without a state no
#     longer raise a null-index error when counted.
function New-UserMetrics {
    param([array]$Users)

    if ($null -eq $Users) { $Users = @() }

    # Counters keyed by the quser state text; unknown states get their own key on first use
    $stateCount = @{ Active = 0; Disc = 0 }
    $usernames = @()
    $userMetrics = @()
    $commandMetrics = @()

    foreach ($user in $Users) {
        if ($null -eq $user) {
            Write-Warning "Found null user in collection"
            continue
        }

        # Track state counts for summary metrics
        if (-not [string]::IsNullOrEmpty($user.State)) {
            $stateCount[$user.State]++
        }
        $usernames += $user.UserName

        $userLabel = ConvertTo-MetricLabel $user.UserName
        $sessionLabel = ConvertTo-MetricLabel $user.SessionName

        # One sample per session
        $userMetrics += @{
            Labels = "{username=`"$userLabel`",session=`"$sessionLabel`",state=`"$(ConvertTo-MetricLabel $user.State)`",location=`"$(ConvertTo-MetricLabel $user.ClientLocation)`",ip=`"$(ConvertTo-MetricLabel $user.IPAddress)`"}"
            Value  = 1
        }

        # One sample per history entry; command_index is 1-based
        if ($user.CommandHistory) {
            $history = @($user.CommandHistory)
            for ($i = 0; $i -lt $history.Count; $i++) {
                $command = ConvertTo-MetricLabel $history[$i]
                $commandMetrics += @{
                    Labels = "{username=`"$userLabel`",session=`"$sessionLabel`",command_index=`"$($i + 1)`",command=`"$command`"}"
                    Value  = 1
                }
            }
        }
    }

    # Summary samples: totals plus a comma-separated, sorted list of user names
    $summaryMetrics = @(
        @{ Labels = '{metric="total"}'; Value = $Users.Count }
        @{ Labels = '{metric="active"}'; Value = $stateCount.Active }
        @{ Labels = '{metric="disconnected"}'; Value = $stateCount.Disc }
        @{ Labels = '{metric="users_list",users="' + $(ConvertTo-MetricLabel (($usernames | Sort-Object) -join ',')) + '"}'; Value = 1 }
    )

    return $summaryMetrics + $userMetrics + $commandMetrics
}
# Persist metric text via a temp file that is then moved over the destination.
#   Content - string or array of lines; arrays are joined with "`n" before writing
#   Path    - destination file; when empty, Content is returned unchanged and nothing is written
# The destination directory is created when missing. Failures are reported with Write-Error
# (non-terminating) and a leftover temp file is removed.
function Write-MetricsFile {
    param(
        [ValidateNotNull()]$Content,
        [string]$Path
    )

    if (-not $Path) {
        return $Content
    }

    # Make sure the target directory exists before writing
    $parentDir = Split-Path $Path -Parent
    $mustCreateDir = $parentDir -and -not (Test-Path $parentDir)
    if ($mustCreateDir) {
        try {
            New-Item -Path $parentDir -ItemType Directory -Force | Out-Null
            Write-Verbose "Created directory: $parentDir"
        }
        catch {
            Write-Error "Failed to create directory '$parentDir': $($_.Exception.Message)"
            return
        }
    }

    $stagingPath = "$Path.tmp"
    $writeArgs = @{ FilePath = $stagingPath; Encoding = 'UTF8' }
    try {
        $isArray = $Content -is [array]
        if ($isArray) {
            ($Content -join "`n") | Out-File @writeArgs
        }
        else {
            $Content | Out-File @writeArgs
        }
        # Replace the live file only after the temp file was written completely
        Move-Item -Path $stagingPath -Destination $Path -Force -ErrorAction Stop
    }
    catch {
        Write-Error "Failed to write metrics file: $($_.Exception.Message)"
        if (Test-Path $stagingPath) { Remove-Item $stagingPath -Force }
    }
}
# Run one complete collection cycle: gather RDP sessions and failed logins, render them in
# Prometheus text format, then either print them (-DryRun) or emit them and write $MetricsPath.
# Reads the script-level $DryRun and $MetricsPath parameters. Errors are reported with
# Write-Error; whatever output was already rendered is still written to $MetricsPath.
#
# Fixes relative to the previous version:
#   * "No matching sessions" is a normal result, not an error. Collector results are wrapped
#     in @() so an empty result is an empty array instead of $null, and the cycle continues
#     (previously it aborted with "Get-RDPUsers returned null" and wrote no file).
#   * Dry-run output is printed one metric per line. Write-Host with an array argument joins
#     the elements with spaces, which produced a single unreadable line.
function Invoke-MetricsCollection {
    $startTime = Get-Date
    $output = @()

    # Add dry-run header if applicable
    if ($DryRun) {
        Write-Host "=== DRY RUN MODE - Metrics that would be written to $MetricsPath ===" -ForegroundColor Yellow
    }

    try {
        # Collect RDP user session data
        Write-VerboseLog "Collecting RDP user session data..."
        $rdpUsers = @(Get-RDPUsers)
        Write-VerboseLog "Found $($rdpUsers.Count) RDP users"

        # Convert user data to metric samples
        $metrics = @(New-UserMetrics -Users $rdpUsers)

        # Collect failed login attempts
        Write-VerboseLog "Collecting failed login data..."
        $failedLoginMetrics = @(Get-FailedLogins)

        # Record how long collection took, as an extra sample of the primary metric
        $executionTimeMs = [math]::Round(((Get-Date) - $startTime).TotalMilliseconds, 2)
        $metrics += @{
            Labels = '{metric="execution_time_ms"}'
            Value  = $executionTimeMs
        }

        # Command-history samples go into their own metric family
        $userMetrics = @($metrics | Where-Object { $_.Labels -notmatch 'command=' })
        $commandMetrics = @($metrics | Where-Object { $_.Labels -match 'command=' })

        # Generate Prometheus-formatted output
        $output += Write-PrometheusMetric -Name $script:Config.METRIC_NAME -Help "Number of RDP users currently logged in" -Type "gauge" -Metrics $userMetrics
        if ($commandMetrics.Count -gt 0) {
            $output += Write-PrometheusMetric -Name "windows_rdp_user_command_history" -Help "Recent command history for RDP users" -Type "gauge" -Metrics $commandMetrics
        }
        if ($failedLoginMetrics.Count -gt 0) {
            $output += Write-PrometheusMetric -Name "windows_user_failed_logins" -Help "Failed login attempts from Windows Event Log" -Type "counter" -Metrics $failedLoginMetrics
        }

        Write-VerboseLog "Metrics collection completed (execution time: ${executionTimeMs}ms)"

        # Output to console and/or file based on mode
        if ($DryRun) {
            $output | ForEach-Object { Write-Host $_ }
            Write-Host "=== END DRY RUN OUTPUT ===" -ForegroundColor Yellow
        }
        else {
            Write-Output $output
            Write-MetricsFile -Content $output -Path $MetricsPath
        }
    }
    catch {
        Write-Error "Failed to collect metrics: $($_.Exception.Message)"
        # Attempt to write partial results if available
        if ($MetricsPath -and $output -and -not $DryRun) {
            $output | Out-File -FilePath $MetricsPath -Encoding UTF8
        }
    }
}
# Print a short notice when the PowerShell engine shuts down.
# Register-EngineEvent -Action returns a job object; it is discarded so that it does not
# end up in the script's output stream next to the emitted metrics.
$null = Register-EngineEvent -SourceIdentifier PowerShell.Exiting -Action {
    Write-Host "Shutting down gracefully..."
}
# Register a scheduled task that re-runs this script with -RunOnce at a fixed interval.
#   IntervalSeconds - repetition interval of the trigger (default 60)
#   TaskName        - scheduled task name (default "PrometheusRDPMetrics")
# The task runs as SYSTEM with highest privileges and passes the current $MetricsPath.
# An existing task with the same name is left untouched. Failures are reported with Write-Error.
#
# Fixes relative to the previous version:
#   * The script path comes from $PSCommandPath. Inside a function, $MyInvocation.MyCommand
#     describes the function itself and has no Path, so the task was registered with -File "".
#   * Registration uses -ErrorAction Stop, so a failed registration reaches the catch block
#     instead of being followed by a "created successfully" message.
#   * The task object returned by Register-ScheduledTask is discarded rather than leaking into
#     the script's output.
function New-MetricsScheduledTask {
    param(
        [int]$IntervalSeconds = 60,
        [string]$TaskName = "PrometheusRDPMetrics"
    )
    try {
        # Check if scheduled task already exists
        if (Get-ScheduledTask -TaskName $TaskName -ErrorAction SilentlyContinue) {
            Write-InfoLog "Scheduled task '$TaskName' already exists. Skipping creation."
            return
        }

        $scriptPath = $PSCommandPath
        if ([string]::IsNullOrEmpty($scriptPath)) {
            throw "Cannot determine the script path; run this script from a file to create the scheduled task"
        }

        $principal = New-ScheduledTaskPrincipal -UserId "SYSTEM" -LogonType ServiceAccount -RunLevel Highest
        $action = New-ScheduledTaskAction -Execute "powershell.exe" -Argument "-NoProfile -ExecutionPolicy Bypass -File `"$scriptPath`" -MetricsPath `"$MetricsPath`" -RunOnce"
        $trigger = New-ScheduledTaskTrigger -Once -At (Get-Date) -RepetitionInterval (New-TimeSpan -Seconds $IntervalSeconds)
        $settings = New-ScheduledTaskSettingsSet -AllowStartIfOnBatteries -DontStopIfGoingOnBatteries -StartWhenAvailable

        Register-ScheduledTask -TaskName $TaskName -Action $action -Trigger $trigger -Principal $principal -Settings $settings -Force -ErrorAction Stop | Out-Null
        Write-InfoLog "Scheduled task '$TaskName' created successfully with $IntervalSeconds second interval"
    }
    catch {
        Write-Error "Failed to create scheduled task: $($_.Exception.Message)"
    }
}
# Diagnostic dump used by -Debug: prints the raw qwinsta output, the session-ID -> IP mapping
# produced by Get-SessionIPAddresses, and the raw quser output. Produces host output only.
function Test-QwinstaOutput {
    Write-Host "=== Testing qwinsta output parsing ===" -ForegroundColor Cyan
    try {
        $rawQwinsta = qwinsta 2>$null
        Write-Host "Raw qwinsta output:" -ForegroundColor Yellow
        $rawQwinsta | ForEach-Object { Write-Host " $_" }

        Write-Host "`nTesting IP address extraction:" -ForegroundColor Yellow
        $ipBySession = Get-SessionIPAddresses
        foreach ($entry in $ipBySession.GetEnumerator()) {
            Write-Host " Session ID $($entry.Key) -> IP $($entry.Value)" -ForegroundColor Green
        }

        Write-Host "`nTesting quser output:" -ForegroundColor Yellow
        $rawQuser = quser 2>$null
        $rawQuser | ForEach-Object { Write-Host " $_" }
    }
    catch {
        Write-Error "Test failed: $($_.Exception.Message)"
    }
}
# Summarize failed logon events (Security log, event ID 4625) from the last 24 hours.
# At most the first 50 events returned by Get-WinEvent are inspected.
# Returns an array of samples (hashtables with Labels and Value), one per distinct combination
# of username, source_ip, workstation and failure_type; Value is the number of events with
# that combination. Returns an empty array when the log cannot be read.
#
# Fixes relative to the previous version:
#   * Events with identical labels are aggregated into one sample with a count. Previously
#     each event produced its own sample with value 1, i.e. duplicate series in the output.
#   * Label values are passed through ConvertTo-MetricLabel. The user name in a failed logon is
#     chosen by whoever attempted the logon, so quotes or line breaks in it must not reach the
#     metrics file unescaped.
#   * The loop variable is no longer named $event, which is a PowerShell automatic variable.
function Get-FailedLogins {
    try {
        $windowStart = (Get-Date).AddHours(-24)

        # Query Windows Security Event Log for failed logon attempts (Event ID 4625)
        $failedLogonEvents = Get-WinEvent -FilterHashtable @{
            LogName   = 'Security'
            Id        = 4625
            StartTime = $windowStart
        } -ErrorAction SilentlyContinue | Select-Object -First 50

        # Label string -> number of events, in first-seen order
        $countByLabels = [ordered]@{}

        foreach ($logonEvent in $failedLogonEvents) {
            try {
                $eventXml = [xml]$logonEvent.ToXml()
                $eventData = $eventXml.Event.EventData.Data

                # Extract relevant information from event data
                $targetUserName = ($eventData | Where-Object { $_.Name -eq 'TargetUserName' }).'#text'
                $workstationName = ($eventData | Where-Object { $_.Name -eq 'WorkstationName' }).'#text'
                $sourceNetworkAddress = ($eventData | Where-Object { $_.Name -eq 'IpAddress' }).'#text'
                $failureReason = ($eventData | Where-Object { $_.Name -eq 'SubStatus' }).'#text'

                # Substitute placeholders for missing values
                if ([string]::IsNullOrWhiteSpace($targetUserName)) { $targetUserName = "unknown" }
                if ([string]::IsNullOrWhiteSpace($sourceNetworkAddress) -or $sourceNetworkAddress -eq '-') { $sourceNetworkAddress = "local" }
                if ([string]::IsNullOrWhiteSpace($workstationName)) { $workstationName = "unknown" }

                # Map the SubStatus code to a readable failure type
                $failureType = switch ($failureReason) {
                    "0xC0000064" { "invalid_user" }
                    "0xC000006A" { "wrong_password" }
                    "0xC0000234" { "account_locked" }
                    "0xC0000072" { "account_disabled" }
                    "0xC000006F" { "logon_time_restriction" }
                    "0xC0000070" { "workstation_restriction" }
                    default { "other_failure" }
                }

                $labels = "{username=`"$(ConvertTo-MetricLabel $targetUserName)`",source_ip=`"$(ConvertTo-MetricLabel $sourceNetworkAddress)`",workstation=`"$(ConvertTo-MetricLabel $workstationName)`",failure_type=`"$failureType`"}"
                # A missing key reads as $null, which casts to 0
                $countByLabels[$labels] = 1 + [int]$countByLabels[$labels]
            }
            catch {
                Write-VerboseLog "Failed to parse event: $($_.Exception.Message)"
            }
        }

        $failedLogins = @()
        foreach ($labels in $countByLabels.Keys) {
            $failedLogins += @{
                Labels = $labels
                Value  = $countByLabels[$labels]
            }
        }
        return $failedLogins
    }
    catch {
        Write-Warning "Failed to get failed login events: $($_.Exception.Message)"
        return @()
    }
}
# Entry point - pick the mode of operation from the switches:
#   -Debug            -> run the qwinsta/quser diagnostics only
#   -RunOnce/-DryRun  -> collect metrics once
#   otherwise         -> ensure the scheduled task exists (unless -NoSchedule), then collect once
if ($Debug) {
    Test-QwinstaOutput
}
else {
    $singleShot = $RunOnce -or $DryRun
    if (-not $singleShot) {
        if ($NoSchedule) {
            Write-InfoLog "Skipping scheduled task creation (-NoSchedule specified)"
        }
        else {
            New-MetricsScheduledTask -IntervalSeconds $IntervalSeconds
        }
    }
    # Every non-debug mode ends with one immediate collection
    Invoke-MetricsCollection
}
+619
View File
@@ -0,0 +1,619 @@
#!/bin/bash
########################################################################################
#### users_logged_in.sh ####
#### ####
#### This script monitors and reports information about users currently logged into ####
#### a Linux system. It's designed to work with Prometheus monitoring system to ####
#### track user activity on Amazon, Ubuntu, and RedHat Linux servers. ####
#### ####
#### Contact: Phil Connor contact@mylinux.work ####
#### Version 3.3.1-20250923 ####
########################################################################################
# Strict mode: abort on unhandled errors (-e), on use of unset variables (-u), and treat a
# pipeline as failed when any stage fails (pipefail)
set -euo pipefail
# CLI flags - defaults; parse_arguments overwrites them from the command line
DRY_RUN=false
VERBOSE=false
QUIET=false
NO_CRON=false
# Version reported by --version; matches the version in the header banner
SCRIPT_VERSION="3.3.1-20250923"
#######################################
# Parse command line options into the global flag variables.
# Globals:   DRY_RUN, VERBOSE, DEBUG, QUIET, NO_CRON (written); SCRIPT_VERSION (read)
# Arguments: the script's command line ("$@")
# Outputs:   version or usage text on stdout; unknown-option message on stderr
# Returns:   exits 0 after --version/--help, exits 1 on an unknown option
#######################################
parse_arguments() {
  while (( $# > 0 )); do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        ;;
      -v | --verbose)
        VERBOSE=true
        DEBUG=1
        ;;
      -q | --quiet)
        QUIET=true
        ;;
      --no-cron)
        NO_CRON=true
        ;;
      --version)
        printf '%s\n' \
          "User Login Monitor" \
          "Version: $SCRIPT_VERSION" \
          "Author: Phil Connor contact@mylinux.work"
        exit 0
        ;;
      -h | --help)
        printf '%s\n' \
          "Usage: $0 [OPTIONS]" \
          "Monitor user login activity and export Prometheus metrics" \
          "" \
          "Options:" \
          " --dry-run Output metrics to console instead of file" \
          " --verbose Enable verbose debug output" \
          " --quiet Suppress non-error output" \
          " --no-cron Skip cron job installation" \
          " --version Show version and exit" \
          " --help Show this help message"
        exit 0
        ;;
      *)
        printf '%s\n' "Unknown option: $1" "Use --help for usage information" >&2
        exit 1
        ;;
    esac
    # Every accepted flag consumes exactly one argument
    shift
  done
}
# Logging helpers
#######################################
# Print a timestamped [VERBOSE] message when verbose mode is on.
# Globals:   VERBOSE (read)
# Arguments: $1 - message text
# Outputs:   the message on stdout, or nothing
# Returns:   always 0. The previous "[[ ... ]] && echo" form returned 1 whenever the message
#            was suppressed, which aborts the script under "set -e" when the call is a
#            plain statement.
#######################################
log_verbose() {
  if [[ "$VERBOSE" == "true" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [VERBOSE] $1"
  fi
  return 0
}
#######################################
# Print a timestamped [INFO] message unless quiet mode is on.
# Globals:   QUIET (read)
# Arguments: $1 - message text
# Outputs:   the message on stdout, or nothing
# Returns:   always 0. The previous "[[ ... ]] && echo" form returned 1 whenever the message
#            was suppressed, which aborts the script under "set -e" when the call is a
#            plain statement (e.g. running with --quiet).
#######################################
log_info() {
  if [[ "$QUIET" == "false" ]]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] [INFO] $1"
  fi
  return 0
}
# System Configuration - Define default values and paths
# The first three and UPDATE_INTERVAL can be overridden from the environment; ":-" falls back
# to the default when the variable is unset or empty. All values are frozen with readonly.
readonly NODE_EXPORTER_DIR="${NODE_EXPORTER_DIR:-/var/lib/node_exporter}" # Directory where Prometheus metrics are stored
readonly PROMETHEUS_USER="${PROMETHEUS_USER:-prometheus}" # User that owns the Prometheus files
readonly CRONTAB_USER="${CRONTAB_USER:-root}" # User under which the cron job runs
# NOTE(review): combining readonly with the command substitution hides a readlink failure
readonly SCRIPT_PATH="$(readlink -f "$0")" # Full path to this script
readonly UPDATE_INTERVAL="${UPDATE_INTERVAL:-*/3 * * * *}" # Cron schedule (every 3 minutes by default)
readonly LOCKFILE="/var/run/users_logged_in.lock" # Prevents multiple instances from running
# Required commands - Map of command name to the directory used as fallback when the
# command is not found on PATH (see find_command)
declare -A COMMANDS=(
[awk]="/usr/bin" # Text processing utility
[cut]="/usr/bin" # Extract columns from text
[grep]="/usr/bin" # Search text patterns
[sed]="/usr/bin" # Stream editor for text manipulation
[sort]="/usr/bin" # Sort lines of text
[uniq]="/usr/bin" # Remove duplicate lines
[who]="/usr/bin" # Show logged in users
)
# Command paths (populated by find_commands function)
declare -A CMD_PATHS
# Validation - Ensure required settings are non-empty
# NOTE(review): both values always receive a non-empty default above, so this guard looks unreachable
[[ -z "$NODE_EXPORTER_DIR" || -z "$PROMETHEUS_USER" ]] && {
echo "ERROR: Required environment variables not set" >&2
exit 1
}
#######################################
# Report a fatal error and terminate the script.
# Arguments: $1 - error message
#            $2 - exit code (optional, default 1)
# Outputs:   "ERROR: <message>" on stderr
# Returns:   does not return; exits with the given code
#######################################
handle_error() {
  printf 'ERROR: %s\n' "$1" >&2
  exit "${2:-1}"
}
#######################################
# Print a timestamped log line.
# Arguments: $1 - level tag (e.g. WARNING)
#            $2 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] [<level>] <message>" on stdout
#######################################
log() {
  printf '[%s] [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" "$2"
}
#######################################
# Resolve the path of an executable.
# Arguments: $1 - command name
#            $2 - directory to fall back to when the command is not found via PATH
# Outputs:   the resolved path on stdout
# Returns:   exits through handle_error when the resolved path is not executable
#######################################
find_command() {
  local name="$1"
  local fallback_dir="$2"
  local resolved

  if ! resolved=$(command -v "$name" 2>/dev/null); then
    resolved="$fallback_dir/$name"
  fi

  if [[ ! -x "$resolved" ]]; then
    handle_error "Cannot find or execute '$name'"
  fi

  echo "$resolved"
}
#######################################
# Resolve every required command and store its path.
# Globals:   COMMANDS (read)   - command name -> fallback directory
#            CMD_PATHS (written) - command name -> resolved path
# The loop variable is declared local so it no longer leaks into (or clobbers) a global
# variable named "cmd".
#######################################
find_commands() {
  local cmd
  for cmd in "${!COMMANDS[@]}"; do
    CMD_PATHS[$cmd]=$(find_command "$cmd" "${COMMANDS[$cmd]}")
  done
}
#######################################
# Remove the lockfile; a missing file is not an error.
# Globals:   LOCKFILE (read)
#######################################
cleanup() {
  local lock_path="$LOCKFILE"
  rm -f "$lock_path"
}
#######################################
# Make sure the metrics output directory exists and is writable.
# When the directory is missing and the script runs as root, it is created and handed to
# PROMETHEUS_USER (a failing chown is ignored).
# Globals:   NODE_EXPORTER_DIR, PROMETHEUS_USER (read)
# Returns:   exits through handle_error when the directory is not writable
#######################################
setup_directory() {
  if [[ ! -d "$NODE_EXPORTER_DIR" ]] && [[ $(id -u) -eq 0 ]]; then
    mkdir -p "$NODE_EXPORTER_DIR"
    chown "$PROMETHEUS_USER": "$NODE_EXPORTER_DIR" 2>/dev/null || true
  fi

  if [[ ! -w "$NODE_EXPORTER_DIR" ]]; then
    handle_error "$NODE_EXPORTER_DIR is not writable"
  fi
}
#######################################
# Take the single-instance lock.
# A lockfile older than 60 minutes is treated as stale and removed first.
# The lock is created with noclobber, so "does it exist?" and "create it" are one step.
# The previous test-then-touch sequence let two instances starting at the same moment both
# pass the check. An unwritable lock location is now reported as such instead of failing
# without a message.
# Globals:   LOCKFILE (read)
# Returns:   exits through handle_error when the lock is held or cannot be created
#######################################
setup_lockfile() {
  # Best effort: find fails when the lockfile does not exist, which is fine
  find "$LOCKFILE" -mmin +60 -delete 2>/dev/null || true

  if ! ( set -o noclobber; : > "$LOCKFILE" ) 2>/dev/null; then
    if [[ -e "$LOCKFILE" ]]; then
      handle_error "Script is already running"
    fi
    handle_error "Cannot create lockfile $LOCKFILE"
  fi

  chmod 600 "$LOCKFILE"
}
#######################################
# Add a cron entry that runs this script on UPDATE_INTERVAL, unless one is already present.
# Globals:   NO_CRON, SCRIPT_PATH, UPDATE_INTERVAL, NODE_EXPORTER_DIR, CRONTAB_USER (read)
# Outputs:   status messages via log_info / log
# Returns:   always 0; a failed installation is only logged as a warning
#
# Fixes relative to the previous version:
#   * The "already installed?" check reads CRONTAB_USER's crontab, the same one that is
#     written. Before, the invoking user's crontab was checked.
#   * The existing crontab is captured once and written back with printf. "echo -e" interpreted
#     backslash sequences inside existing entries and added a blank first line to an empty crontab.
#   * The check no longer pipes into "grep -q", whose early exit can fail the pipeline under
#     pipefail and trigger a duplicate install. The path is also matched as a fixed string.
#   * log_info calls are guarded so a non-zero status from the logger cannot abort the script
#     under "set -e".
# NOTE(review): the entry redirects stderr into the .prom file as well ("2>&1"); confirm that
# is intended, since diagnostics would end up inside the metrics file.
#######################################
install_cron_job() {
  local current_crontab
  local cron_entry

  if [[ "$NO_CRON" == "true" ]]; then
    log_info "Skipping cron job installation (--no-cron specified)" || true
    return 0
  fi

  if [[ ! -f "$SCRIPT_PATH" ]]; then
    return 0
  fi

  # A user without a crontab makes "crontab -l" fail; treat that as an empty crontab
  current_crontab=$(crontab -u "$CRONTAB_USER" -l 2>/dev/null) || current_crontab=""

  if grep -qF -- "$SCRIPT_PATH" <<<"$current_crontab"; then
    return 0
  fi

  cron_entry="$UPDATE_INTERVAL $SCRIPT_PATH > $NODE_EXPORTER_DIR/usrlogins.prom 2>&1"

  if {
    if [[ -n "$current_crontab" ]]; then
      printf '%s\n' "$current_crontab"
    fi
    printf '%s\n' "$cron_entry"
  } | crontab -u "$CRONTAB_USER" -; then
    log_info "Cron job installed successfully" || true
  else
    log "WARNING" "Failed to install cron job for user $CRONTAB_USER"
  fi
  return 0
}
# Get logged users - Emit one node_logged_in_usrs sample (value 1) per distinct line of `who`
# Globals: CMD_PATHS (read) - resolved paths of who, sort, uniq and awk
# Output:  lines of the form node_logged_in_usrs{name="..", terminal="..", location=".."} 1
# Fields are taken by position: $1 as name, $2 as terminal, $5 as location.
# NOTE(review): whether $5 is the remote-host column depends on how many fields the local
# `who` uses for the login date/time - confirm on each target distribution.
get_logged_users() {
"${CMD_PATHS[who]}" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
"${CMD_PATHS[awk]}" '{
gsub(/US\\|@us\.[^.]+\.net/, "", $1) # Remove domain prefixes from username (US\ or @us.*.net)
gsub(/\//, " ", $2) # Replace slashes in terminal names
gsub(/:/, "", $2) # Remove colons from terminal names
gsub(/:100/, "aws_workspace", $5) # Convert AWS workspace notation
gsub(/\(|\)/, "", $5) # Remove parentheses from location
print "node_logged_in_usrs{name=\""$1"\", terminal=\""$2"\", location=\""$5"\"}", 1
}'
}
# Get user terminal count - Emit one node_logged_in_usr_terminals sample per user, whose value
# is the number of `who` lines for that user
# Globals: CMD_PATHS (read) - resolved paths of who, sed, cut, sort, uniq and awk
# Output:  lines of the form node_logged_in_usr_terminals{username=".."} <count>
get_user_terminal_count() {
# sed drops everything up to a "US\" domain prefix and removes commas; cut keeps the first
# space-separated field (the user name); sort | uniq -c counts occurrences per name
"${CMD_PATHS[who]}" | "${CMD_PATHS[sed]}" 's/.*US\\[\t ]*//;s/,//g' | \
"${CMD_PATHS[cut]}" -f1 -d' ' | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" -c | \
"${CMD_PATHS[awk]}" '{
gsub(/@us\.[^.]+\.net/, "", $2) # Remove email domain from username (@us.*.net)
print "node_logged_in_usr_terminals{username=\""$2"\"}", $1
}'
}
# Get total user count - Print the session total reported by `who -q`
# Globals: CMD_PATHS (read) - resolved paths of who, grep, awk and cut
# Takes the lines containing "users", keeps their second whitespace-separated field and prints
# the part after "=". Presumably relies on a summary line such as "# users=N" - verify against
# the local `who` implementation.
get_total_user_count() {
"${CMD_PATHS[who]}" -q | "${CMD_PATHS[grep]}" users | \
"${CMD_PATHS[awk]}" '{print $2}' | "${CMD_PATHS[cut]}" -d "=" -f2
}
#######################################
# Emit the last (up to) 10 shell history lines of a user as node_user_last_commands samples.
# Globals:   CMD_PATHS (read) - resolved path of awk
# Arguments: $1 - user name
# Outputs:   lines of the form
#            node_user_last_commands{username="..", command_number="N", command=".."} 1
# Returns:   1 when no user name is given, otherwise 0 (also when no history is readable)
#
# Fixes relative to the previous version:
#   * history_file is initialised to "". It was declared without a value, so the later test
#     aborted with "unbound variable" under "set -u" whenever no history file was readable.
#   * /root/.bash_history is only considered for the user "root". Before, root's history was
#     reported under the name of any user without a readable history file.
#   * The loop variable is local.
# NOTE(review): shell history can contain secrets; confirm that exporting it as metric labels
# is intended.
#######################################
get_last_user_commands() {
  local username="$1"
  local history_file=""
  local hist_path
  local -a candidates

  if [[ -z "$username" ]]; then
    return 1
  fi

  # Candidate history files, in order of preference
  candidates=("/home/${username}/.bash_history" "/home/${username}/.history")
  if [[ "$username" == "root" ]]; then
    candidates+=("/root/.bash_history")
  fi

  for hist_path in "${candidates[@]}"; do
    if [[ -r "$hist_path" ]]; then
      history_file="$hist_path"
      break
    fi
  done

  # Extract last 10 commands and format as Prometheus metrics
  if [[ -n "$history_file" ]]; then
    tail -n 10 "$history_file" 2>/dev/null | \
      "${CMD_PATHS[awk]}" -v user="$username" 'NR <= 10 {
        gsub(/\\/, "\\\\", $0) # Escape backslashes first (before other escaping)
        gsub(/"/, "\\\"", $0) # Escape double quotes in commands
        gsub(/'\''/, "", $0) # Remove single quotes (problematic for Prometheus)
        print "node_user_last_commands{username=\"" user "\", command_number=\"" NR "\", command=\"" $0 "\"} 1"
      }'
  fi
}
#######################################
# Emit the distinct recent sudo commands of a user as node_user_sudo_commands samples.
# Reads /var/log/secure first, then /var/log/auth.log, and stops at the first log that yields
# commands. Per log, the last 10 matching lines are considered.
# Globals:   CMD_PATHS (read) - resolved paths of awk, sort and uniq
# Arguments: $1 - user name (a leading "US\" and a trailing "@domain" are stripped)
# Outputs:   lines of the form node_user_sudo_commands{username="..", command=".."} 1
# Returns:   1 when no user name is given, otherwise 0
#
# Fixes relative to the previous version:
#   * Each search pipeline is followed by "|| true". With "set -o pipefail" a grep that
#     finds nothing makes the whole pipeline fail, and under "set -e" the failed assignment
#     aborted the script. "No sudo commands for this user" is a normal result.
#   * The loop variable is local.
# NOTE(review): the user name is interpolated into grep -E patterns; names containing regex
# metacharacters would change what is matched.
#######################################
get_sudo_commands() {
  local username="$1"
  if [[ -z "$username" ]]; then
    return 1
  fi

  # Strip domain prefixes for comparison
  local clean_username="${username#US\\}"
  clean_username="${clean_username%@*}"

  # Check both RHEL (/var/log/secure) and Ubuntu (/var/log/auth.log) locations
  local auth_logs=("/var/log/secure" "/var/log/auth.log")
  local commands_found=""
  local log_file

  for log_file in "${auth_logs[@]}"; do
    if [[ -r "$log_file" ]]; then
      # Try RHEL/Amazon Linux format first (TTY= pattern)
      commands_found=$(grep "TTY=" "$log_file" 2>/dev/null | \
        grep -E "(US\\\\$clean_username|$clean_username|$username)" | \
        grep "COMMAND=" | \
        tail -10 | \
        "${CMD_PATHS[awk]}" -F'; COMMAND=' -v user="$clean_username" '{
          if (NF >= 2) {
            cmd = $2
            gsub(/#040/, " ", cmd) # Convert #040 to spaces
            gsub(/^[ \t]+|[ \t]+$/, "", cmd) # Trim whitespace
            gsub(/\\/, "\\\\", cmd) # Escape backslashes first (before other escaping)
            gsub(/"/, "\\\"", cmd) # Escape double quotes
            gsub(/'\''/, "", cmd) # Remove single quotes (problematic for Prometheus)
            if (cmd != "" && length(cmd) > 0) {
              print user "|||" cmd # Use delimiter for deduplication
            }
          }
        }') || true

      # If RHEL format didn't work, try Ubuntu format
      if [[ -z "$commands_found" ]]; then
        commands_found=$(grep "COMMAND=" "$log_file" 2>/dev/null | \
          grep -E "(USER=$clean_username|$clean_username :)" | \
          tail -10 | \
          "${CMD_PATHS[awk]}" -F'COMMAND=' -v user="$clean_username" '{
            if (NF >= 2) {
              cmd = $2
              gsub(/^[ \t]+|[ \t]+$/, "", cmd) # Trim whitespace
              gsub(/\\/, "\\\\", cmd) # Escape backslashes first (before other escaping)
              gsub(/"/, "\\\"", cmd) # Escape double quotes
              gsub(/'\''/, "", cmd) # Remove single quotes (problematic for Prometheus)
              if (cmd != "" && length(cmd) > 0) {
                print user "|||" cmd # Use delimiter for deduplication
              }
            }
          }') || true
      fi

      # If we found commands, stop here (the first readable log with results wins)
      if [[ -n "$commands_found" ]]; then
        break
      fi
    fi
  done

  # Deduplicate and format as proper metrics
  if [[ -n "$commands_found" ]]; then
    echo "$commands_found" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
      "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
        print "node_user_sudo_commands{username=\"" $1 "\", command=\"" $2 "\"} 1"
      }'
  fi
}
#######################################
# Emit the distinct recent login/logout events of a user as node_user_session_events samples.
# Reads /var/log/secure first, then /var/log/auth.log, and stops at the first log that yields
# events. Per log, the last 20 matching lines are considered (there is no time-based filter).
# Globals:   CMD_PATHS (read) - resolved paths of awk, sort and uniq
# Arguments: $1 - user name (a leading "US\" and a trailing "@domain" are stripped)
# Outputs:   lines of the form
#            node_user_session_events{username="..", event="login|logout", method=".."} 1
# Returns:   1 when no user name is given, otherwise 0
#
# Fixes relative to the previous version:
#   * The search pipeline is followed by "|| true". With "set -o pipefail" a grep that
#     finds nothing makes the whole pipeline fail, and under "set -e" the failed assignment
#     aborted the script. "No events for this user" is a normal result.
#   * The loop variable is local.
# NOTE(review): the user name is interpolated into a grep -E pattern; names containing regex
# metacharacters would change what is matched.
#######################################
get_session_events() {
  local username="$1"
  if [[ -z "$username" ]]; then
    return 1
  fi

  # Strip domain prefixes for comparison
  local clean_username="${username#US\\}"
  clean_username="${clean_username%@*}"

  # Check both log files for session events
  local auth_logs=("/var/log/secure" "/var/log/auth.log")
  local session_events=""
  local log_file

  for log_file in "${auth_logs[@]}"; do
    if [[ -r "$log_file" ]]; then
      # Classify the most recent matching lines as login/logout and derive the method
      session_events=$(grep -E "(session opened|session closed|Accepted)" "$log_file" 2>/dev/null | \
        grep -E "(US\\\\$clean_username|$clean_username|$username)" | \
        tail -20 | \
        "${CMD_PATHS[awk]}" -v user="$clean_username" '{
          if ($0 ~ /session opened/) {
            method = "ssh"
            if ($0 ~ /sudo/) method = "sudo"
            print user "|||login|||" method # Use delimiter for deduplication
          }
          else if ($0 ~ /session closed/) {
            method = "ssh"
            if ($0 ~ /sudo/) method = "sudo"
            print user "|||logout|||" method # Use delimiter for deduplication
          }
          else if ($0 ~ /Accepted/) {
            method = "ssh"
            if ($0 ~ /publickey/) method = "ssh-key"
            else if ($0 ~ /password/) method = "ssh-password"
            print user "|||login|||" method # Use delimiter for deduplication
          }
        }') || true

      if [[ -n "$session_events" ]]; then
        break
      fi
    fi
  done

  # Deduplicate and format as proper metrics
  if [[ -n "$session_events" ]]; then
    echo "$session_events" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
      "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
        print "node_user_session_events{username=\"" $1 "\", event=\"" $2 "\", method=\"" $3 "\"} 1"
      }'
  fi
}
# Get failed login attempts - Track security events
# Arguments:
#   $@ - optional log files to scan, in order of preference
#        (default: /var/log/secure, then /var/log/auth.log)
# Outputs:
#   One deduplicated line per (username, source_ip, failure_type) on stdout:
#   node_user_failed_logins{username="...", source_ip="...", failure_type="..."} 1
# Notes:
#   Only the last 50 matching lines of the first log file that yields
#   results are considered. Only dotted IPv4 source addresses are extracted.
get_failed_logins() {
  # Caller-supplied log files win; otherwise fall back to the system logs
  local auth_logs=("$@")
  if (( ${#auth_logs[@]} == 0 )); then
    auth_logs=("/var/log/secure" "/var/log/auth.log")
  fi

  local log_file
  local failed_logins=""
  for log_file in "${auth_logs[@]}"; do
    if [[ ! -r "$log_file" ]]; then
      continue
    fi
    # Uses only POSIX awk: match() with RSTART/RLENGTH instead of the
    # gawk-only three-argument match(), so it also runs under mawk.
    failed_logins=$(grep -E "(Failed password|authentication failure|Invalid user)" "$log_file" 2>/dev/null | \
      tail -50 | \
      "${CMD_PATHS[awk]}" '{
        username = "unknown"
        source_ip = "unknown"

        # Extract username. The most specific pattern goes first so that
        # "Failed password for invalid user bob" yields "bob", not "invalid".
        if (match($0, /for invalid user [-A-Za-z0-9_.\\]+/)) {
          username = substr($0, RSTART + 17, RLENGTH - 17)
        } else if (match($0, /Invalid user [-A-Za-z0-9_.\\]+/)) {
          username = substr($0, RSTART + 13, RLENGTH - 13)
        } else if (match($0, /for [-A-Za-z0-9_.\\]+/)) {
          username = substr($0, RSTART + 4, RLENGTH - 4)
        } else if (match($0, / user=[-A-Za-z0-9_.\\]+/)) {
          # PAM "authentication failure; ... rhost=... user=bob" lines
          username = substr($0, RSTART + 6, RLENGTH - 6)
        }
        gsub(/US\\/, "", username)  # Clean domain prefix

        # Extract source IP (sshd "from <ip>" or PAM "rhost=<ip>")
        if (match($0, /from [0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) {
          source_ip = substr($0, RSTART + 5, RLENGTH - 5)
        } else if (match($0, /rhost=[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/)) {
          source_ip = substr($0, RSTART + 6, RLENGTH - 6)
        }

        failure_type = "password"
        if ($0 ~ /Invalid user/) failure_type = "invalid_user"
        else if ($0 ~ /authentication failure/) failure_type = "auth_failure"

        print username "|||" source_ip "|||" failure_type  # Use delimiter for deduplication
      }')
    # The first log file that yields results wins
    if [[ -n "$failed_logins" ]]; then
      break
    fi
  done

  # Deduplicate and format as proper metrics
  if [[ -n "$failed_logins" ]]; then
    echo "$failed_logins" | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}" | \
      "${CMD_PATHS[awk]}" -F'\\|\\|\\|' '{
        print "node_user_failed_logins{username=\"" $1 "\", source_ip=\"" $2 "\", failure_type=\"" $3 "\"} 1"
      }'
  fi
}
# Get active session durations - Calculate how long users have been logged in
# Arguments:
#   none
# Outputs:
#   One line per logged-in user on stdout, carrying the age in seconds of
#   that user's oldest session:
#   node_user_session_duration_seconds{username="..."} <seconds>
# Notes:
#   The login timestamp is taken from "who -u" and converted to an epoch with
#   awk mktime(), so sessions older than one day are measured correctly.
#   who is run under LC_ALL=C so month names, if printed, are English.
get_session_durations() {
  local current_time
  current_time=$(date +%s)
  LC_ALL=C "${CMD_PATHS[who]}" -u | "${CMD_PATHS[awk]}" -v current_time="$current_time" '
    BEGIN {
      now = current_time + 0
      split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec", month_names, " ")
      for (m = 1; m <= 12; m++) month_number[month_names[m]] = m
      current_year = strftime("%Y", now) + 0
    }
    NF >= 5 {
      username = $1
      gsub(/US\\|@us\.[^.]+\.net/, "", username)  # Clean username (US\ or @us.*.net)

      # The login time is the first HH:MM field after the terminal name.
      # The idle column can also look like HH:MM but always comes later.
      time_field = 0
      for (i = 3; i <= 5; i++) {
        if ($i ~ /^[0-9][0-9]?:[0-9][0-9]$/) {
          time_field = i
          break
        }
      }
      if (time_field == 0) next
      split($time_field, hm, ":")
      clock = " " hm[1] " " hm[2] " 00"

      login_epoch = -1
      if (time_field == 4 && $3 ~ /^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$/) {
        # ISO format: 2025-10-15 14:30
        split($3, ymd, "-")
        login_epoch = mktime(ymd[1] " " ymd[2] " " ymd[3] clock)
      } else if (time_field == 5 && ($3 in month_number) && $4 ~ /^[0-9][0-9]?$/) {
        # Date format: Oct 15 14:30 (no year; a future result means last year)
        login_epoch = mktime(current_year " " month_number[$3] " " $4 clock)
        if (login_epoch > now) {
          login_epoch = mktime((current_year - 1) " " month_number[$3] " " $4 clock)
        }
      } else if (time_field == 3) {
        # Time-only format: 14:30 (a future result means yesterday)
        login_epoch = mktime(strftime("%Y %m %d", now) clock)
        if (login_epoch > now) login_epoch -= 86400
      }
      if (login_epoch < 0) next

      duration = now - login_epoch
      if (duration < 0) duration = 0

      # Keep the highest duration for each username
      if (!(username in longest) || duration > longest[username]) {
        longest[username] = duration
      }
    }
    END {
      for (username in longest) {
        print "node_user_session_duration_seconds{username=\"" username "\"} " longest[username]
      }
    }
  ' | "${CMD_PATHS[sort]}"
}
# Output metric - Print one Prometheus metric family: HELP line, TYPE line,
# then the sample lines (or a fallback sample when none were collected)
# Arguments:
#   $1 - metric name
#   $2 - help text
#   $3 - metric type (e.g. gauge, counter)
#   $4 - pre-formatted sample line(s); may be empty
#   $5 - sample line to print when $4 is empty
output_metric() {
  local metric_name="$1" help_text="$2" metric_type="$3"
  local metric_value="$4" default_value="$5"

  # Pick the fallback only when nothing was collected
  local payload="$metric_value"
  if [[ -z "$payload" ]]; then
    payload="$default_value"
  fi

  printf '# HELP %s %s\n# TYPE %s %s\n' \
    "$metric_name" "$help_text" "$metric_name" "$metric_type"
  echo "$payload"
}
# Main function - Drive one monitoring run: parse options, prepare the
# environment, then emit every metric family in a fixed order
# Arguments:
#   $@ - command line arguments, handed to parse_arguments unchanged
main() {
  # Options first: DRY_RUN is consulted below
  parse_arguments "$@"

  # Start the clock for the runtime metric emitted at the very end
  local script_start_time
  script_start_time=$(date +%s.%N)

  # Dry-run banner goes to stderr
  if [[ "$DRY_RUN" == "true" ]]; then
    echo "=== DRY RUN MODE - Metrics that would be written to $NODE_EXPORTER_DIR/usrlogins.prom ===" >&2
  fi

  # Run cleanup on every exit path from here on
  trap cleanup EXIT

  # Resolve the external commands used by the collectors
  find_commands

  # Directory, lockfile and cron setup only happen on a real run
  if [[ "$DRY_RUN" == "false" ]]; then
    setup_directory
    setup_lockfile
    install_cron_job
  fi

  # --- Metric 1: individual user sessions with details ---
  local users
  users=$(get_logged_users)
  output_metric "node_logged_in_usrs" "Currently Logged in Users" "gauge" \
    "$users" 'node_logged_in_usrs{name="", location=""} 0'

  # --- Metric 2: terminal count per user ---
  local user_terminals
  user_terminals=$(get_user_terminal_count)
  output_metric "node_logged_in_usr_terminals" "Total of open sessions per user" "gauge" \
    "$user_terminals" 'node_logged_in_usr_terminals{username=""} 0'

  # --- Metric 3: total user count system-wide ---
  local total_count
  total_count=$(get_total_user_count)
  output_metric "node_logged_in_total" "Total of open sessions on the system" "gauge" \
    "node_logged_in_total ${total_count:-0}" "node_logged_in_total 0"

  # Unique, domain-stripped names of everybody currently logged in;
  # metrics 4 to 6 each walk this list once
  local logged_users
  logged_users=$("${CMD_PATHS[who]}" | "${CMD_PATHS[awk]}" '{gsub(/US\\|@us\.[^.]+\.net/, "", $1); print $1}' | "${CMD_PATHS[sort]}" | "${CMD_PATHS[uniq]}")

  # --- Metric 4: last 10 commands for each logged in user ---
  local user_commands=""
  local last_cmds
  while IFS= read -r user; do
    [[ -n "$user" ]] || continue
    last_cmds=$(get_last_user_commands "$user")
    if [[ -n "$last_cmds" ]]; then
      user_commands+="${last_cmds}"$'\n'
    fi
  done <<< "$logged_users"
  output_metric "node_user_last_commands" "Last 10 commands executed by logged in users" "gauge" \
    "$user_commands" 'node_user_last_commands{username="", command_number="", command=""} 0'

  # --- Metric 5: recent sudo commands for each logged in user ---
  local sudo_commands=""
  local sudo_lines
  while IFS= read -r user; do
    [[ -n "$user" ]] || continue
    sudo_lines=$(get_sudo_commands "$user")
    if [[ -n "$sudo_lines" ]]; then
      sudo_commands+="${sudo_lines}"$'\n'
    fi
  done <<< "$logged_users"
  output_metric "node_user_sudo_commands" "Recent sudo commands executed by logged in users" "gauge" \
    "$sudo_commands" 'node_user_sudo_commands{username="", command=""} 0'

  # --- Metric 6: session events (login/logout) for each logged in user ---
  local session_events=""
  local event_lines
  while IFS= read -r user; do
    [[ -n "$user" ]] || continue
    event_lines=$(get_session_events "$user")
    if [[ -n "$event_lines" ]]; then
      session_events+="${event_lines}"$'\n'
    fi
  done <<< "$logged_users"
  output_metric "node_user_session_events" "Login and logout events for users" "gauge" \
    "$session_events" 'node_user_session_events{username="", event="", method=""} 0'

  # --- Metric 7: active session durations ---
  local session_durations
  session_durations=$(get_session_durations)
  output_metric "node_user_session_duration_seconds" "Duration of active user sessions in seconds" "gauge" \
    "$session_durations" 'node_user_session_duration_seconds{username=""} 0'

  # --- Metric 8: failed login attempts (security monitoring) ---
  local failed_logins
  failed_logins=$(get_failed_logins)
  output_metric "node_user_failed_logins" "Failed login attempts by username and source IP" "counter" \
    "$failed_logins" 'node_user_failed_logins{username="", source_ip="", failure_type=""} 0'

  # --- Metric 9: script runtime ---
  # bc does the fractional subtraction; if it fails the runtime reads 0
  local script_end_time
  local script_runtime
  script_end_time=$(date +%s.%N)
  script_runtime=$(echo "$script_end_time - $script_start_time" | bc -l 2>/dev/null || echo "0")
  output_metric "node_user_monitor_runtime_seconds" "Script execution time in seconds" "gauge" \
    "node_user_monitor_runtime_seconds $script_runtime" "node_user_monitor_runtime_seconds 0"

  # Closing dry-run banner, again on stderr
  if [[ "$DRY_RUN" == "true" ]]; then
    echo "=== END DRY RUN OUTPUT ===" >&2
  fi
}
# Script entry point - hand every command line argument to main unchanged
main "$@"
# 2025-09-23
# Fixed: Prometheus parsing errors with single quotes (\' sequences)
# Fixed: Prometheus parsing errors with backslash escapes (\u, \x, etc.)
# Improved: Domain regex pattern now handles any us.*.net domain instead of just us.calormen.net